diff --git a/.changeset/rich-coins-leave.md b/.changeset/rich-coins-leave.md new file mode 100644 index 0000000000000000000000000000000000000000..44da9104700ff11bc820409be26b4581a0f37e87 --- /dev/null +++ b/.changeset/rich-coins-leave.md @@ -0,0 +1,5 @@ +--- +"@llamaindex/core": minor +--- + +add defaultQuestionExtractPrompt diff --git a/.changeset/three-gorillas-do.md b/.changeset/three-gorillas-do.md new file mode 100644 index 0000000000000000000000000000000000000000..72a69c0c5de2c560e22e03c60d9c4665b746f8a4 --- /dev/null +++ b/.changeset/three-gorillas-do.md @@ -0,0 +1,5 @@ +--- +"llamaindex": minor +--- + +Correct initialization of QuestionsAnsweredExtractor so that it uses the promptTemplate arg when passed in diff --git a/packages/core/src/prompts/index.ts b/packages/core/src/prompts/index.ts index c8ae7087f48b1397d7b816eb4f595019bf51b5de..992411a5b9a8be89b2562da4503aa92f6cd637cb 100644 --- a/packages/core/src/prompts/index.ts +++ b/packages/core/src/prompts/index.ts @@ -13,6 +13,7 @@ export { defaultContextSystemPrompt, defaultKeywordExtractPrompt, defaultQueryKeywordExtractPrompt, + defaultQuestionExtractPrompt, defaultRefinePrompt, defaultSubQuestionPrompt, defaultSummaryPrompt, @@ -25,6 +26,7 @@ export type { ContextSystemPrompt, KeywordExtractPrompt, QueryKeywordExtractPrompt, + QuestionExtractPrompt, RefinePrompt, SubQuestionPrompt, SummaryPrompt, diff --git a/packages/core/src/prompts/prompt.ts b/packages/core/src/prompts/prompt.ts index aeb6a7e642ff82fb872d1e5402483df0f6a73ad9..1efa0289f12a96fb9734fca71d5b6981bf9d663d 100644 --- a/packages/core/src/prompts/prompt.ts +++ b/packages/core/src/prompts/prompt.ts @@ -15,6 +15,7 @@ export type CondenseQuestionPrompt = PromptTemplate< export type ContextSystemPrompt = PromptTemplate<["context"]>; export type KeywordExtractPrompt = PromptTemplate<["context"]>; export type QueryKeywordExtractPrompt = PromptTemplate<["question"]>; +export type QuestionExtractPrompt = PromptTemplate<["context", "numQuestions"]>; 
export const defaultTextQAPrompt: TextQAPrompt = new PromptTemplate({ templateVars: ["context", "query"], @@ -253,3 +254,17 @@ export const defaultQueryKeywordExtractPrompt = new PromptTemplate({ }).partialFormat({ maxKeywords: "10", }); + +export const defaultQuestionExtractPrompt = new PromptTemplate({ + templateVars: ["numQuestions", "context"], + template: `( + "Given the contextual information below, generate {numQuestions} questions this context can provide specific answers to which are unlikely to be found elsewhere. Higher-level summaries of surrounding context may be provided as well. " + "Try using these summaries to generate better questions that this context can answer." + "---------------------" + "{context}" + "---------------------" + "Provide questions in the following format: 'QUESTIONS: <questions>'" +)`, +}).partialFormat({ + numQuestions: "5", +}); diff --git a/packages/llamaindex/src/extractors/MetadataExtractors.ts b/packages/llamaindex/src/extractors/MetadataExtractors.ts index b1d0ce5a47412627745c0375ef86b1a0f30e27b9..e43ef941521cfda2602cde871639bc66cbc15a5b 100644 --- a/packages/llamaindex/src/extractors/MetadataExtractors.ts +++ b/packages/llamaindex/src/extractors/MetadataExtractors.ts @@ -1,10 +1,14 @@ import type { LLM } from "@llamaindex/core/llms"; +import { + PromptTemplate, + defaultQuestionExtractPrompt, + type QuestionExtractPrompt, +} from "@llamaindex/core/prompts"; import type { BaseNode } from "@llamaindex/core/schema"; import { MetadataMode, TextNode } from "@llamaindex/core/schema"; import { OpenAI } from "@llamaindex/openai"; import { defaultKeywordExtractorPromptTemplate, - defaultQuestionAnswerPromptTemplate, defaultSummaryExtractorPromptTemplate, defaultTitleCombinePromptTemplate, defaultTitleExtractorPromptTemplate, @@ -247,7 +251,7 @@ export class TitleExtractor extends BaseExtractor { type QuestionAnswerExtractArgs = { llm?: LLM; questions?: number; - promptTemplate?: string; + promptTemplate?:
QuestionExtractPrompt["template"]; embeddingOnly?: boolean; }; @@ -276,7 +280,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor { * The prompt template to use for the question extractor. * @type {string} */ - promptTemplate: string; + promptTemplate: QuestionExtractPrompt; /** * Wheter to use metadata for embeddings only @@ -289,7 +293,7 @@ export class QuestionsAnsweredExtractor extends BaseExtractor { * Constructor for the QuestionsAnsweredExtractor class. * @param {LLM} llm LLM instance. * @param {number} questions Number of questions to generate. - * @param {string} promptTemplate The prompt template to use for the question extractor. + * @param {QuestionExtractPrompt["template"]} promptTemplate The prompt template to use for the question extractor. * @param {boolean} embeddingOnly Wheter to use metadata for embeddings only. */ constructor(options?: QuestionAnswerExtractArgs) { @@ -300,12 +304,14 @@ export class QuestionsAnsweredExtractor extends BaseExtractor { this.llm = options?.llm ?? new OpenAI(); this.questions = options?.questions ?? 5; - this.promptTemplate = - options?.promptTemplate ?? - defaultQuestionAnswerPromptTemplate({ - numQuestions: this.questions, - contextStr: "", - }); + this.promptTemplate = options?.promptTemplate + ? new PromptTemplate({ + templateVars: ["numQuestions", "context"], + template: options.promptTemplate, + }).partialFormat({ + numQuestions: "5", + }) + : defaultQuestionExtractPrompt; this.embeddingOnly = options?.embeddingOnly ??
false; } @@ -323,9 +329,9 @@ export class QuestionsAnsweredExtractor extends BaseExtractor { const contextStr = node.getContent(this.metadataMode); - const prompt = defaultQuestionAnswerPromptTemplate({ - contextStr, - numQuestions: this.questions, + const prompt = this.promptTemplate.format({ + context: contextStr, + numQuestions: this.questions.toString(), }); const questions = await this.llm.complete({ diff --git a/packages/llamaindex/src/extractors/prompts.ts b/packages/llamaindex/src/extractors/prompts.ts index 5c2d5448ac5e66f4577cdafa0bcca90ca71705d7..0065e5d834887588caa3da1c80f6e5634285d48c 100644 --- a/packages/llamaindex/src/extractors/prompts.ts +++ b/packages/llamaindex/src/extractors/prompts.ts @@ -7,11 +7,6 @@ export interface DefaultKeywordExtractorPromptTemplate keywords: number; } -export interface DefaultQuestionAnswerPromptTemplate - extends DefaultPromptTemplate { - numQuestions: number; -} - export interface DefaultNodeTextTemplate { metadataStr: string; content: string; @@ -41,16 +36,6 @@ export const defaultTitleCombinePromptTemplate = ( Based on the above candidate titles and contents, what is the comprehensive title for this document? Title: `; -export const defaultQuestionAnswerPromptTemplate = ( - { contextStr = "", numQuestions = 5 }: DefaultQuestionAnswerPromptTemplate = { - contextStr: "", - numQuestions: 5, - }, -) => `${contextStr} -Given the contextual informations, generate ${numQuestions} questions this context can provides specific answers to which are unlikely to be found else where. Higher-level summaries of surrounding context may be provideds as well. -Try using these summaries to generate better questions that this context can answer. 
-`; - export const defaultSummaryExtractorPromptTemplate = ( { contextStr = "" }: DefaultPromptTemplate = { contextStr: "", diff --git a/packages/llamaindex/tests/MetadataExtractors.test.ts b/packages/llamaindex/tests/MetadataExtractors.test.ts index fe0e76c95607cddef1cd9c22c64abcd12946baf5..6cda9693866157d6aad6265cfa8b3df1c2da1b9e 100644 --- a/packages/llamaindex/tests/MetadataExtractors.test.ts +++ b/packages/llamaindex/tests/MetadataExtractors.test.ts @@ -102,6 +102,35 @@ describe("[MetadataExtractor]: Extractors should populate the metadata", () => { }); }); + test("[MetadataExtractor] QuestionsAnsweredExtractor uses custom prompt template", async () => { + const nodeParser = new SentenceSplitter(); + + const nodes = nodeParser.getNodesFromDocuments([ + new Document({ text: DEFAULT_LLM_TEXT_OUTPUT }), + ]); + + const llmCompleteSpy = vi.spyOn(serviceContext.llm, "complete"); + + const questionsAnsweredExtractor = new QuestionsAnsweredExtractor({ + llm: serviceContext.llm, + questions: 5, + promptTemplate: `This is a custom prompt template for {context} with {numQuestions} questions`, + }); + + await questionsAnsweredExtractor.processNodes(nodes); + + expect(llmCompleteSpy).toHaveBeenCalled(); + + // Build the expected prompt + const expectedPrompt = `This is a custom prompt template for ${DEFAULT_LLM_TEXT_OUTPUT} with 5 questions`; + + // Get the actual prompt used in llm.complete + const actualPrompt = llmCompleteSpy.mock?.calls?.[0]?.[0]; + + // Assert that the prompts match + expect(actualPrompt).toEqual({ prompt: expectedPrompt }); + }); + test("[MetadataExtractor] SumamryExtractor returns sectionSummary metadata", async () => { const nodeParser = new SentenceSplitter();