diff --git a/apps/simple/simple.txt b/apps/simple/simple.txt
deleted file mode 100644
index 7cd89b8d0d51646af616cc9f784f61fcb1469bf1..0000000000000000000000000000000000000000
--- a/apps/simple/simple.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-Simple flow:
-
-Get document list, in this case one document.
-Split each document into nodes, in this case sentences or lines.
-Embed each of the nodes and get vectors. Store them in memory for now.
-Embed query.
-Compare query with nodes and get the top n
-Put the top n nodes into the prompt.
-Execute prompt, get result.
diff --git a/apps/simple/subquestion.ts b/apps/simple/subquestion.ts
new file mode 100644
index 0000000000000000000000000000000000000000..a3a85273dd4ef8e812ef92fdf1756607324d4a92
--- /dev/null
+++ b/apps/simple/subquestion.ts
@@ -0,0 +1,72 @@
+// from llama_index import SimpleDirectoryReader, VectorStoreIndex
+// from llama_index.query_engine import SubQuestionQueryEngine
+// from llama_index.tools import QueryEngineTool, ToolMetadata
+
+// # load data
+// pg_essay = SimpleDirectoryReader(
+//     input_dir="docs/examples/data/paul_graham/"
+// ).load_data()
+
+// # build index and query engine
+// query_engine = VectorStoreIndex.from_documents(pg_essay).as_query_engine()
+
+// # setup base query engine as tool
+// query_engine_tools = [
+//     QueryEngineTool(
+//         query_engine=query_engine,
+//         metadata=ToolMetadata(
+//             name="pg_essay", description="Paul Graham essay on What I Worked On"
+//         ),
+//     )
+// ]
+
+// query_engine = SubQuestionQueryEngine.from_defaults(
+//     query_engine_tools=query_engine_tools
+// )
+
+// response = query_engine.query(
+//     "How was Paul Grahams life different before and after YC?"
+// )
+
+// print(response)
+
+import { Document } from "@llamaindex/core/src/Node";
+import { VectorStoreIndex } from "@llamaindex/core/src/BaseIndex";
+import { SubQuestionQueryEngine } from "@llamaindex/core/src/QueryEngine";
+
+import essay from "./essay";
+
+/**
+ * Sub-question query example: indexes the essay, exposes the index's query
+ * engine as the single tool of a SubQuestionQueryEngine, asks one question
+ * and logs the response.
+ */
+async function main(): Promise<void> {
+  const document = new Document({ text: essay });
+  const index = await VectorStoreIndex.fromDocuments([document]);
+
+  const queryEngine = SubQuestionQueryEngine.fromDefaults({
+    queryEngineTools: [
+      {
+        queryEngine: index.asQueryEngine(),
+        metadata: {
+          name: "pg_essay",
+          description: "Paul Graham essay on What I Worked On",
+        },
+      },
+    ],
+  });
+
+  const response = await queryEngine.aquery(
+    "How was Paul Grahams life different before and after YC?"
+  );
+
+  console.log(response);
+}
+
+// Report a failed run explicitly and exit non-zero instead of leaving the
+// promise rejection unhandled.
+main().catch((error: unknown) => {
+  console.error(error);
+  process.exitCode = 1;
+});
diff --git a/packages/core/src/OutputParser.ts b/packages/core/src/OutputParser.ts
index b7ab9bf310e5455d872c9440c8ea70f54f26446a..a5e0dded714507b64b8ff61de5d168673fd0fbd4 100644
--- a/packages/core/src/OutputParser.ts
+++ b/packages/core/src/OutputParser.ts
@@ -1,12 +1,15 @@
 import { SubQuestion } from "./QuestionGenerator";
 
-interface BaseOutputParser {
-  parse(output: string): any;
+/** Converts raw text output into a value of type `T`. */
+export interface BaseOutputParser<T> {
+  parse(output: string): T;
   format(output: string): string;
 }
 
-interface StructuredOutput {
+/** Pairs the raw output string with the value parsed from it. */
+export interface StructuredOutput<T> {
   rawOutput: string;
+  parsedOutput: T;
 }
 
 class OutputParserError extends Error {
@@ -62,10 +65,15 @@ function parseJsonMarkdown(text: string) {
   }
 }
 
-class SubQuestionOutputParser implements BaseOutputParser {
-  parse(output: string): SubQuestion[] {
-    const subQuestions = JSON.parse(output);
-    return subQuestions;
+export class SubQuestionOutputParser
+  implements BaseOutputParser<StructuredOutput<SubQuestion[]>>
+{
+  parse(output: string): StructuredOutput<SubQuestion[]> {
+    const parsed = parseJsonMarkdown(output);
+
+    // TODO add zod validation
+
+    return { rawOutput: output, parsedOutput: parsed };
   }
 
   format(output: string): string {
diff --git 
a/packages/core/src/QuestionGenerator.ts b/packages/core/src/QuestionGenerator.ts
index be3b903601fab9d1e595cd4c5baa320a9c17d03a..fad1f9732c952095034e7b07ba22ea6e03b2ebf0 100644
--- a/packages/core/src/QuestionGenerator.ts
+++ b/packages/core/src/QuestionGenerator.ts
@@ -1,5 +1,14 @@
 import { BaseLLMPredictor, ChatGPTLLMPredictor } from "./LLMPredictor";
-import { SimplePrompt, defaultSubQuestionPrompt } from "./Prompt";
+import {
+  BaseOutputParser,
+  StructuredOutput,
+  SubQuestionOutputParser,
+} from "./OutputParser";
+import {
+  SimplePrompt,
+  buildToolsText,
+  defaultSubQuestionPrompt,
+} from "./Prompt";
 import { ToolMetadata } from "./Tool";
 
 export interface SubQuestion {
@@ -14,16 +23,34 @@ export interface BaseQuestionGenerator {
 export class LLMQuestionGenerator implements BaseQuestionGenerator {
   llmPredictor: BaseLLMPredictor;
   prompt: SimplePrompt;
+  /** Turns the raw LLM prediction into a structured list of sub-questions. */
+  outputParser: BaseOutputParser<StructuredOutput<SubQuestion[]>>;
 
   constructor(init?: Partial<LLMQuestionGenerator>) {
     this.llmPredictor = init?.llmPredictor ?? new ChatGPTLLMPredictor();
     this.prompt = init?.prompt ?? defaultSubQuestionPrompt;
+    this.outputParser = init?.outputParser ?? new SubQuestionOutputParser();
   }
 
+  /**
+   * Asks the LLM to break `query` into sub-questions over the given tools.
+   *
+   * @param tools Metadata of the tools, passed to the prompt as `toolsStr`
+   *   via `buildToolsText`.
+   * @param query The user query, passed to the prompt as `queryStr`.
+   * @returns The `parsedOutput` produced by `outputParser` for the prediction.
+   */
   async agenerate(
     tools: ToolMetadata[],
     query: string
   ): Promise<SubQuestion[]> {
-    throw new Error("Method not implemented.");
+    const toolsStr = buildToolsText(tools);
+    const prediction = await this.llmPredictor.apredict(this.prompt, {
+      toolsStr,
+      queryStr: query,
+    });
+    const { parsedOutput } = this.outputParser.parse(prediction);
+
+    return parsedOutput;
   }
 }