Skip to content
Snippets Groups Projects
Commit 0c881c8f authored by Yi Ding's avatar Yi Ding
Browse files

finished subquestion demo

parent 815a3416
No related branches found
No related tags found
No related merge requests found
Simple flow:
Get document list, in this case one document.
Split each document into nodes, in this case sentences or lines.
Embed each of the nodes and get vectors. Store them in memory for now.
Embed query.
Compare the query vector with the node vectors and keep the top n matches.
Put the top n nodes into the prompt.
Execute prompt, get result.
// from llama_index import SimpleDirectoryReader, VectorStoreIndex
// from llama_index.query_engine import SubQuestionQueryEngine
// from llama_index.tools import QueryEngineTool, ToolMetadata
// # load data
// pg_essay = SimpleDirectoryReader(
// input_dir="docs/examples/data/paul_graham/"
// ).load_data()
// # build index and query engine
// query_engine = VectorStoreIndex.from_documents(pg_essay).as_query_engine()
// # setup base query engine as tool
// query_engine_tools = [
// QueryEngineTool(
// query_engine=query_engine,
// metadata=ToolMetadata(
// name="pg_essay", description="Paul Graham essay on What I Worked On"
// ),
// )
// ]
// query_engine = SubQuestionQueryEngine.from_defaults(
// query_engine_tools=query_engine_tools
// )
// response = query_engine.query(
// "How was Paul Grahams life different before and after YC?"
// )
// print(response)
import { Document } from "@llamaindex/core/src/Node";
import { VectorStoreIndex } from "@llamaindex/core/src/BaseIndex";
import { SubQuestionQueryEngine } from "@llamaindex/core/src/QueryEngine";
import essay from "./essay";
/**
 * Demo: answer a complex question about a single essay by decomposing it
 * into sub-questions with SubQuestionQueryEngine (TS port of the Python
 * snippet commented above).
 */
(async () => {
  // Load the full essay as one Document; node splitting happens inside the index.
  const document = new Document({ text: essay });

  // Build an in-memory vector index over the document.
  const index = await VectorStoreIndex.fromDocuments([document]);

  // Expose the index's query engine as a named tool the sub-question
  // engine can route generated sub-questions to.
  const queryEngine = SubQuestionQueryEngine.fromDefaults({
    queryEngineTools: [
      {
        queryEngine: index.asQueryEngine(),
        metadata: {
          name: "pg_essay",
          description: "Paul Graham essay on What I Worked On",
        },
      },
    ],
  });

  const response = await queryEngine.aquery(
    "How was Paul Grahams life different before and after YC?"
  );

  console.log(response);
})().catch((err: unknown) => {
  // Without this, any failure above becomes an unhandled promise rejection.
  console.error(err);
});
import { SubQuestion } from "./QuestionGenerator"; import { SubQuestion } from "./QuestionGenerator";
interface BaseOutputParser { export interface BaseOutputParser<T> {
parse(output: string): any; parse(output: string): T;
format(output: string): string; format(output: string): string;
} }
interface StructuredOutput { export interface StructuredOutput<T> {
rawOutput: string; rawOutput: string;
parsedOutput: T;
} }
class OutputParserError extends Error { class OutputParserError extends Error {
...@@ -62,10 +63,15 @@ function parseJsonMarkdown(text: string) { ...@@ -62,10 +63,15 @@ function parseJsonMarkdown(text: string) {
} }
} }
class SubQuestionOutputParser implements BaseOutputParser { export class SubQuestionOutputParser
parse(output: string): SubQuestion[] { implements BaseOutputParser<StructuredOutput<SubQuestion[]>>
const subQuestions = JSON.parse(output); {
return subQuestions; parse(output: string): StructuredOutput<SubQuestion[]> {
const parsed = parseJsonMarkdown(output);
// TODO add zod validation
return { rawOutput: output, parsedOutput: parsed };
} }
format(output: string): string { format(output: string): string {
......
import { BaseLLMPredictor, ChatGPTLLMPredictor } from "./LLMPredictor"; import { BaseLLMPredictor, ChatGPTLLMPredictor } from "./LLMPredictor";
import { SimplePrompt, defaultSubQuestionPrompt } from "./Prompt"; import {
BaseOutputParser,
StructuredOutput,
SubQuestionOutputParser,
} from "./OutputParser";
import {
SimplePrompt,
buildToolsText,
defaultSubQuestionPrompt,
} from "./Prompt";
import { ToolMetadata } from "./Tool"; import { ToolMetadata } from "./Tool";
export interface SubQuestion { export interface SubQuestion {
...@@ -14,16 +23,27 @@ export interface BaseQuestionGenerator { ...@@ -14,16 +23,27 @@ export interface BaseQuestionGenerator {
export class LLMQuestionGenerator implements BaseQuestionGenerator { export class LLMQuestionGenerator implements BaseQuestionGenerator {
llmPredictor: BaseLLMPredictor; llmPredictor: BaseLLMPredictor;
prompt: SimplePrompt; prompt: SimplePrompt;
outputParser: BaseOutputParser<StructuredOutput<SubQuestion[]>>;
constructor(init?: Partial<LLMQuestionGenerator>) { constructor(init?: Partial<LLMQuestionGenerator>) {
this.llmPredictor = init?.llmPredictor ?? new ChatGPTLLMPredictor(); this.llmPredictor = init?.llmPredictor ?? new ChatGPTLLMPredictor();
this.prompt = init?.prompt ?? defaultSubQuestionPrompt; this.prompt = init?.prompt ?? defaultSubQuestionPrompt;
this.outputParser = init?.outputParser ?? new SubQuestionOutputParser();
} }
async agenerate( async agenerate(
tools: ToolMetadata[], tools: ToolMetadata[],
query: string query: string
): Promise<SubQuestion[]> { ): Promise<SubQuestion[]> {
throw new Error("Method not implemented."); const toolsStr = buildToolsText(tools);
const queryStr = query;
const prediction = await this.llmPredictor.apredict(this.prompt, {
toolsStr,
queryStr,
});
const structuredOutput = this.outputParser.parse(prediction);
return structuredOutput.parsedOutput;
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment