From b179f61c6f3d0ee61ec34d8da4e6b826c0947d31 Mon Sep 17 00:00:00 2001
From: Elliot Kang <kkang2097@gmail.com>
Date: Sun, 10 Sep 2023 21:39:41 -0700
Subject: [PATCH] Update OutputParser.ts

Give OutputParser a fallback: when the LLM's response is not in the exact JSON-markdown format we asked for, try to parse it as a list of JSON objects before raising an error.
---
 packages/core/src/OutputParser.ts | 34 ++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/packages/core/src/OutputParser.ts b/packages/core/src/OutputParser.ts
index 32a2cdc4a..c067297d5 100644
--- a/packages/core/src/OutputParser.ts
+++ b/packages/core/src/OutputParser.ts
@@ -64,15 +64,43 @@ function parseJsonMarkdown(text: string) {
     endDelimiter,
     beginIndex + beginDelimiter.length,
   );
-  if (beginIndex === -1 || endIndex === -1) {
-    throw new OutputParserError("Not a json markdown", { output: text });
-  }
 
   const jsonText = text.substring(beginIndex + beginDelimiter.length, endIndex);
 
+  //Scenario 1: LLM follows instruction format. However, it doesn't always do this.
   try {
     return JSON.parse(jsonText);
   } catch (e) {
+    //Fall through
+  }
+
+  //Scenario 2: LLM follows instruction format roughly, but doesn't do this exactly.
+  // For example: [```json] part was not returned, or there are irregular \n spaces.
+  try {
+    //This isn't a JSON markdown, but we should try again with something else.
+    //Try to get data_str to be a list of JSON objects
+    const new_data_str: string[] = text
+      .replace("[", " ")
+      .replace("]", " ")
+      .replace("\n", " ")
+      .trim()
+      //Warning: This regex might be slow.
+      .split(/(?=},)/g);
+    const arr_length = new_data_str.length;
+
+    //String formatting
+    //First to penultimate element
+    for (let i = 0; i < arr_length - 1; i++) {
+      new_data_str[i] += "}";
+    }
+    //Second to final element
+    for (let i = 1; i < arr_length; i++) {
+      new_data_str[i] = new_data_str[i].replace("},", " ");
+    }
+    const output: object[] = new_data_str.map((item) => JSON.parse(item));
+    return output;
+  } catch (e) {
+    //In the worst case scenario and our options are exhausted, throw error.
     throw new OutputParserError("Not a valid json", {
       cause: e as Error,
       output: text,
-- 
GitLab