From 26a90435c77c6eb4f59a281c7b2ada1adf78c385 Mon Sep 17 00:00:00 2001
From: Elliot Kang <kkang2097@gmail.com>
Date: Mon, 11 Sep 2023 14:55:07 -0700
Subject: [PATCH] Revert "Simplified OutputParser"

This reverts commit ff0e831da9458ca2cc0ded63938e408245f786c8.
---
 .husky/pre-commit                 |  4 +++
 .husky/pre-push                   |  4 +++
 packages/core/src/OutputParser.ts | 52 +++++++++++++++++++++++++++----
 3 files changed, 54 insertions(+), 6 deletions(-)
 create mode 100755 .husky/pre-commit
 create mode 100755 .husky/pre-push

diff --git a/.husky/pre-commit b/.husky/pre-commit
new file mode 100755
index 000000000..58993aaee
--- /dev/null
+++ b/.husky/pre-commit
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+. "$(dirname -- "$0")/_/husky.sh"
+
+pnpm lint
diff --git a/.husky/pre-push b/.husky/pre-push
new file mode 100755
index 000000000..af0cff7ed
--- /dev/null
+++ b/.husky/pre-push
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+. "$(dirname -- "$0")/_/husky.sh"
+
+pnpm test
diff --git a/packages/core/src/OutputParser.ts b/packages/core/src/OutputParser.ts
index 1498f3f00..11bd7f023 100644
--- a/packages/core/src/OutputParser.ts
+++ b/packages/core/src/OutputParser.ts
@@ -56,14 +56,54 @@ class OutputParserError extends Error {
 function parseJsonMarkdown(text: string) {
   text = text.trim();
 
-  //This code is more general than the previous version, and should be faster.
-  const beginIndex = text.indexOf("[");
-  const endIndex = text.lastIndexOf("]");
-  const jsonText = text.substring(beginIndex, endIndex + 1);
-  try {
+  const beginDelimiter = "```json";
+  const endDelimiter = "```";
+
+  const beginIndex = text.indexOf(beginDelimiter);
+  const endIndex = text.indexOf(
+    endDelimiter,
+    beginIndex + beginDelimiter.length,
+  );
+  //Scenario 1: a ```json ... ``` fence was found; parse its contents (a parse failure here is not wrapped in OutputParserError).
+  if (!(beginIndex === -1 || endIndex === -1)) {
+    const jsonText = text.substring(
+      beginIndex + beginDelimiter.length,
+      endIndex,
+    );
     return JSON.parse(jsonText);
+  }
+
+  //Scenario 2: The LLM followed the instructed format only roughly.
+  // For example: the ```json fence was not returned, or there are irregular newlines.
+  try {
+    //No complete fence was found, so try to break the text into individual JSON object strings and parse each one.
+    //NOTE(review): replace() with a string pattern only affects the first match of "[", "]" and "\n".
+    const new_data_str: string[] = text
+      .replace("[", " ")
+      .replace("]", " ")
+      .replace("\n", " ")
+      .trim()
+      //Zero-width split before each "},"; the separator stays at the start of the next chunk. Warning: This regex might be slow.
+      .split(/(?=},)/g);
+    const arr_length = new_data_str.length;
+
+    //Repair each chunk so that it can be parsed on its own.
+    //First to penultimate element: restore the closing brace that the split left in the next chunk.
+    for (let i = 0; i < arr_length - 1; i++) {
+      new_data_str[i] += "}";
+    }
+    //Second to final element: blank out the leading "}," carried over from the previous chunk.
+    for (let i = 1; i < arr_length; i++) {
+      new_data_str[i] = new_data_str[i].replace("},", " ");
+    }
+    const output: object[] = new_data_str.map((item) => JSON.parse(item));
+    return output;
   } catch (e) {
-    throw new OutputParserError("Not a json markdown", { output: text });
+    //Worst case: the fallback parsing failed as well, so throw an error carrying the cause and the trimmed text.
+    throw new OutputParserError("Not a valid json", {
+      cause: e as Error,
+      output: text,
+    });
   }
 }
 
-- 
GitLab