From 26a90435c77c6eb4f59a281c7b2ada1adf78c385 Mon Sep 17 00:00:00 2001
From: Elliot Kang <kkang2097@gmail.com>
Date: Mon, 11 Sep 2023 14:55:07 -0700
Subject: [PATCH] Revert "Simplified OutputParser"

This reverts commit ff0e831da9458ca2cc0ded63938e408245f786c8.
---
Review notes (commentary only; git ignores everything between the "---"
line above and the first "diff --git" line below):

  * Adds two husky hooks: pre-commit runs "pnpm lint", pre-push runs
    "pnpm test".
  * parseJsonMarkdown goes back to a two-stage strategy:
      1. parse the text between a "```json" fence and the next "```";
      2. otherwise strip the first "[" / "]" / newline, split the rest in
         front of every "},", patch the pieces back into standalone objects
         and JSON.parse each one.
    Only a failure in stage 2 is wrapped in OutputParserError (message
    "Not a valid json", with the original error as `cause`).
  * Limitations of the restored code, deliberately left untouched so that
    this stays a pure revert:
      - the stage-1 JSON.parse sits outside the try block, so malformed
        fenced JSON escapes as a raw SyntaxError;
      - String.prototype.replace with a string pattern replaces only the
        first match, so only the first "[", "]" and "\n" are blanked;
      - splitting on /(?=},)/ also splits inside nested objects and inside
        string values that contain "},".

 .husky/pre-commit                 |  4 +++
 .husky/pre-push                   |  4 +++
 packages/core/src/OutputParser.ts | 52 +++++++++++++++++++++++++++----
 3 files changed, 54 insertions(+), 6 deletions(-)
 create mode 100755 .husky/pre-commit
 create mode 100755 .husky/pre-push

diff --git a/.husky/pre-commit b/.husky/pre-commit
new file mode 100755
index 000000000..58993aaee
--- /dev/null
+++ b/.husky/pre-commit
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+. "$(dirname -- "$0")/_/husky.sh"
+
+pnpm lint
diff --git a/.husky/pre-push b/.husky/pre-push
new file mode 100755
index 000000000..af0cff7ed
--- /dev/null
+++ b/.husky/pre-push
@@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+. "$(dirname -- "$0")/_/husky.sh"
+
+pnpm test
diff --git a/packages/core/src/OutputParser.ts b/packages/core/src/OutputParser.ts
index 1498f3f00..11bd7f023 100644
--- a/packages/core/src/OutputParser.ts
+++ b/packages/core/src/OutputParser.ts
@@ -56,14 +56,54 @@ class OutputParserError extends Error {
 function parseJsonMarkdown(text: string) {
   text = text.trim();
 
-  //This code is more general than the previous version, and should be faster.
-  const beginIndex = text.indexOf("[");
-  const endIndex = text.lastIndexOf("]");
-  const jsonText = text.substring(beginIndex, endIndex + 1);
-  try {
+  const beginDelimiter = "```json";
+  const endDelimiter = "```";
+
+  const beginIndex = text.indexOf(beginDelimiter);
+  const endIndex = text.indexOf(
+    endDelimiter,
+    beginIndex + beginDelimiter.length,
+  );
+  //Scenario 1: LLM follows instruction format. However, it doesn't always do this.
+  if (!(beginIndex === -1 || endIndex === -1)) {
+    const jsonText = text.substring(
+      beginIndex + beginDelimiter.length,
+      endIndex,
+    );
     return JSON.parse(jsonText);
+  }
+
+  //Scenario 2: LLM follows instruction format roughly, but doesn't do this exactly.
+  // For example: [```json] part was not returned, or there are irregular \n spaces.
+  try {
+    //This isn't a JSON markdown, but we should try again with something else.
+    //Try to get data_str to be a list of JSON objects
+    const new_data_str: string[] = text
+      .replace("[", " ")
+      .replace("]", " ")
+      .replace("\n", " ")
+      .trim()
+      //Warning: This regex might be slow.
+      .split(/(?=},)/g);
+    const arr_length = new_data_str.length;
+
+    //String formatting
+    //First to penultimate element
+    for (let i = 0; i < arr_length - 1; i++) {
+      new_data_str[i] += "}";
+    }
+    //Second to final element
+    for (let i = 1; i < arr_length; i++) {
+      new_data_str[i] = new_data_str[i].replace("},", " ");
+    }
+    const output: object[] = new_data_str.map((item) => JSON.parse(item));
+    return output;
   } catch (e) {
-    throw new OutputParserError("Not a json markdown", { output: text });
+    //In the worst case scenario and our options are exhausted, throw error.
+    throw new OutputParserError("Not a valid json", {
+      cause: e as Error,
+      output: text,
+    });
   }
 }
 
-- 
GitLab