From b179f61c6f3d0ee61ec34d8da4e6b826c0947d31 Mon Sep 17 00:00:00 2001 From: Elliot Kang <kkang2097@gmail.com> Date: Sun, 10 Sep 2023 21:39:41 -0700 Subject: [PATCH] Update OutputParser.ts Essentially, we're giving OutputParser an option to parse List[JSON object] in case our LLM doesn't give us the exact output we want. --- packages/core/src/OutputParser.ts | 34 ++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/packages/core/src/OutputParser.ts b/packages/core/src/OutputParser.ts index 32a2cdc4a..c067297d5 100644 --- a/packages/core/src/OutputParser.ts +++ b/packages/core/src/OutputParser.ts @@ -64,15 +64,43 @@ function parseJsonMarkdown(text: string) { endDelimiter, beginIndex + beginDelimiter.length, ); - if (beginIndex === -1 || endIndex === -1) { - throw new OutputParserError("Not a json markdown", { output: text }); - } const jsonText = text.substring(beginIndex + beginDelimiter.length, endIndex); + //Scenario 1: LLM follows instruction format. However, it doesn't always do this. try { return JSON.parse(jsonText); } catch (e) { + //Fall through + } + + //Scenario 2: LLM follows instruction format roughly, but doesn't do this exactly. + // For example: [```json] part was not returned, or there are irregular \n spaces. + try { + //This isn't a JSON markdown, but we should try again with something else. + //Try to get data_str to be a list of JSON objects + const new_data_str: string[] = text + .replace("[", " ") + .replace("]", " ") + .replace("\n", " ") + .trim() + //Warning: This regex might be slow. + .split(/(?=},)/g); + const arr_length = new_data_str.length; + + //String formatting + //First to penultimate element + for (let i = 0; i < arr_length - 1; i++) { + new_data_str[i] += "}"; + } + //Second to final element + for (let i = 1; i < arr_length; i++) { + new_data_str[i] = new_data_str[i].replace("},", " "); + } + const output: object[] = new_data_str.map((item) => JSON.parse(item)); + return output; + } catch (e) { + //In the worst case scenario and our options are exhausted, throw error. throw new OutputParserError("Not a valid json", { cause: e as Error, output: text, -- GitLab