From 153836ff899c31e3fe548d2f3de23919fb317284 Mon Sep 17 00:00:00 2001
From: dylan1218 <dylan1218@live.com>
Date: Wed, 28 Feb 2024 12:00:14 -0500
Subject: [PATCH] JSON path prompt debug facilitation (#9097)

---
 .../core/indices/struct_store/json_query.py   | 26 +++++++++++++++++++
 .../core/prompts/default_prompts.py           |  8 +++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/llama-index-core/llama_index/core/indices/struct_store/json_query.py b/llama-index-core/llama_index/core/indices/struct_store/json_query.py
index 4a76dcf513..8d8bee667d 100644
--- a/llama-index-core/llama_index/core/indices/struct_store/json_query.py
+++ b/llama-index-core/llama_index/core/indices/struct_store/json_query.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import re
 from typing import Any, Callable, Dict, List, Optional, Union
 
 from llama_index.core.base.base_query_engine import BaseQueryEngine
@@ -43,6 +44,19 @@ DEFAULT_RESPONSE_SYNTHESIS_PROMPT = PromptTemplate(
 )
 
 
+def default_output_response_parser(llm_output: str) -> str:
+    """Attempts to parse the JSON path prompt output. Only applicable if the default prompt is used."""
+    try:
+        llm_output_parsed = re.search(
+            pattern=r"JSONPath:\s+(.*)", string=llm_output
+        ).groups()[0]
+    except Exception as exc:
+        raise ValueError(
+            f"JSON Path could not be parsed in the LLM response after the 'JSONPath' identifier. Try passing a custom JSON path prompt and processor."
+        ) from exc
+    return llm_output_parsed
+
+
 def default_output_processor(llm_output: str, json_value: JSONType) -> JSONType:
     """Default output processor that extracts values based on JSON Path expressions."""
     # Split the given string into separate JSON Path expressions
@@ -164,6 +178,12 @@ class JSONQueryEngine(BaseQueryEngine):
             **self._output_kwargs,
         )
 
+        # removes JSONPath: prefix from returned JSON path prompt call
+        if self._json_path_prompt == DEFAULT_JSON_PATH_PROMPT:
+            json_path_response_str = default_output_response_parser(
+                json_path_response_str
+            )
+
         if self._verbose:
             print_text(f"> JSONPath Output: {json_path_output}\n")
 
@@ -193,6 +213,12 @@ class JSONQueryEngine(BaseQueryEngine):
             query_str=query_bundle.query_str,
         )
 
+        # removes JSONPath: prefix from returned JSON path prompt call
+        if self._json_path_prompt == DEFAULT_JSON_PATH_PROMPT:
+            json_path_response_str = default_output_response_parser(
+                json_path_response_str
+            )
+
         if self._verbose:
             print_text(
                 f"> JSONPath Instructions:\n" f"```\n{json_path_response_str}\n```\n"
diff --git a/llama-index-core/llama_index/core/prompts/default_prompts.py b/llama-index-core/llama_index/core/prompts/default_prompts.py
index 997ec53165..3049970893 100644
--- a/llama-index-core/llama_index/core/prompts/default_prompts.py
+++ b/llama-index-core/llama_index/core/prompts/default_prompts.py
@@ -390,8 +390,14 @@ DEFAULT_JSON_PATH_TMPL = (
     "{schema}\n"
     "Given a task, respond with a JSON Path query that "
     "can retrieve data from a JSON value that matches the schema.\n"
+    "Provide the JSON Path query in the following format: 'JSONPath: <JSONPath>'\n"
+    "You must include the value 'JSONPath:' before the provided JSON Path query.\n"
+    "Example Format:\n"
+    "Task: What is John's age?\n"
+    "Response: JSONPath: $.John.age\n"
+    "Let's try this now: \n\n"
     "Task: {query_str}\n"
-    "JSONPath: "
+    "Response: "
 )
 
 DEFAULT_JSON_PATH_PROMPT = PromptTemplate(
-- 
GitLab