From 5c53f41712785e5558156372bdc4f33a6326fa5f Mon Sep 17 00:00:00 2001 From: haarisedhi102 <52220274+haarisedhi102@users.noreply.github.com> Date: Sat, 16 Mar 2024 20:57:58 -0500 Subject: [PATCH] remove space from MarkdownNodeParser Header metadata (#11982) --- .../core/node_parser/file/markdown.py | 4 ++-- .../tests/node_parser/test_markdown.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llama-index-core/llama_index/core/node_parser/file/markdown.py b/llama-index-core/llama_index/core/node_parser/file/markdown.py index de4450d60..6f8ecb984 100644 --- a/llama-index-core/llama_index/core/node_parser/file/markdown.py +++ b/llama-index-core/llama_index/core/node_parser/file/markdown.py @@ -99,11 +99,11 @@ class MarkdownNodeParser(NodeParser): updated_headers = {} for i in range(1, new_header_level): - key = f"Header {i}" + key = f"Header_{i}" if key in headers_metadata: updated_headers[key] = headers_metadata[key] - updated_headers[f"Header {new_header_level}"] = new_header + updated_headers[f"Header_{new_header_level}"] = new_header return updated_headers def _build_node_from_split( diff --git a/llama-index-core/tests/node_parser/test_markdown.py b/llama-index-core/tests/node_parser/test_markdown.py index 46577747e..876aae23e 100644 --- a/llama-index-core/tests/node_parser/test_markdown.py +++ b/llama-index-core/tests/node_parser/test_markdown.py @@ -19,8 +19,8 @@ Header 2 content ] ) assert len(splits) == 2 - assert splits[0].metadata == {"Header 1": "Main Header"} - assert splits[1].metadata == {"Header 1": "Header 2"} + assert splits[0].metadata == {"Header_1": "Main Header"} + assert splits[1].metadata == {"Header_1": "Header 2"} assert splits[0].text == "Main Header\n\nHeader 1 content" assert splits[1].text == "Header 2\nHeader 2 content" @@ -80,11 +80,11 @@ Content ] ) assert len(splits) == 4 - assert splits[0].metadata == {"Header 1": "Main Header"} - assert splits[1].metadata == {"Header 1": "Main Header", "Header 2": "Sub-header"} + assert splits[0].metadata == {"Header_1": "Main Header"} + assert splits[1].metadata == {"Header_1": "Main Header", "Header_2": "Sub-header"} assert splits[2].metadata == { - "Header 1": "Main Header", - "Header 2": "Sub-header", - "Header 3": "Sub-sub header", + "Header_1": "Main Header", + "Header_2": "Sub-header", + "Header_3": "Sub-sub header", } - assert splits[3].metadata == {"Header 1": "New title"} + assert splits[3].metadata == {"Header_1": "New title"} -- GitLab