From 477e0495778388b6c20c5b7aa62fb72dada18fe9 Mon Sep 17 00:00:00 2001
From: Ryan Peach <ryanpeach@users.noreply.github.com>
Date: Thu, 7 Mar 2024 23:26:36 -0500
Subject: [PATCH] Code Hierarchy Node Parser v0.10 (#10671)

---
 .../core/command_line/mappings.json           |    2 +
 .../.gitignore                                |    2 +
 .../llama-index-packs-code-hierarchy/BUILD    |    4 +
 .../README.md                                 |  136 ++
 .../CodeHierarchyNodeParserUsage.ipynb        | 1165 +++++++++++++++++
 .../llama_index/packs/code_hierarchy/BUILD    |    1 +
 .../packs/code_hierarchy/__init__.py          |   11 +
 .../llama_index/packs/code_hierarchy/base.py  |   43 +
 .../packs/code_hierarchy/code_hierarchy.py    |  845 ++++++++++++
 .../packs/code_hierarchy/query_engine.py      |  155 +++
 .../pyproject.toml                            |   53 +
 .../tests/BUILD                               |    3 +
 .../tests/__init__.py                         |    0
 .../tests/test_code_hierarchy_no_skeleton.py  |  705 ++++++++++
 .../test_code_hierarchy_with_skeleton.py      |  526 ++++++++
 .../tests/test_query_engine.py                |   97 ++
 .../tests/test_utility_methods.py             |  133 ++
 17 files changed, 3881 insertions(+)
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/.gitignore
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/BUILD
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/README.md
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/examples/CodeHierarchyNodeParserUsage.ipynb
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/BUILD
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/__init__.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/base.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/code_hierarchy.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/query_engine.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/pyproject.toml
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/tests/BUILD
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/tests/__init__.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_no_skeleton.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_with_skeleton.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py
 create mode 100644 llama-index-packs/llama-index-packs-code-hierarchy/tests/test_utility_methods.py

diff --git a/llama-index-core/llama_index/core/command_line/mappings.json b/llama-index-core/llama_index/core/command_line/mappings.json
index 45d01daa27..a3f397589d 100644
--- a/llama-index-core/llama_index/core/command_line/mappings.json
+++ b/llama-index-core/llama_index/core/command_line/mappings.json
@@ -279,6 +279,8 @@
   "LLMQuestionGenerator": "llama_index.core.question_gen",
   "SubQuestionOutputParser": "llama_index.core.question_gen",
   "ReaderConfig": "llama_index.core.readers",
+  "CodeHierarchyNodeParser": "llama_index.packs.code_hierarchy",
+  "CodeHierarchyKeywordQueryEngine": "llama_index.packs.code_hierarchy",
   "StringIterableReader": "llama_index.core.readers",
   "ResponseMode": "llama_index.core.response_synthesizers",
   "BaseSynthesizer": "llama_index.core.response_synthesizers",
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/.gitignore b/llama-index-packs/llama-index-packs-code-hierarchy/.gitignore
new file mode 100644
index 0000000000..ff3ea062f2
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/.gitignore
@@ -0,0 +1,2 @@
+.env
+poetry.lock
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/BUILD b/llama-index-packs/llama-index-packs-code-hierarchy/BUILD
new file mode 100644
index 0000000000..d665cd47d1
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/BUILD
@@ -0,0 +1,4 @@
+poetry_requirements(
+    name="poetry",
+    module_mapping={"llama-index-readers-file": ["llama_index.readers.file", "llama_index.readers"]}
+)
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/README.md b/llama-index-packs/llama-index-packs-code-hierarchy/README.md
new file mode 100644
index 0000000000..92d6fb29e5
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/README.md
@@ -0,0 +1,136 @@
+# CodeHierarchyAgentPack
+
+```bash
+# install
+pip install llama-index-packs-code-hierarchy
+
+# download source code
+llamaindex-cli download-llamapack CodeHierarchyAgentPack -d ./code_hierarchy_pack
+```
+
+The `CodeHierarchyAgentPack` is useful to split long code files into more reasonable chunks, while creating an agent on top to navigate the code. What this will do is create a "Hierarchy" of sorts, where sections of the code are made more reasonable by replacing the scope body with short comments telling the LLM to search for a referenced node if it wants to read that context body.
+
+Nodes in this hierarchy will be split based on scope, like function, class, or method scope, and will have links to their children and parents so the LLM can traverse the tree.
+
+```python
+from llama_index.core.text_splitter import CodeSplitter
+from llama_index.llms.openai import OpenAI
+from llama_index.packs.code_hierarchy import (
+    CodeHierarchyAgentPack,
+    CodeHierarchyNodeParser,
+)
+
+llm = OpenAI(model="gpt-4", temperature=0.2)
+
+documents = SimpleDirectoryReader(
+    input_files=[
+        Path("../llama_index/packs/code_hierarchy/code_hierarchy.py")
+    ],
+    file_metadata=lambda x: {"filepath": x},
+).load_data()
+
+split_nodes = CodeHierarchyNodeParser(
+    language="python",
+    # You can further parameterize the CodeSplitter to split the code
+    # into "chunks" that match your context window size using
+    # chunk_lines and max_chars parameters, here we just use the defaults
+    code_splitter=CodeSplitter(
+        language="python", max_chars=1000, chunk_lines=10
+    ),
+).get_nodes_from_documents(documents)
+
+pack = CodeHierarchyAgentPack(split_nodes=split_nodes, llm=llm)
+
+pack.run(
+    "How does the get_code_hierarchy_from_nodes function from the code hierarchy node parser work? Provide specific implementation details."
+)
+```
+
+A full example can be found [here](https://github.com/run-llama/llama_index/blob/main/llama-index-packs/llama-index-packs-code-hierarchy/examples/CodeHierarchyNodeParserUsage.ipynb).
+
+## Repo Maps
+
+The pack contains a `CodeHierarchyKeywordQueryEngine` that uses a `CodeHierarchyNodeParser` to generate a map of a repository's structure and contents. This is useful for the LLM to understand the structure of a codebase, and to be able to reference specific files or directories.
+
+For example:
+
+- code_hierarchy
+  - \_SignatureCaptureType
+  - \_SignatureCaptureOptions
+  - \_ScopeMethod
+  - \_CommentOptions
+  - \_ScopeItem
+  - \_ChunkNodeOutput
+  - CodeHierarchyNodeParser
+    - class_name
+    - **init**
+    - \_get_node_name
+      - recur
+    - \_get_node_signature
+      - find_start
+      - find_end
+    - \_chunk_node
+    - get_code_hierarchy_from_nodes
+      - get_subdict
+      - recur_inclusive_scope
+      - dict_to_markdown
+    - \_parse_nodes
+    - \_get_indentation
+    - \_get_comment_text
+    - \_create_comment_line
+    - \_get_replacement_text
+    - \_skeletonize
+    - \_skeletonize_list
+      - recur
+
+## Usage as a Tool with an Agent
+
+You can create a tool for any agent using the nodes from the node parser:
+
+```python
+from llama_index.agent.openai import OpenAIAgent
+from llama_index.core.tools import QueryEngineTool
+from llama_index.packs.code_hierarchy import CodeHierarchyKeywordQueryEngine
+
+query_engine = CodeHierarchyKeywordQueryEngine(
+    nodes=split_nodes,
+)
+
+tool = QueryEngineTool.from_defaults(
+    query_engine=query_engine,
+    name="code_lookup",
+    description="Useful for looking up information about the code hierarchy codebase.",
+)
+
+agent = OpenAIAgent.from_tools(
+    [tool], system_prompt=query_engine.get_tool_instructions(), verbose=True
+)
+```
+
+## Adding new languages
+
+To add a new language you need to edit `_DEFAULT_SIGNATURE_IDENTIFIERS` in `code_hierarchy.py`.
+
+The docstrings are informative as to how you ought to do this and its nuances; it should work for most languages.
+
+Please **test your new language** by adding a new file to `tests/file/code/` and testing all your edge cases.
+
+People often ask "how do I find the Node Types I need for a new language?" The best way is to use breakpoints.
+I have added a comment `TIP: This is a wonderful place to put a debug breakpoint` in the `code_hierarchy.py` file, put a breakpoint there, input some code in the desired language, and step through it to find the name
+of the node you want to capture.
+
+The code as it is should handle any language which:
+
+1. expects you to indent deeper scopes
+2. has a way to comment, either full line or between delimiters
+
+## Future
+
+I'm considering adding all the languages from [aider](https://github.com/paul-gauthier/aider/tree/main/aider/queries)
+by incorporating `.scm` files instead of `_SignatureCaptureType`, `_SignatureCaptureOptions`, and `_DEFAULT_SIGNATURE_IDENTIFIERS`
+
+## Contributing
+
+You will need to set your `OPENAI_API_KEY` in your env to run the notebook or test the pack.
+
+You can run tests with `pytest tests` in the root directory of this pack.
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/examples/CodeHierarchyNodeParserUsage.ipynb b/llama-index-packs/llama-index-packs-code-hierarchy/examples/CodeHierarchyNodeParserUsage.ipynb
new file mode 100644
index 0000000000..2d5a02fccc
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/examples/CodeHierarchyNodeParserUsage.ipynb
@@ -0,0 +1,1165 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Code Hierarchy Node Parser\n",
+    "\n",
+    "The `CodeHierarchyNodeParser` is useful to split long code files into more reasonable chunks. What this will do is create a \"Hierarchy\" of sorts, where sections of the code are made more reasonable by replacing the scope body with short comments telling the LLM to search for a referenced node if it wants to read that context body. This is called skeletonization, and is toggled by setting `skeleton` to `True` which it is by default. Nodes in this hierarchy will be split based on scope, like function, class, or method scope, and will have links to their children and parents so the LLM can traverse the tree.\n",
+    "\n",
+    "This notebook gives an initial demo of the pack, and then dives into a deeper technical exploration of how it works.\n",
+    "\n",
+    "**NOTE:** Currently, this pack is configured to only work with `OpenAI` LLMs. But feel free to copy/download the source code and edit as needed!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Installation and Import\n",
+    "\n",
+    "First be sure to install the necessary [tree-sitter](https://tree-sitter.github.io/tree-sitter/) libraries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: tree-sitter in /home/ryanpeach/.pyenv/versions/3.11.6/envs/llama_index/lib/python3.11/site-packages (0.20.4)\n",
+      "Requirement already satisfied: tree-sitter-languages in /home/ryanpeach/.pyenv/versions/3.11.6/envs/llama_index/lib/python3.11/site-packages (1.10.2)\n",
+      "Requirement already satisfied: python-dotenv in /home/ryanpeach/.pyenv/versions/3.11.6/envs/llama_index/lib/python3.11/site-packages (1.0.1)\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install llama-index-packs-code-hierarchy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core import SimpleDirectoryReader\n",
+    "from llama_index.core.text_splitter import CodeSplitter\n",
+    "from llama_index.llms.openai import OpenAI\n",
+    "from llama_index.packs.code_hierarchy import CodeHierarchyNodeParser\n",
+    "from llama_index.packs.code_hierarchy import CodeHierarchyAgentPack\n",
+    "from pathlib import Path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import Markdown, display\n",
+    "\n",
+    "\n",
+    "def print_python(python_text):\n",
+    "    \"\"\"This function prints python text in ipynb nicely formatted.\"\"\"\n",
+    "    display(Markdown(\"```python\\n\" + python_text + \"```\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initial Demo\n",
+    "\n",
+    "First, let's run the pack by using nodes from the included `CodeHierarchyNodeParser`, and from there, explore further how it actually works."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = OpenAI(model=\"gpt-4\", temperature=0.2)\n",
+    "\n",
+    "documents = SimpleDirectoryReader(\n",
+    "    input_files=[Path(\"../llama_index/packs/code_hierarchy/code_hierarchy.py\")],\n",
+    "    file_metadata=lambda x: {\"filepath\": x},\n",
+    ").load_data()\n",
+    "\n",
+    "split_nodes = CodeHierarchyNodeParser(\n",
+    "    language=\"python\",\n",
+    "    # You can further parameterize the CodeSplitter to split the code\n",
+    "    # into \"chunks\" that match your context window size using\n",
+    "    # chunk_lines and max_chars parameters, here we just use the defaults\n",
+    "    code_splitter=CodeSplitter(language=\"python\", max_chars=1000, chunk_lines=10),\n",
+    ").get_nodes_from_documents(documents)\n",
+    "\n",
+    "pack = CodeHierarchyAgentPack(split_nodes=split_nodes, llm=llm)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Added user message to memory: How does the get_code_hierarchy_from_nodes function from the code hierarchy node parser work? Provide specific implementation details.\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_search with args: {\n",
+      "  \"input\": \"get_code_hierarchy_from_nodes\"\n",
+      "}\n",
+      "Got output: def get_code_hierarchy_from_nodes(\n",
+      "        nodes: Sequence[BaseNode],\n",
+      "        max_depth: int = -1,\n",
+      "    ) -> Tuple[Dict[str, Any], str]:\n",
+      "# Code replaced for brevity. See node_id 1b2cbe9a-5846-4110-aaa5-26327110c9ab\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_search with args: {\n",
+      "  \"input\": \"1b2cbe9a-5846-4110-aaa5-26327110c9ab\"\n",
+      "}\n",
+      "Got output: # Code replaced for brevity. See node_id ce774d77-8687-4ae5-af74-4a990c085362\n",
+      "\"\"\"\n",
+      "        Creates a code hierarchy appropriate to put into a tool description or context\n",
+      "        to make it easier to search for code.\n",
+      "\n",
+      "        Call after `get_nodes_from_documents` and pass that output to this function.\n",
+      "        \"\"\"\n",
+      "        out: Dict[str, Any] = defaultdict(dict)\n",
+      "\n",
+      "        def get_subdict(keys: list[str]) -> Dict[str, Any]:\n",
+      "            # Get the dictionary we are operating on\n",
+      "                # Code replaced for brevity. See node_id 409c28d7-6fc2-434e-99ad-e870bf22e963\n",
+      "\n",
+      "        def recur_inclusive_scope(node: BaseNode, i: int, keys: list[str]) -> None:\n",
+      "                # Code replaced for brevity. See node_id 0fb89089-8cb3-40ab-af65-d655b7511430\n",
+      "\n",
+      "        def dict_to_markdown(d: dict[str, Any], depth: int = 0) -> str:\n",
+      "                # Code replaced for brevity. See node_id 27ef7ca1-0709-4dfd-9cd0-0e2c6b930fa0\n",
+      "# Code replaced for brevity. See node_id e8fe25ee-d9bf-4fc0-adf5-6db6ec3020a0\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_search with args: {\n",
+      "  \"input\": \"409c28d7-6fc2-434e-99ad-e870bf22e963\"\n",
+      "}\n",
+      "Got output: def get_subdict(keys: list[str]) -> Dict[str, Any]:\n",
+      "            # Get the dictionary we are operating on\n",
+      "            this_dict = out\n",
+      "            for key in keys:\n",
+      "                if key not in this_dict:\n",
+      "                    this_dict[key] = defaultdict(dict)\n",
+      "                this_dict = this_dict[key]\n",
+      "            return this_dict\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_search with args: {\n",
+      "  \"input\": \"0fb89089-8cb3-40ab-af65-d655b7511430\"\n",
+      "}\n",
+      "Got output: def recur_inclusive_scope(node: BaseNode, i: int, keys: list[str]) -> None:\n",
+      "            if \"inclusive_scopes\" not in node.metadata:\n",
+      "                raise KeyError(\"inclusive_scopes not in node.metadata\")\n",
+      "            if i >= len(node.metadata[\"inclusive_scopes\"]):\n",
+      "                return\n",
+      "            scope = node.metadata[\"inclusive_scopes\"][i]\n",
+      "\n",
+      "            this_dict = get_subdict(keys)\n",
+      "\n",
+      "            if scope[\"name\"] not in this_dict:\n",
+      "                this_dict[scope[\"name\"]] = defaultdict(dict)\n",
+      "\n",
+      "            if i < max_depth or max_depth == -1:\n",
+      "                recur_inclusive_scope(node, i + 1, [*keys, scope[\"name\"]])\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_search with args: {\n",
+      "  \"input\": \"27ef7ca1-0709-4dfd-9cd0-0e2c6b930fa0\"\n",
+      "}\n",
+      "Got output: def dict_to_markdown(d: dict[str, Any], depth: int = 0) -> str:\n",
+      "            markdown = \"\"\n",
+      "            indent = \"  \" * depth  # Two spaces per depth level\n",
+      "\n",
+      "            for key, value in d.items():\n",
+      "                if isinstance(value, dict):  # Check if value is a dict\n",
+      "                    # Add the key with a bullet point and increase depth for nested dicts\n",
+      "                    markdown += f\"{indent}- {key}\\n{dict_to_markdown(value, depth + 1)}\"\n",
+      "                else:\n",
+      "                    # Handle non-dict items if necessary\n",
+      "                    markdown += f\"{indent}- {key}: {value}\\n\"\n",
+      "\n",
+      "            return markdown\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_search with args: {\n",
+      "  \"input\": \"e8fe25ee-d9bf-4fc0-adf5-6db6ec3020a0\"\n",
+      "}\n",
+      "Got output: # Code replaced for brevity. See node_id 1b2cbe9a-5846-4110-aaa5-26327110c9ab\n",
+      "for node in nodes:\n",
+      "            filepath = node.metadata[\"filepath\"].split(\"/\")\n",
+      "            filepath[-1] = filepath[-1].split(\".\")[0]\n",
+      "            recur_inclusive_scope(node, 0, filepath)\n",
+      "\n",
+      "        return out, dict_to_markdown(out)\n",
+      "========================\n",
+      "\n",
+      "The `get_code_hierarchy_from_nodes` function in the `CodeHierarchyNodeParser` class is used to create a code hierarchy that can be put into a tool description or context to make it easier to search for code. This function should be called after `get_nodes_from_documents` and the output of that function should be passed to this function.\n",
+      "\n",
+      "Here's a breakdown of how this function works:\n",
+      "\n",
+      "1. It initializes an empty dictionary `out`.\n",
+      "\n",
+      "2. It defines a helper function `get_subdict(keys)`, which navigates through the `out` dictionary using the provided keys, creating new dictionaries along the way if a key does not exist. It returns the final dictionary it navigates to.\n",
+      "\n",
+      "3. It defines another helper function `recur_inclusive_scope(node, i, keys)`, which recursively navigates through the inclusive scopes of a node and adds them to the `out` dictionary using `get_subdict(keys)`. If the maximum depth is reached or there are no more inclusive scopes, it stops the recursion.\n",
+      "\n",
+      "4. It defines a third helper function `dict_to_markdown(d, depth)`, which converts the `out` dictionary to a markdown string. It does this by iterating through the dictionary and adding each key-value pair to the string. If a value is a dictionary, it recursively calls `dict_to_markdown` on that dictionary and increases the depth.\n",
+      "\n",
+      "5. It then iterates over the provided nodes, splits the filepath of each node into a list of directories, and calls `recur_inclusive_scope` on each node with the filepath as the keys.\n",
+      "\n",
+      "6. Finally, it returns the `out` dictionary and the markdown string generated by `dict_to_markdown(out)`.\n",
+      "\n",
+      "This function is a part of the `CodeHierarchyNodeParser` class, which is used to parse code hierarchy nodes.\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\n",
+    "    pack.run(\n",
+    "        \"How does the get_code_hierarchy_from_nodes function from the code hierarchy node parser work? Provide specific implementation details.\"\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can see that the agent explored the hierarchy of the code by requesting specific function names and IDs, in order to provide a full explanation of how the function works!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Technical Explanations/Exploration"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Prepare your Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Choose a directory you want to scan, and glob for all the code files you want to import.\n",
+    "\n",
+    "In this case I'm going to glob all \"*.py\" files in the `llama_index/node_parser` directory."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "documents = SimpleDirectoryReader(\n",
+    "    input_files=[Path(\"../llama_index/packs/code_hierarchy/code_hierarchy.py\")],\n",
+    "    file_metadata=lambda x: {\"filepath\": x},\n",
+    ").load_data()\n",
+    "\n",
+    "split_nodes = CodeHierarchyNodeParser(\n",
+    "    language=\"python\",\n",
+    "    # You can further parameterize the CodeSplitter to split the code\n",
+    "    # into \"chunks\" that match your context window size using\n",
+    "    # chunk_lines and max_chars parameters, here we just use the defaults\n",
+    "    code_splitter=CodeSplitter(language=\"python\", max_chars=1000, chunk_lines=10),\n",
+    ").get_nodes_from_documents(documents)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This should be the code hierarchy node parser itself. Let's have it parse itself!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Length of text: 33375\n"
+     ]
+    },
+    {
+     "data": {
+      "text/markdown": [
+       "```python\n",
+       "from collections import defaultdict\n",
+       "from enum import Enum\n",
+       "from tree_sitter import Node\n",
+       "from typing import Any, Dict, List, Optional, Sequence, Tuple\n",
+       "\n",
+       "\n",
+       "from llama_index.core.bridge.pydantic import BaseModel, Field\n",
+       "from llama_index.core.callbacks.base import CallbackManager\n",
+       "from llama_index.core.extractors.metadata_extractors import BaseExtractor\n",
+       "from llama_index.core.node_parser.interface import NodeParser\n",
+       "from llama_index.core.schema import BaseNode, NodeRelationship, TextNode\n",
+       "from llama_index.core.text_splitter import CodeSplitter\n",
+       "from llama_index.core.utils import get_tqdm_iterable\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureType(BaseModel):\n",
+       "    \"\"\"\n",
+       "    Unfortunately some languages need special options for how to make a signature.\n",
+       "\n",
+       "    For example, html element signatures should include their closing >, there is no\n",
+       "    easy way to include this using an always-exclusive system.\n",
+       "\n",
+       "    However, using an always-inclusive system, python decorators don't work,\n",
+       "    as there isn't an easy to define terminator for decorators that is inclusive\n",
+       "    to their signature.\n",
+       "    \"\"\"\n",
+       "\n",
+       "    type: str = Field(description=\"The type string to match on.\")\n",
+       "    inclusive: bool = Field(\n",
+       "        description=(\n",
+       "            \"Whether to include the text of the node matched by this type or not.\"\n",
+       "        ),\n",
+       "    )\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureOptions(BaseModel):\n",
+       "    \"\"\"\n",
+       "    Options for capturing the signature of a node.\n",
+       "    \"\"\"\n",
+       "\n",
+       "    start_signature_types: Optional[List[_SignatureCaptureType]] = Field(\n",
+       "        None,\n",
+       "        descripti\n",
+       "\n",
+       "# ...```"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "print(f\"Length of text: {len(documents[0].text)}\")\n",
+    "print_python(documents[0].text[:1500] + \"\\n\\n# ...\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is way too long to fit into the context of our LLM. So what are we to do? Well we will split it. We are going to use the `CodeHierarchyNodeParser` to split the nodes into more reasonable chunks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of nodes after splitting: 90\n"
+     ]
+    }
+   ],
+   "source": [
+    "split_nodes = CodeHierarchyNodeParser(\n",
+    "    language=\"python\",\n",
+    "    # You can further parameterize the CodeSplitter to split the code\n",
+    "    # into \"chunks\" that match your context window size using\n",
+    "    # chunk_lines and max_chars parameters, here we just use the defaults\n",
+    "    code_splitter=CodeSplitter(language=\"python\", max_chars=1000, chunk_lines=10),\n",
+    ").get_nodes_from_documents(documents)\n",
+    "print(\"Number of nodes after splitting:\", len(split_nodes))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Great! So that split up our data from 1 node into quite a few nodes! What's the max length of any of these nodes?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Longest text in nodes: 1152\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"Longest text in nodes: {max(len(n.text) for n in split_nodes)}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "That's much shorter than before! Let's look at a sample."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "```python\n",
+       "from collections import defaultdict\n",
+       "from enum import Enum\n",
+       "from tree_sitter import Node\n",
+       "from typing import Any, Dict, List, Optional, Sequence, Tuple\n",
+       "\n",
+       "\n",
+       "from llama_index.core.bridge.pydantic import BaseModel, Field\n",
+       "from llama_index.core.callbacks.base import CallbackManager\n",
+       "from llama_index.core.extractors.metadata_extractors import BaseExtractor\n",
+       "from llama_index.core.node_parser.interface import NodeParser\n",
+       "from llama_index.core.schema import BaseNode, NodeRelationship, TextNode\n",
+       "from llama_index.core.text_splitter import CodeSplitter\n",
+       "from llama_index.core.utils import get_tqdm_iterable\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureType(BaseModel):\n",
+       "    # Code replaced for brevity. See node_id b30b6043-4cba-420e-bd6b-e91beea08819\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureOptions(BaseModel):\n",
+       "    # Code replaced for brevity. See node_id e0961aad-bd9f-4295-927d-90ac6e2b06c8\n",
+       "# Code replaced for brevity. See node_id 0f6bc262-ef8b-4051-8c8e-486863e4cbe2```"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "print_python(split_nodes[0].text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Without even needing a long printout we can see everything this module imported in the first document (which is at the module level) and some classes it defines.\n",
+    "\n",
+    "We also see that it has put comments in place of code that was removed to make the text size more reasonable.\n",
+    "These can appear at the beginning or end of a chunk, or at a new scope level, like a class or function declaration.\n",
+    "\n",
+    "`# Code replaced for brevity. See node_id {node_id}`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Code Hierarchy\n",
+    "\n",
+    "These scopes can be listed by the `CodeHierarchyNodeParser`, giving a \"repo map\" of sorts.\n",
+    "The namesake of this node parser, it creates a tree of scope names to use to search the code."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(defaultdict(<class 'dict'>, {'..': defaultdict(<class 'dict'>, {'llama_index': defaultdict(<class 'dict'>, {'packs': defaultdict(<class 'dict'>, {'code_hierarchy': defaultdict(<class 'dict'>, {'code_hierarchy': defaultdict(<class 'dict'>, {'_SignatureCaptureType': defaultdict(<class 'dict'>, {}), '_SignatureCaptureOptions': defaultdict(<class 'dict'>, {}), '_ScopeMethod': defaultdict(<class 'dict'>, {}), '_CommentOptions': defaultdict(<class 'dict'>, {}), '_ScopeItem': defaultdict(<class 'dict'>, {}), '_ChunkNodeOutput': defaultdict(<class 'dict'>, {}), 'CodeHierarchyNodeParser': defaultdict(<class 'dict'>, {'class_name': defaultdict(<class 'dict'>, {}), '__init__': defaultdict(<class 'dict'>, {}), '_get_node_name': defaultdict(<class 'dict'>, {'recur': defaultdict(<class 'dict'>, {})}), '_get_node_signature': defaultdict(<class 'dict'>, {'find_start': defaultdict(<class 'dict'>, {}), 'find_end': defaultdict(<class 'dict'>, {})}), '_chunk_node': defaultdict(<class 'dict'>, {}), 'get_code_hierarchy_from_nodes': defaultdict(<class 'dict'>, {'get_subdict': defaultdict(<class 'dict'>, {}), 'recur_inclusive_scope': defaultdict(<class 'dict'>, {}), 'dict_to_markdown': defaultdict(<class 'dict'>, {})}), '_parse_nodes': defaultdict(<class 'dict'>, {}), '_get_indentation': defaultdict(<class 'dict'>, {}), '_get_comment_text': defaultdict(<class 'dict'>, {}), '_create_comment_line': defaultdict(<class 'dict'>, {}), '_get_replacement_text': defaultdict(<class 'dict'>, {}), '_skeletonize': defaultdict(<class 'dict'>, {}), '_skeletonize_list': defaultdict(<class 'dict'>, {'recur': defaultdict(<class 'dict'>, {})})})})})})})})}), '- ..\\n  - llama_index\\n    - packs\\n      - code_hierarchy\\n        - code_hierarchy\\n          - _SignatureCaptureType\\n          - _SignatureCaptureOptions\\n          - _ScopeMethod\\n          - _CommentOptions\\n          - _ScopeItem\\n          - _ChunkNodeOutput\\n          - CodeHierarchyNodeParser\\n            - class_name\\n  
          - __init__\\n            - _get_node_name\\n              - recur\\n            - _get_node_signature\\n              - find_start\\n              - find_end\\n            - _chunk_node\\n            - get_code_hierarchy_from_nodes\\n              - get_subdict\\n              - recur_inclusive_scope\\n              - dict_to_markdown\\n            - _parse_nodes\\n            - _get_indentation\\n            - _get_comment_text\\n            - _create_comment_line\\n            - _get_replacement_text\\n            - _skeletonize\\n            - _skeletonize_list\\n              - recur\\n')\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(CodeHierarchyNodeParser.get_code_hierarchy_from_nodes(split_nodes))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Exploration by the Programmer\n",
+    "\n",
+    "So that we understand what is going on under the hood, what if we go to that node_id we found above?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Going to print the node with UUID: 6d205ded-3ee7-454a-9498-7d5f63963d4c\n"
+     ]
+    },
+    {
+     "data": {
+      "text/markdown": [
+       "```python\n",
+       "class CodeHierarchyNodeParser(NodeParser):\n",
+       "# Code replaced for brevity. See node_id 1b87e4b8-08ef-4b34-ac71-9fbcca8bed76```"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "split_nodes_by_id = {n.node_id: n for n in split_nodes}\n",
+    "uuid_from_text = split_nodes[9].text.splitlines()[-1].split(\" \")[-1]\n",
+    "print(\"Going to print the node with UUID:\", uuid_from_text)\n",
+    "print_python(split_nodes_by_id[uuid_from_text].text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This is the next split in the file. It is prepended with the node before it and appended with the node after it as a comment.\n",
+    "\n",
+    "We can also see the relationships on this node programmatically."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='6d205ded-3ee7-454a-9498-7d5f63963d4c', node_type=<ObjectType.TEXT: '1'>, metadata={'language': 'python', 'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}], 'start_byte': 6241, 'end_byte': 33374, 'filepath': '../llama_index/packs/code_hierarchy/code_hierarchy.py'}, hash='714b8e8a6c2e99ae5f43521fe600587eda6d2cee8411082c4ba3255701ad443f'),\n",
+       " <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='1b87e4b8-08ef-4b34-ac71-9fbcca8bed76', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='443391a4ee2bdb50953f94fe46e8d93c8be044ea84c7cc30efdfd5a2234a3c6f'),\n",
+       " <NodeRelationship.CHILD: '5'>: [RelatedNodeInfo(node_id='c81c5ec6-02da-43f1-beab-70cdff2ea7e8', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': 'class_name', 'type': 'function_definition', 'signature': 'def class_name(cls) -> str:'}], 'start_byte': 6443, 'end_byte': 6545}, hash='390461d846c3220bc4f5323b1ca81cc5e6d5c2183ac5b61c78919be82574cfb9'),\n",
+       "  RelatedNodeInfo(node_id='d027c1a6-bae5-4c7f-be81-ca2e6fb53d67', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '__init__', 'type': 'function_definition', 'signature': 'def __init__(\\n        self,\\n        language: str,\\n        skeleton: bool = True,\\n        signature_identifiers: Optional[Dict[str, _SignatureCaptureOptions]] = None,\\n        code_splitter: Optional[CodeSplitter] = None,\\n        callback_manager: Optional[CallbackManager] = None,\\n        metadata_extractor: Optional[BaseExtractor] = None,\\n        chunk_min_characters: int = 80,\\n    ):'}], 'start_byte': 7844, 'end_byte': 8992}, hash='89029f5dbeed6fb78c458c8b71a341bb97b9a76aa614b6f267e58ad3ec387556'),\n",
+       "  RelatedNodeInfo(node_id='affd8a83-a1bf-4451-9ce6-128080938210', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_get_node_name', 'type': 'function_definition', 'signature': 'def _get_node_name(self, node: Node) -> str:'}], 'start_byte': 8994, 'end_byte': 9541}, hash='af1f5405245e154c3bc13a9b796d6f6c4acf21a03431a3b2d160cf88fb9467f8'),\n",
+       "  RelatedNodeInfo(node_id='0ee4146b-58b1-4a93-ac31-c36268b4424c', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_get_node_signature', 'type': 'function_definition', 'signature': 'def _get_node_signature(self, text: str, node: Node) -> str:'}], 'start_byte': 9543, 'end_byte': 11135}, hash='9c6d0238ac9f394a428be03647887c2bc23b46bd108c0f8946da56e4742a6bda'),\n",
+       "  RelatedNodeInfo(node_id='10ed6803-e6d2-4d58-8dd9-52d60b570320', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_chunk_node', 'type': 'function_definition', 'signature': 'def _chunk_node(\\n        self,\\n        parent: Node,\\n        text: str,\\n        _context_list: Optional[List[_ScopeItem]] = None,\\n        _root: bool = True,\\n    ) -> _ChunkNodeOutput:'}], 'start_byte': 11137, 'end_byte': 17216}, hash='3c35fe3a0f12b328eeac49c925b39dffffa3d793cfae20a4413f5766b7ce0c1a'),\n",
+       "  RelatedNodeInfo(node_id='8399170e-d4d8-4a9e-8d82-8cecd8cf4afd', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': 'get_code_hierarchy_from_nodes', 'type': 'function_definition', 'signature': 'def get_code_hierarchy_from_nodes(\\n        nodes: Sequence[BaseNode],\\n        max_depth: int = -1,\\n    ) -> Tuple[Dict[str, Any], str]:'}], 'start_byte': 17236, 'end_byte': 19499}, hash='59235d7c8606a4b3bab9a5d404c1c4d2d8bb6e4468cd4ee01cf219881fc8fd8f'),\n",
+       "  RelatedNodeInfo(node_id='576ef5c2-5ef9-457d-acea-c61d11c6d4e6', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_parse_nodes', 'type': 'function_definition', 'signature': 'def _parse_nodes(\\n        self,\\n        nodes: Sequence[BaseNode],\\n        show_progress: bool = False,\\n        **kwargs: Any,\\n    ) -> List[BaseNode]:'}], 'start_byte': 19501, 'end_byte': 25472}, hash='9c392c57f7a24f2757e790522cbca7cab0ec53559567d3d011ce0f15ac78f170'),\n",
+       "  RelatedNodeInfo(node_id='c36a088a-5583-4d20-a6f6-c314d987b4c5', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_get_indentation', 'type': 'function_definition', 'signature': 'def _get_indentation(text: str) -> Tuple[str, int, int]:'}], 'start_byte': 25492, 'end_byte': 27817}, hash='78da4361d6dc2cd1d475e522f9036343d580e1072cd6f647bf404c4a6859d7d9'),\n",
+       "  RelatedNodeInfo(node_id='41c2f0ab-3ab3-4691-acf7-3c6f2162d10a', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_get_comment_text', 'type': 'function_definition', 'signature': 'def _get_comment_text(node: TextNode) -> str:'}], 'start_byte': 27837, 'end_byte': 28035}, hash='780851f89f59302c04bbbe13cb46dfa404e04ad7a3e568a4527e1c0a5dbb2812'),\n",
+       "  RelatedNodeInfo(node_id='99b4bfca-f48a-4cd7-aa73-fcaeb4fd4461', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_create_comment_line', 'type': 'function_definition', 'signature': 'def _create_comment_line(cls, node: TextNode, indention_lvl: int = -1) -> str:'}], 'start_byte': 28054, 'end_byte': 29175}, hash='d5267b1cdc5b60644125f7d196fea602471760a7063e67fb52f5eaadf798a246'),\n",
+       "  RelatedNodeInfo(node_id='3f4a4cc7-049d-4bf9-a0f1-9b546508af16', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_get_replacement_text', 'type': 'function_definition', 'signature': 'def _get_replacement_text(cls, child_node: TextNode) -> str:'}], 'start_byte': 29194, 'end_byte': 32017}, hash='423740218dc6072242982baf9cc1a3ba5676d9990f80ee41817f7e989aae752c'),\n",
+       "  RelatedNodeInfo(node_id='109e1549-e92a-4f76-b2f8-f564ba2fcb46', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_skeletonize', 'type': 'function_definition', 'signature': 'def _skeletonize(cls, parent_node: TextNode, child_node: TextNode) -> None:'}], 'start_byte': 32036, 'end_byte': 32702}, hash='cb348537cee003c69c8704dd203aaf0ff93408642ba8c961d52c43168f49c213'),\n",
+       "  RelatedNodeInfo(node_id='65505ced-5998-41af-9d22-9b8b6182395f', node_type=<ObjectType.TEXT: '1'>, metadata={'inclusive_scopes': [{'name': 'CodeHierarchyNodeParser', 'type': 'class_definition', 'signature': 'class CodeHierarchyNodeParser(NodeParser):'}, {'name': '_skeletonize_list', 'type': 'function_definition', 'signature': \"def _skeletonize_list(cls, nodes: List[TextNode]) -> None:\\n        # Create a convenient map for mapping node id's to nodes\"}], 'start_byte': 32721, 'end_byte': 33374}, hash='1b60cc9e6e64c851677274b3693e3fdbfd213c4910bf1d2da9a7d502c07d0008')],\n",
+       " <NodeRelationship.PARENT: '4'>: RelatedNodeInfo(node_id='1d5bf8bf-639b-486a-adc1-6cd9352eb275', node_type=<ObjectType.TEXT: '1'>, metadata={'language': 'python', 'inclusive_scopes': [], 'start_byte': 0, 'end_byte': 33375, 'filepath': '../llama_index/packs/code_hierarchy/code_hierarchy.py'}, hash='f76ef425d472658be552f10fd2279c0040c2d5ec83a05b7589d97013186e2241')}"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "split_nodes_by_id[uuid_from_text].relationships"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `NEXT` `PREV` relationships come from the `CodeSplitter` which is a component of the `CodeHierarchyNodeParser`. It is responsible for cutting up the nodes into chunks that are a certain character length. For more information about the `CodeSplitter` read this:\n",
+    "\n",
+    "[Code Splitter](https://docs.llamaindex.ai/en/latest/api/llama_index.node_parser.CodeSplitter.html)\n",
+    "\n",
+    "The `PARENT` and `CHILD` relationships come from the `CodeHierarchyNodeParser` which is responsible for creating the hierarchy of nodes. Things like classes, functions, and methods are nodes in this hierarchy.\n",
+    "\n",
+    "The `SOURCE` is the original file that this node came from."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "```python\n",
+       "# Code replaced for brevity. See node_id 6d205ded-3ee7-454a-9498-7d5f63963d4c\n",
+       "\"\"\"Split code using a AST parser.\n",
+       "\n",
+       "    Add metadata about the scope of the code block and relationships between\n",
+       "    code blocks.\n",
+       "    \"\"\"\n",
+       "\n",
+       "    @classmethod\n",
+       "    def class_name(cls) -> str:\n",
+       "        # Code replaced for brevity. See node_id c81c5ec6-02da-43f1-beab-70cdff2ea7e8\n",
+       "\n",
+       "    language: str = Field(\n",
+       "        description=\"The programming language of the code being split.\"\n",
+       "    )\n",
+       "    signature_identifiers: Dict[str, _SignatureCaptureOptions] = Field(\n",
+       "        description=(\n",
+       "            \"A dictionary mapping the type of a split mapped to the first and last type\"\n",
+       "            \" of itschildren which identify its signature.\"\n",
+       "        )\n",
+       "    )\n",
+       "    min_characters: int = Field(\n",
+       "        default=80,\n",
+       "        description=(\n",
+       "            \"Minimum number of characters per chunk.Defaults to 80 because that's about\"\n",
+       "            \" how long a replacement comment is in skeleton mode.\"\n",
+       "        ),\n",
+       "    )\n",
+       "# Code replaced for brevity. See node_id b5ffc7d6-b795-4304-9dcc-b31568291861```"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from llama_index.core.schema import NodeRelationship\n",
+    "\n",
+    "node_id = uuid_from_text\n",
+    "if NodeRelationship.NEXT not in split_nodes_by_id[node_id].relationships:\n",
+    "    print(\"No next node found!\")\n",
+    "else:\n",
+    "    next_node_relationship_info = split_nodes_by_id[node_id].relationships[\n",
+    "        NodeRelationship.NEXT\n",
+    "    ]\n",
+    "    next_node = split_nodes_by_id[next_node_relationship_info.node_id]\n",
+    "    print_python(next_node.text)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Keyword Table and Usage by the LLM\n",
+    "\n",
+    "Lets explore the use of this node parser in an index. We will be able to use any index which allows search by keyword, which should enable us to search for any node by it's uuid, or by any scope name.\n",
+    "\n",
+    "We have created a `CodeHierarchyKeywordQueryEngine` which will allow us to search for nodes by their uuid, or by their scope name. It's `.query` method can be used as a simple search tool for any LLM. Given the repo map we created earlier, or the text of a split file, the LLM should be able to figure out what to search for very naturally.\n",
+    "\n",
+    "Lets create the KeywordQueryEngine"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.packs.code_hierarchy import CodeHierarchyKeywordQueryEngine\n",
+    "\n",
+    "query_engine = CodeHierarchyKeywordQueryEngine(\n",
+    "    nodes=split_nodes,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now we can get the same code as before."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "```python\n",
+       "from collections import defaultdict\n",
+       "from enum import Enum\n",
+       "from tree_sitter import Node\n",
+       "from typing import Any, Dict, List, Optional, Sequence, Tuple\n",
+       "\n",
+       "\n",
+       "from llama_index.core.bridge.pydantic import BaseModel, Field\n",
+       "from llama_index.core.callbacks.base import CallbackManager\n",
+       "from llama_index.core.extractors.metadata_extractors import BaseExtractor\n",
+       "from llama_index.core.node_parser.interface import NodeParser\n",
+       "from llama_index.core.schema import BaseNode, NodeRelationship, TextNode\n",
+       "from llama_index.core.text_splitter import CodeSplitter\n",
+       "from llama_index.core.utils import get_tqdm_iterable\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureType(BaseModel):\n",
+       "    # Code replaced for brevity. See node_id b30b6043-4cba-420e-bd6b-e91beea08819\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureOptions(BaseModel):\n",
+       "    # Code replaced for brevity. See node_id e0961aad-bd9f-4295-927d-90ac6e2b06c8\n",
+       "# Code replaced for brevity. See node_id 0f6bc262-ef8b-4051-8c8e-486863e4cbe2```"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "print_python(query_engine.query(split_nodes[0].node_id).response)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "But now we can also search for any node by it's common sense name.\n",
+    "\n",
+    "For example, the class `_SignatureCaptureOptions` is a node in the hierarchy. We can search for it by name.\n",
+    "\n",
+    "The reason we aren't getting more detail is because our min_characters is too low, try to increase it for more detail for any individual query."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "```python\n",
+       "class _SignatureCaptureOptions(BaseModel):\n",
+       "# Code replaced for brevity. See node_id f3ccdeee-207a-4d71-9451-7a9aa93bec33```"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "print_python(query_engine.query(\"_SignatureCaptureOptions\").response)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And by module name, in case the LLM sees something in an import statement and wants to know more about it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "```python\n",
+       "from collections import defaultdict\n",
+       "from enum import Enum\n",
+       "from tree_sitter import Node\n",
+       "from typing import Any, Dict, List, Optional, Sequence, Tuple\n",
+       "\n",
+       "\n",
+       "from llama_index.core.bridge.pydantic import BaseModel, Field\n",
+       "from llama_index.core.callbacks.base import CallbackManager\n",
+       "from llama_index.core.extractors.metadata_extractors import BaseExtractor\n",
+       "from llama_index.core.node_parser.interface import NodeParser\n",
+       "from llama_index.core.schema import BaseNode, NodeRelationship, TextNode\n",
+       "from llama_index.core.text_splitter import CodeSplitter\n",
+       "from llama_index.core.utils import get_tqdm_iterable\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureType(BaseModel):\n",
+       "    # Code replaced for brevity. See node_id b30b6043-4cba-420e-bd6b-e91beea08819\n",
+       "\n",
+       "\n",
+       "class _SignatureCaptureOptions(BaseModel):\n",
+       "    # Code replaced for brevity. See node_id e0961aad-bd9f-4295-927d-90ac6e2b06c8\n",
+       "# Code replaced for brevity. See node_id 0f6bc262-ef8b-4051-8c8e-486863e4cbe2```"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "print_python(query_engine.query(\"code_hierarchy\").response)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### As an Agent"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can convert the query engine to be used as a tool for an agent!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core.tools import QueryEngineTool\n",
+    "\n",
+    "tool = QueryEngineTool.from_defaults(\n",
+    "    query_engine=query_engine,\n",
+    "    name=\"code_lookup\",\n",
+    "    description=\"Useful for looking up information about the code hierarchy codebase.\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There is also a helpful description of the tool here, which works best as a system prompt."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "Description: Search the tool by any element in this list to get more information about that element.\n",
+       "If you see 'Code replaced for brevity' then a uuid, you may also search the tool with that uuid to see the full code.\n",
+       "You may need to use the tool multiple times to fully answer the user message.\n",
+       "The list is:\n",
+       "- ..\n",
+       "  - llama_index\n",
+       "    - packs\n",
+       "      - code_hierarchy\n",
+       "        - code_hierarchy\n",
+       "          - _SignatureCaptureType\n",
+       "          - _SignatureCaptureOptions\n",
+       "          - _ScopeMethod\n",
+       "          - _CommentOptions\n",
+       "          - _ScopeItem\n",
+       "          - _ChunkNodeOutput\n",
+       "          - CodeHierarchyNodeParser\n",
+       "            - class_name\n",
+       "            - __init__\n",
+       "            - _get_node_name\n",
+       "              - recur\n",
+       "            - _get_node_signature\n",
+       "              - find_start\n",
+       "              - find_end\n",
+       "            - _chunk_node\n",
+       "            - get_code_hierarchy_from_nodes\n",
+       "              - get_subdict\n",
+       "              - recur_inclusive_scope\n",
+       "              - dict_to_markdown\n",
+       "            - _parse_nodes\n",
+       "            - _get_indentation\n",
+       "            - _get_comment_text\n",
+       "            - _create_comment_line\n",
+       "            - _get_replacement_text\n",
+       "            - _skeletonize\n",
+       "            - _skeletonize_list\n",
+       "              - recur\n",
+       "\n"
+      ],
+      "text/plain": [
+       "<IPython.core.display.Markdown object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "display(Markdown(\"Description: \" + query_engine.get_tool_instructions()))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now lets finally actually make an agent!\n",
+    "\n",
+    "Note that this requires some complex reasoning, and works best with GPT-4-like LLMs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.agent.openai import OpenAIAgent\n",
+    "from llama_index.llms.openai import OpenAI\n",
+    "\n",
+    "llm = OpenAI(model=\"gpt-4\", temperature=0.1)\n",
+    "\n",
+    "agent = OpenAIAgent.from_tools(\n",
+    "    [tool], llm=llm, system_prompt=query_engine.get_tool_instructions(), verbose=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Added user message to memory: How does the get_code_hierarchy_from_nodes function from the code hierarchy node parser work? Provide specific implementation details.\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_lookup with args: {\n",
+      "  \"input\": \"get_code_hierarchy_from_nodes\"\n",
+      "}\n",
+      "Got output: def get_code_hierarchy_from_nodes(\n",
+      "        nodes: Sequence[BaseNode],\n",
+      "        max_depth: int = -1,\n",
+      "    ) -> Tuple[Dict[str, Any], str]:\n",
+      "# Code replaced for brevity. See node_id edaed793-48f1-4926-b2d1-ee58c84d4266\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_lookup with args: {\n",
+      "  \"input\": \"edaed793-48f1-4926-b2d1-ee58c84d4266\"\n",
+      "}\n",
+      "Got output: # Code replaced for brevity. See node_id 8399170e-d4d8-4a9e-8d82-8cecd8cf4afd\n",
+      "\"\"\"\n",
+      "        Creates a code hierarchy appropriate to put into a tool description or context\n",
+      "        to make it easier to search for code.\n",
+      "\n",
+      "        Call after `get_nodes_from_documents` and pass that output to this function.\n",
+      "        \"\"\"\n",
+      "        out: Dict[str, Any] = defaultdict(dict)\n",
+      "\n",
+      "        def get_subdict(keys: list[str]) -> Dict[str, Any]:\n",
+      "            # Get the dictionary we are operating on\n",
+      "                # Code replaced for brevity. See node_id 5cecc377-defc-4f8b-8e49-d33f668d4735\n",
+      "\n",
+      "        def recur_inclusive_scope(node: BaseNode, i: int, keys: list[str]) -> None:\n",
+      "                # Code replaced for brevity. See node_id 7274f0d7-f86f-4857-8ad3-e930cd272897\n",
+      "\n",
+      "        def dict_to_markdown(d: dict[str, Any], depth: int = 0) -> str:\n",
+      "                # Code replaced for brevity. See node_id c39b4dad-c48f-46ef-b961-1e244248e753\n",
+      "# Code replaced for brevity. See node_id 90d2a996-8f62-4587-9811-28eb02015142\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_lookup with args: {\n",
+      "  \"input\": \"5cecc377-defc-4f8b-8e49-d33f668d4735\"\n",
+      "}\n",
+      "Got output: def get_subdict(keys: list[str]) -> Dict[str, Any]:\n",
+      "            # Get the dictionary we are operating on\n",
+      "            this_dict = out\n",
+      "            for key in keys:\n",
+      "                if key not in this_dict:\n",
+      "                    this_dict[key] = defaultdict(dict)\n",
+      "                this_dict = this_dict[key]\n",
+      "            return this_dict\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_lookup with args: {\n",
+      "  \"input\": \"7274f0d7-f86f-4857-8ad3-e930cd272897\"\n",
+      "}\n",
+      "Got output: def recur_inclusive_scope(node: BaseNode, i: int, keys: list[str]) -> None:\n",
+      "            if \"inclusive_scopes\" not in node.metadata:\n",
+      "                raise KeyError(\"inclusive_scopes not in node.metadata\")\n",
+      "            if i >= len(node.metadata[\"inclusive_scopes\"]):\n",
+      "                return\n",
+      "            scope = node.metadata[\"inclusive_scopes\"][i]\n",
+      "\n",
+      "            this_dict = get_subdict(keys)\n",
+      "\n",
+      "            if scope[\"name\"] not in this_dict:\n",
+      "                this_dict[scope[\"name\"]] = defaultdict(dict)\n",
+      "\n",
+      "            if i < max_depth or max_depth == -1:\n",
+      "                recur_inclusive_scope(node, i + 1, [*keys, scope[\"name\"]])\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_lookup with args: {\n",
+      "  \"input\": \"c39b4dad-c48f-46ef-b961-1e244248e753\"\n",
+      "}\n",
+      "Got output: def dict_to_markdown(d: dict[str, Any], depth: int = 0) -> str:\n",
+      "            markdown = \"\"\n",
+      "            indent = \"  \" * depth  # Two spaces per depth level\n",
+      "\n",
+      "            for key, value in d.items():\n",
+      "                if isinstance(value, dict):  # Check if value is a dict\n",
+      "                    # Add the key with a bullet point and increase depth for nested dicts\n",
+      "                    markdown += f\"{indent}- {key}\\n{dict_to_markdown(value, depth + 1)}\"\n",
+      "                else:\n",
+      "                    # Handle non-dict items if necessary\n",
+      "                    markdown += f\"{indent}- {key}: {value}\\n\"\n",
+      "\n",
+      "            return markdown\n",
+      "========================\n",
+      "\n",
+      "=== Calling Function ===\n",
+      "Calling function: code_lookup with args: {\n",
+      "  \"input\": \"90d2a996-8f62-4587-9811-28eb02015142\"\n",
+      "}\n",
+      "Got output: # Code replaced for brevity. See node_id edaed793-48f1-4926-b2d1-ee58c84d4266\n",
+      "for node in nodes:\n",
+      "            filepath = node.metadata[\"filepath\"].split(\"/\")\n",
+      "            filepath[-1] = filepath[-1].split(\".\")[0]\n",
+      "            recur_inclusive_scope(node, 0, filepath)\n",
+      "\n",
+      "        return out, dict_to_markdown(out)\n",
+      "========================\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "response = agent.chat(\n",
+    "    \"How does the get_code_hierarchy_from_nodes function from the code hierarchy node parser work? Provide specific implementation details.\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The `get_code_hierarchy_from_nodes` function in the `CodeHierarchyNodeParser` class creates a code hierarchy that can be used in a tool description or context to make it easier to search for code. This function should be called after `get_nodes_from_documents` and the output from that function should be passed to this function.\n",
+      "\n",
+      "The function takes two parameters:\n",
+      "- `nodes`: A sequence of `BaseNode` objects.\n",
+      "- `max_depth`: An integer specifying the maximum depth of the hierarchy. The default value is -1, which means there is no limit on the depth.\n",
+      "\n",
+      "The function starts by initializing an empty dictionary `out`.\n",
+      "\n",
+      "It then defines three helper functions:\n",
+      "\n",
+      "1. `get_subdict(keys)`: This function takes a list of keys and returns the sub-dictionary of `out` that corresponds to these keys. It creates any necessary intermediate dictionaries that don't exist.\n",
+      "\n",
+      "2. `recur_inclusive_scope(node, i, keys)`: This function recursively builds the code hierarchy. It takes a `BaseNode` object, an index `i`, and a list of keys. It checks if the node has \"inclusive_scopes\" in its metadata. If it does, it retrieves the scope at index `i` and adds it to the dictionary. If `i` is less than `max_depth` or `max_depth` is -1, it calls itself recursively with `i + 1` and the updated list of keys.\n",
+      "\n",
+      "3. `dict_to_markdown(d, depth)`: This function takes a dictionary `d` and an integer `depth`, and returns a string that represents the dictionary in Markdown format. It uses two spaces per depth level for indentation.\n",
+      "\n",
+      "After defining these helper functions, `get_code_hierarchy_from_nodes` iterates over the `nodes` and calls `recur_inclusive_scope` for each node with the node's filepath as the initial keys.\n",
+      "\n",
+      "Finally, it returns the dictionary `out` and its Markdown representation as a tuple.\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(str(response))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llama_index",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/BUILD b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/BUILD
new file mode 100644
index 0000000000..db46e8d6c9
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/BUILD
@@ -0,0 +1 @@
+python_sources()
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/__init__.py b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/__init__.py
new file mode 100644
index 0000000000..98cac47820
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/__init__.py
@@ -0,0 +1,11 @@
+from llama_index.packs.code_hierarchy.base import CodeHierarchyAgentPack
+from llama_index.packs.code_hierarchy.code_hierarchy import CodeHierarchyNodeParser
+from llama_index.packs.code_hierarchy.query_engine import (
+    CodeHierarchyKeywordQueryEngine,
+)
+
+__all__ = [
+    "CodeHierarchyAgentPack",
+    "CodeHierarchyNodeParser",
+    "CodeHierarchyKeywordQueryEngine",
+]
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/base.py b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/base.py
new file mode 100644
index 0000000000..d1deddd64d
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/base.py
@@ -0,0 +1,43 @@
+from typing import Any, Dict, List
+
+from llama_index.agent.openai import OpenAIAgent
+from llama_index.core.llama_pack import BaseLlamaPack
+from llama_index.core.schema import BaseNode
+from llama_index.llms.openai import OpenAI
+from llama_index.core.tools import QueryEngineTool
+
+
class CodeHierarchyAgentPack(BaseLlamaPack):
    """Code hierarchy agent pack.

    Wires three collaborating pieces together from a list of pre-split code
    nodes: a ``CodeHierarchyKeywordQueryEngine`` over the nodes, a
    ``QueryEngineTool`` exposing that engine, and an ``OpenAIAgent`` that can
    call the tool while chatting.
    """

    def __init__(self, split_nodes: List[BaseNode], llm: OpenAI, verbose: bool = True):
        """Initialize the code hierarchy agent pack."""
        # Imported lazily to avoid a circular import with the package __init__.
        from llama_index.packs.code_hierarchy import CodeHierarchyKeywordQueryEngine

        # Build each component once, then publish them as attributes.
        query_engine = CodeHierarchyKeywordQueryEngine(nodes=split_nodes)

        tool = QueryEngineTool.from_defaults(
            query_engine=query_engine,
            name="code_search",
            description="Search the code hierarchy for a specific code element, using keywords or IDs.",
        )

        agent = OpenAIAgent.from_tools(
            tools=[tool],
            llm=llm,
            system_prompt=query_engine.get_tool_instructions(),
            verbose=verbose,
        )

        self.query_engine = query_engine
        self.tool = tool
        self.agent = agent

    def get_modules(self) -> Dict[str, Any]:
        """Expose the pack's components for inspection or reuse."""
        return {
            "query_engine": self.query_engine,
            "tool": self.tool,
            "agent": self.agent,
        }

    def run(self, user_message: str) -> str:
        """Run the agent on the user message."""
        return str(self.agent.chat(user_message))
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/code_hierarchy.py b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/code_hierarchy.py
new file mode 100644
index 0000000000..0b14edbc81
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/code_hierarchy.py
@@ -0,0 +1,845 @@
+from collections import defaultdict
+from enum import Enum
+from tree_sitter import Node
+from typing import Any, Dict, List, Optional, Sequence, Tuple
+
+
+from llama_index.core.bridge.pydantic import BaseModel, Field
+from llama_index.core.callbacks.base import CallbackManager
+from llama_index.core.extractors.metadata_extractors import BaseExtractor
+from llama_index.core.node_parser.interface import NodeParser
+from llama_index.core.schema import BaseNode, NodeRelationship, TextNode
+from llama_index.core.text_splitter import CodeSplitter
+from llama_index.core.utils import get_tqdm_iterable
+
+
class _SignatureCaptureType(BaseModel):
    """
    Unfortunately some languages need special options for how to make a signature.

    For example, html element signatures should include their closing >, there is no
    easy way to include this using an always-exclusive system.

    However, using an always-inclusive system, python decorators don't work,
    as there isn't an easy to define terminator for decorators that is inclusive
    to their signature.
    """

    # Tree-sitter node type string that marks a signature boundary.
    type: str = Field(description="The type string to match on.")
    # Whether the matched node's own text is part of the signature.
    inclusive: bool = Field(
        description=(
            "Whether to include the text of the node matched by this type or not."
        ),
    )
+
+
class _SignatureCaptureOptions(BaseModel):
    """
    Options for capturing the signature of a node.

    Used by ``CodeHierarchyNodeParser._get_node_signature`` and
    ``_get_node_name`` to locate a scope's signature span and name.
    """

    # Where the signature begins; falls back to the node's start_byte when
    # unset or empty.
    start_signature_types: Optional[List[_SignatureCaptureType]] = Field(
        None,
        description=(
            "A list of node types any of which indicate the beginning of the signature."
            "If this is none or empty, use the start_byte of the node."
        ),
    )
    # Where the signature ends; falls back to the node's end_byte when unset
    # or empty.
    end_signature_types: Optional[List[_SignatureCaptureType]] = Field(
        None,
        description=(
            "A list of node types any of which indicate the end of the signature."
            "If this is none or empty, use the end_byte of the node."
        ),
    )
    # Tree-sitter node type whose text supplies the scope's name.
    # NOTE(review): the description mentions passing a function, but the
    # declared type is str — only simple type matches work as written.
    name_identifier: str = Field(
        description=(
            "The node type to use for the signatures 'name'.If retrieving the name is"
            " more complicated than a simple type match, use a function which takes a"
            " node and returns true or false as to whether its the name or not. The"
            " first match is returned."
        )
    )
+
+
+"""
+Maps language -> Node Type -> SignatureCaptureOptions
+
+The best way for a developer to discover these is to put a breakpoint at the TIP
+tag in _chunk_node, and then create a unit test for some code, and then iterate
+through the code discovering the node names.
+"""
+_DEFAULT_SIGNATURE_IDENTIFIERS: Dict[str, Dict[str, _SignatureCaptureOptions]] = {
+    "python": {
+        "function_definition": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="block", inclusive=False)],
+            name_identifier="identifier",
+        ),
+        "class_definition": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="block", inclusive=False)],
+            name_identifier="identifier",
+        ),
+    },
+    "html": {
+        "element": _SignatureCaptureOptions(
+            start_signature_types=[_SignatureCaptureType(type="<", inclusive=True)],
+            end_signature_types=[_SignatureCaptureType(type=">", inclusive=True)],
+            name_identifier="tag_name",
+        )
+    },
+    "cpp": {
+        "class_specifier": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="{", inclusive=False)],
+            name_identifier="type_identifier",
+        ),
+        "function_definition": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="{", inclusive=False)],
+            name_identifier="function_declarator",
+        ),
+    },
+    "typescript": {
+        "interface_declaration": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="{", inclusive=False)],
+            name_identifier="type_identifier",
+        ),
+        "lexical_declaration": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="{", inclusive=False)],
+            name_identifier="identifier",
+        ),
+        "function_declaration": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="{", inclusive=False)],
+            name_identifier="identifier",
+        ),
+        "class_declaration": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="{", inclusive=False)],
+            name_identifier="type_identifier",
+        ),
+        "method_definition": _SignatureCaptureOptions(
+            end_signature_types=[_SignatureCaptureType(type="{", inclusive=False)],
+            name_identifier="property_identifier",
+        ),
+    },
+}
+
+
class _ScopeMethod(Enum):
    """How a language delimits the body of a scope."""

    INDENTATION = "INDENTATION"  # e.g. python
    BRACKETS = "BRACKETS"  # e.g. cpp, typescript
    HTML_END_TAGS = "HTML_END_TAGS"  # e.g. html closing tags
+
+
class _CommentOptions(BaseModel):
    """Comment syntax and scope-delimiting style for a language."""

    # Template with one "{}" placeholder for the comment text.
    comment_template: str
    # How the language marks the extent of a scope.
    scope_method: _ScopeMethod
+
+
# Per-language comment syntax used when writing skeleton placeholder comments.
_COMMENT_OPTIONS: Dict[str, _CommentOptions] = {
    "cpp": _CommentOptions(
        comment_template="// {}", scope_method=_ScopeMethod.BRACKETS
    ),
    "html": _CommentOptions(
        comment_template="<!-- {} -->", scope_method=_ScopeMethod.HTML_END_TAGS
    ),
    "python": _CommentOptions(
        comment_template="# {}", scope_method=_ScopeMethod.INDENTATION
    ),
    "typescript": _CommentOptions(
        comment_template="// {}", scope_method=_ScopeMethod.BRACKETS
    ),
}

# Import-time sanity checks: the two per-language tables must stay in sync.
assert all(
    language in _DEFAULT_SIGNATURE_IDENTIFIERS for language in _COMMENT_OPTIONS
), "Not all languages in _COMMENT_OPTIONS are in _DEFAULT_SIGNATURE_IDENTIFIERS"
assert all(
    language in _COMMENT_OPTIONS for language in _DEFAULT_SIGNATURE_IDENTIFIERS
), "Not all languages in _DEFAULT_SIGNATURE_IDENTIFIERS are in _COMMENT_OPTIONS"
+
+
class _ScopeItem(BaseModel):
    """Like a Node from tree_sitter, but with only the str information we need."""

    # The scope's name, e.g. a function or class name.
    name: str
    # The tree-sitter node type, e.g. "function_definition".
    type: str
    # The captured signature text of the scope.
    signature: str
+
+
class _ChunkNodeOutput(BaseModel):
    """The output of a chunk_node call."""

    # The chunk built for this node, if it formed its own scope; None otherwise.
    this_document: Optional[TextNode]
    # Chunks from descendants not yet attached to a parent document.
    upstream_children_documents: List[TextNode]
    # Every chunk created in this subtree.
    all_documents: List[TextNode]
+
+
class CodeHierarchyNodeParser(NodeParser):
    """Split code using an AST parser.

    Add metadata about the scope of the code block and relationships between
    code blocks.
    """

    @classmethod
    def class_name(cls) -> str:
        """Get class name."""
        return "CodeHierarchyNodeParser"

    # Tree-sitter language key, e.g. "python"; must be resolvable by
    # tree_sitter_languages.get_parser.
    language: str = Field(
        description="The programming language of the code being split."
    )
    # Per-node-type options controlling how a scope's signature and name are
    # captured; see _SignatureCaptureOptions and _DEFAULT_SIGNATURE_IDENTIFIERS.
    signature_identifiers: Dict[str, _SignatureCaptureOptions] = Field(
        description=(
            "A dictionary mapping the type of a split mapped to the first and last type"
            " of itschildren which identify its signature."
        )
    )
    # Chunks shorter than this are dropped (except the root chunk).
    min_characters: int = Field(
        default=80,
        description=(
            "Minimum number of characters per chunk.Defaults to 80 because that's about"
            " how long a replacement comment is in skeleton mode."
        ),
    )
    # Optional secondary splitter applied after hierarchical chunking.
    code_splitter: Optional[CodeSplitter] = Field(
        description="The text splitter to use when splitting documents."
    )
    metadata_extractor: Optional[BaseExtractor] = Field(
        default=None, description="Metadata extraction pipeline to apply to nodes."
    )
    callback_manager: CallbackManager = Field(
        default_factory=CallbackManager, exclude=True
    )
    skeleton: bool = Field(
        True,
        description=(
            "Parent nodes have the text of their child nodes replaced with a signature"
            " and a comment instructing the language model to visit the child node for"
            " the full text of the scope."
        ),
    )
+
+    def __init__(
+        self,
+        language: str,
+        skeleton: bool = True,
+        signature_identifiers: Optional[Dict[str, _SignatureCaptureOptions]] = None,
+        code_splitter: Optional[CodeSplitter] = None,
+        callback_manager: Optional[CallbackManager] = None,
+        metadata_extractor: Optional[BaseExtractor] = None,
+        chunk_min_characters: int = 80,
+    ):
+        callback_manager = callback_manager or CallbackManager([])
+
+        if signature_identifiers is None:
+            try:
+                signature_identifiers = _DEFAULT_SIGNATURE_IDENTIFIERS[language]
+            except KeyError:
+                raise ValueError(
+                    f"Must provide signature_identifiers for language {language}."
+                )
+
+        super().__init__(
+            include_prev_next_rel=False,
+            language=language,
+            callback_manager=callback_manager,
+            metadata_extractor=metadata_extractor,
+            code_splitter=code_splitter,
+            signature_identifiers=signature_identifiers,
+            min_characters=chunk_min_characters,
+            skeleton=skeleton,
+        )
+
+    def _get_node_name(self, node: Node) -> str:
+        """Get the name of a node."""
+        signature_identifier = self.signature_identifiers[node.type]
+
+        def recur(node: Node) -> str:
+            for child in node.children:
+                if child.type == signature_identifier.name_identifier:
+                    return child.text.decode()
+                if child.children:
+                    out = recur(child)
+                    if out:
+                        return out
+            return ""
+
+        return recur(node).strip()
+
+    def _get_node_signature(self, text: str, node: Node) -> str:
+        """Get the signature of a node."""
+        signature_identifier = self.signature_identifiers[node.type]
+
+        def find_start(node: Node) -> Optional[int]:
+            if not signature_identifier.start_signature_types:
+                signature_identifier.start_signature_types = []
+
+            for st in signature_identifier.start_signature_types:
+                if node.type == st.type:
+                    if st.inclusive:
+                        return node.start_byte
+                    return node.end_byte
+
+            for child in node.children:
+                out = find_start(child)
+                if out is not None:
+                    return out
+
+            return None
+
+        def find_end(node: Node) -> Optional[int]:
+            if not signature_identifier.end_signature_types:
+                signature_identifier.end_signature_types = []
+
+            for st in signature_identifier.end_signature_types:
+                if node.type == st.type:
+                    if st.inclusive:
+                        return node.end_byte
+                    return node.start_byte
+
+            for child in node.children:
+                out = find_end(child)
+                if out is not None:
+                    return out
+
+            return None
+
+        start_byte, end_byte = find_start(node), find_end(node)
+        if start_byte is None:
+            start_byte = node.start_byte
+        if end_byte is None:
+            end_byte = node.end_byte
+        return text[start_byte:end_byte].strip()
+
    def _chunk_node(
        self,
        parent: Node,
        text: str,
        _context_list: Optional[List[_ScopeItem]] = None,
        _root: bool = True,
    ) -> _ChunkNodeOutput:
        """
        This is really the "main" method of this class. It is recursive and recursively
        chunks the text by the options identified in self.signature_identifiers.

        It is ran by get_nodes_from_documents.

        Args:
            parent (Node): The parent node to chunk
            text (str): The text of the entire document
            _context_list (Optional[List[_ScopeItem]]): The scope context of the
                                                        parent node
            _root (bool): Whether or not this is the root node

        Returns:
            _ChunkNodeOutput: the document created for this node (if it formed
            a chunk), any descendant documents still awaiting a parent, and
            every document created in this subtree.
        """
        if _context_list is None:
            _context_list = []

        upstream_children_documents: List[TextNode] = []
        all_documents: List[TextNode] = []

        # Capture any whitespace before parent.start_byte
        # Very important for space sensitive languages like python
        start_byte = parent.start_byte
        while start_byte > 0 and text[start_byte - 1] in (" ", "\t"):
            start_byte -= 1

        # Create this node
        current_chunk = text[start_byte : parent.end_byte]

        # Return early if the chunk is too small
        # (the root is exempt so every document produces at least one chunk)
        if len(current_chunk) < self.min_characters and not _root:
            return _ChunkNodeOutput(
                this_document=None, all_documents=[], upstream_children_documents=[]
            )

        # TIP: This is a wonderful place to put a debug breakpoint when
        #      Trying to integrate a new language. Pay attention to parent.type to learn
        #      all the available node types and their hierarchy.
        if parent.type in self.signature_identifiers or _root:
            # Get the new context
            if not _root:
                new_context = _ScopeItem(
                    name=self._get_node_name(parent),
                    type=parent.type,
                    signature=self._get_node_signature(text=text, node=parent),
                )
                _context_list.append(new_context)
            this_document = TextNode(
                text=current_chunk,
                metadata={
                    "inclusive_scopes": [cl.dict() for cl in _context_list],
                    "start_byte": start_byte,
                    "end_byte": parent.end_byte,
                },
                relationships={
                    NodeRelationship.CHILD: [],
                },
            )
            all_documents.append(this_document)
        else:
            # This node is not a recognized scope; its children (if any) will
            # be passed upstream to the nearest chunked ancestor.
            this_document = None

        # Iterate over children
        for child in parent.children:
            if child.children:
                # Recurse on the child
                # (a copy of the context so siblings don't see each other's scopes)
                next_chunks = self._chunk_node(
                    child, text, _context_list=_context_list.copy(), _root=False
                )

                # If there is a this_document, then we need
                # to add the children to this_document
                # and flush upstream_children_documents
                if this_document is not None:
                    # If we have been given a document, that means it's children
                    # are already set, so it needs to become a child of this node
                    if next_chunks.this_document is not None:
                        assert not next_chunks.upstream_children_documents, (
                            "next_chunks.this_document and"
                            " next_chunks.upstream_children_documents are exclusive."
                        )
                        this_document.relationships[
                            NodeRelationship.CHILD
                        ].append(  # type: ignore
                            next_chunks.this_document.as_related_node_info()
                        )
                        next_chunks.this_document.relationships[
                            NodeRelationship.PARENT
                        ] = this_document.as_related_node_info()
                    # Otherwise, we have been given a list of
                    # upstream_children_documents. We need to make
                    # them a child of this node
                    else:
                        for d in next_chunks.upstream_children_documents:
                            this_document.relationships[
                                NodeRelationship.CHILD
                            ].append(  # type: ignore
                                d.as_related_node_info()
                            )
                            d.relationships[
                                NodeRelationship.PARENT
                            ] = this_document.as_related_node_info()
                # Otherwise we pass the children upstream
                else:
                    # If we have been given a document, that means it's
                    # children are already set, so it needs to become a
                    # child of the next node
                    if next_chunks.this_document is not None:
                        assert not next_chunks.upstream_children_documents, (
                            "next_chunks.this_document and"
                            " next_chunks.upstream_children_documents are exclusive."
                        )
                        upstream_children_documents.append(next_chunks.this_document)
                    # Otherwise, we have leftover children, they need
                    # to become children of the next node
                    else:
                        upstream_children_documents.extend(
                            next_chunks.upstream_children_documents
                        )

                # Lastly we need to maintain all documents
                all_documents.extend(next_chunks.all_documents)

        return _ChunkNodeOutput(
            this_document=this_document,
            upstream_children_documents=upstream_children_documents,
            all_documents=all_documents,
        )
+
+    @staticmethod
+    def get_code_hierarchy_from_nodes(
+        nodes: Sequence[BaseNode],
+        max_depth: int = -1,
+    ) -> Tuple[Dict[str, Any], str]:
+        """
+        Creates a code hierarchy appropriate to put into a tool description or context
+        to make it easier to search for code.
+
+        Call after `get_nodes_from_documents` and pass that output to this function.
+        """
+        out: Dict[str, Any] = defaultdict(dict)
+
+        def get_subdict(keys: List[str]) -> Dict[str, Any]:
+            # Get the dictionary we are operating on
+            this_dict = out
+            for key in keys:
+                if key not in this_dict:
+                    this_dict[key] = defaultdict(dict)
+                this_dict = this_dict[key]
+            return this_dict
+
+        def recur_inclusive_scope(node: BaseNode, i: int, keys: List[str]) -> None:
+            if "inclusive_scopes" not in node.metadata:
+                raise KeyError("inclusive_scopes not in node.metadata")
+            if i >= len(node.metadata["inclusive_scopes"]):
+                return
+            scope = node.metadata["inclusive_scopes"][i]
+
+            this_dict = get_subdict(keys)
+
+            if scope["name"] not in this_dict:
+                this_dict[scope["name"]] = defaultdict(dict)
+
+            if i < max_depth or max_depth == -1:
+                recur_inclusive_scope(node, i + 1, [*keys, scope["name"]])
+
+        def dict_to_markdown(d: Dict[str, Any], depth: int = 0) -> str:
+            markdown = ""
+            indent = "  " * depth  # Two spaces per depth level
+
+            for key, value in d.items():
+                if isinstance(value, dict):  # Check if value is a dict
+                    # Add the key with a bullet point and increase depth for nested dicts
+                    markdown += f"{indent}- {key}\n{dict_to_markdown(value, depth + 1)}"
+                else:
+                    # Handle non-dict items if necessary
+                    markdown += f"{indent}- {key}: {value}\n"
+
+            return markdown
+
+        for node in nodes:
+            filepath = node.metadata["filepath"].split("/")
+            filepath[-1] = filepath[-1].split(".")[0]
+            recur_inclusive_scope(node, 0, filepath)
+
+        return out, dict_to_markdown(out)
+
    def _parse_nodes(
        self,
        nodes: Sequence[BaseNode],
        show_progress: bool = False,
        **kwargs: Any,
    ) -> List[BaseNode]:
        """
        The main public method of this class.

        Parse documents into nodes.

        Args:
            nodes: Documents whose ``.text`` is source code in ``self.language``.
            show_progress: Whether to display a progress bar.
            **kwargs: Forwarded to ``self.code_splitter`` when it is set.

        Returns:
            The hierarchical chunks for all documents, optionally skeletonized
            and further split by ``self.code_splitter``.

        Raises:
            ImportError: If tree_sitter_languages is not installed.
            ValueError: If a document cannot be parsed as ``self.language``.
        """
        out: List[BaseNode] = []

        try:
            import tree_sitter_languages
        except ImportError:
            raise ImportError(
                "Please install tree_sitter_languages to use CodeSplitter."
            )

        try:
            parser = tree_sitter_languages.get_parser(self.language)
        except Exception as e:
            print(
                f"Could not get parser for language {self.language}. Check "
                "https://github.com/grantjenks/py-tree-sitter-languages#license "
                "for a list of valid languages."
            )
            raise e  # noqa: TRY201

        nodes_with_progress = get_tqdm_iterable(
            nodes, show_progress, "Parsing documents into nodes"
        )
        for node in nodes_with_progress:
            text = node.text
            tree = parser.parse(bytes(text, "utf-8"))

            # A leading ERROR node means tree-sitter failed to parse the text.
            if (
                not tree.root_node.children
                or tree.root_node.children[0].type != "ERROR"
            ):
                # Chunk the code
                _chunks = self._chunk_node(tree.root_node, node.text)
                assert _chunks.this_document is not None, "Root node must be a chunk"
                chunks = _chunks.all_documents

                # Add your metadata to the chunks here
                # (document metadata takes precedence over chunk metadata)
                for chunk in chunks:
                    chunk.metadata = {
                        "language": self.language,
                        **chunk.metadata,
                        **node.metadata,
                    }
                    chunk.relationships[
                        NodeRelationship.SOURCE
                    ] = node.as_related_node_info()

                # Replace child bodies in parents with signature + pointer comment.
                if self.skeleton:
                    self._skeletonize_list(chunks)

                # Now further split the code by lines and characters
                # TODO: Test this and the relationships it creates
                if self.code_splitter:
                    new_nodes = []
                    for original_node in chunks:
                        new_split_nodes = self.code_splitter.get_nodes_from_documents(
                            [original_node], show_progress=show_progress, **kwargs
                        )

                        # Force the first new_split_node to have the
                        # same id as the original_node
                        new_split_nodes[0].id_ = original_node.id_

                        # Add the UUID of the next node to the end of all nodes
                        for i, new_split_node in enumerate(new_split_nodes[:-1]):
                            new_split_node.text = (
                                new_split_node.text
                                + "\n"
                                + self._create_comment_line(new_split_nodes[i + 1], 0)
                            ).strip()

                        # Add the UUID of the previous node to the beginning of all nodes
                        for i, new_split_node in enumerate(new_split_nodes[1:]):
                            new_split_node.text = (
                                self._create_comment_line(new_split_nodes[i])
                                + new_split_node.text
                            ).strip()

                        # Add the parent child info to all the new_nodes_
                        # derived from node
                        for new_split_node in new_split_nodes:
                            new_split_node.relationships[
                                NodeRelationship.CHILD
                            ] = original_node.child_nodes  # type: ignore
                            new_split_node.relationships[
                                NodeRelationship.PARENT
                            ] = original_node.parent_node  # type: ignore

                        # Go through chunks and replace all
                        # instances of node.node_id in relationships
                        # with new_nodes_[0].node_id
                        for old_node in chunks:
                            # Handle child nodes, which are a list
                            new_children = []
                            for old_nodes_child in old_node.child_nodes or []:
                                if old_nodes_child.node_id == original_node.node_id:
                                    new_children.append(
                                        new_split_nodes[0].as_related_node_info()
                                    )
                                new_children.append(old_nodes_child)
                            old_node.relationships[
                                NodeRelationship.CHILD
                            ] = new_children

                            # Handle parent node
                            if (
                                old_node.parent_node
                                and old_node.parent_node.node_id
                                == original_node.node_id
                            ):
                                old_node.relationships[
                                    NodeRelationship.PARENT
                                ] = new_split_nodes[0].as_related_node_info()

                        # Now save new_nodes_
                        new_nodes += new_split_nodes

                    chunks = new_nodes

                # Or just extract metadata
                if self.metadata_extractor:
                    chunks = self.metadata_extractor.process_nodes(  # type: ignore
                        chunks
                    )

                out += chunks
            else:
                raise ValueError(f"Could not parse code with language {self.language}.")

        return out
+
+    @staticmethod
+    def _get_indentation(text: str) -> Tuple[str, int, int]:
+        indent_char = None
+        minimum_chain = None
+
+        # Check that text is at least 1 line long
+        text_split = text.splitlines()
+        if len(text_split) == 0:
+            raise ValueError("Text should be at least one line long.")
+
+        for line in text_split:
+            stripped_line = line.lstrip()
+
+            if stripped_line:
+                # Get whether it's tabs or spaces
+                spaces_count = line.count(" ", 0, len(line) - len(stripped_line))
+                tabs_count = line.count("\t", 0, len(line) - len(stripped_line))
+
+                if not indent_char:
+                    if spaces_count:
+                        indent_char = " "
+                    if tabs_count:
+                        indent_char = "\t"
+
+                # Detect mixed indentation.
+                if spaces_count > 0 and tabs_count > 0:
+                    raise ValueError("Mixed indentation found.")
+                if indent_char == " " and tabs_count > 0:
+                    raise ValueError("Mixed indentation found.")
+                if indent_char == "\t" and spaces_count > 0:
+                    raise ValueError("Mixed indentation found.")
+
+                # Get the minimum chain of indent_char
+                if indent_char:
+                    char_count = line.count(
+                        indent_char, 0, len(line) - len(stripped_line)
+                    )
+                    if minimum_chain is not None:
+                        if char_count > 0:
+                            minimum_chain = min(char_count, minimum_chain)
+                    else:
+                        if char_count > 0:
+                            minimum_chain = char_count
+
+        # Handle edge case
+        if indent_char is None:
+            indent_char = " "
+        if minimum_chain is None:
+            minimum_chain = 4
+
+        # Get the first indent count
+        first_line = text_split[0]
+        first_indent_count = 0
+        for char in first_line:
+            if char == indent_char:
+                first_indent_count += 1
+            else:
+                break
+
+        # Return the default indent level if only one indentation level was found.
+        return indent_char, minimum_chain, first_indent_count // minimum_chain
+
    @staticmethod
    def _get_comment_text(node: TextNode) -> str:
        """Gets just the natural language text for a skeletonize comment.

        The comment points the reader at the node_id holding the full code.
        """
        return f"Code replaced for brevity. See node_id {node.node_id}"
+
+    @classmethod
+    def _create_comment_line(cls, node: TextNode, indention_lvl: int = -1) -> str:
+        """
+        Creates a comment line for a node.
+
+        Sometimes we don't use this in a loop because it requires recalculating
+        a lot of the same information. But it is handy.
+        """
+        # Create the text to replace the child_node.text with
+        language = node.metadata["language"]
+        if language not in _COMMENT_OPTIONS:
+            # TODO: Create a contribution message
+            raise KeyError("Language not yet supported. Please contribute!")
+        comment_options = _COMMENT_OPTIONS[language]
+        (
+            indentation_char,
+            indentation_count_per_lvl,
+            first_indentation_lvl,
+        ) = cls._get_indentation(node.text)
+        if indention_lvl != -1:
+            first_indentation_lvl = indention_lvl
+        else:
+            first_indentation_lvl += 1
+        return (
+            indentation_char * indentation_count_per_lvl * first_indentation_lvl
+            + comment_options.comment_template.format(cls._get_comment_text(node))
+            + "\n"
+        )
+
+    @classmethod
+    def _get_replacement_text(cls, child_node: TextNode) -> str:
+        """
+        Manufactures a the replacement text to use to skeletonize a given child node.
+        """
+        signature = child_node.metadata["inclusive_scopes"][-1]["signature"]
+        language = child_node.metadata["language"]
+        if language not in _COMMENT_OPTIONS:
+            # TODO: Create a contribution message
+            raise KeyError("Language not yet supported. Please contribute!")
+        comment_options = _COMMENT_OPTIONS[language]
+
+        # Create the text to replace the child_node.text with
+        (
+            indentation_char,
+            indentation_count_per_lvl,
+            first_indentation_lvl,
+        ) = cls._get_indentation(child_node.text)
+
+        # Start with a properly indented signature
+        replacement_txt = (
+            indentation_char * indentation_count_per_lvl * first_indentation_lvl
+            + signature
+        )
+
+        # Add brackets if necessary. Expandable in the
+        # future to other methods of scoping.
+        if comment_options.scope_method == _ScopeMethod.BRACKETS:
+            replacement_txt += " {\n"
+            replacement_txt += (
+                indentation_char
+                * indentation_count_per_lvl
+                * (first_indentation_lvl + 1)
+                + comment_options.comment_template.format(
+                    cls._get_comment_text(child_node)
+                )
+                + "\n"
+            )
+            replacement_txt += (
+                indentation_char * indentation_count_per_lvl * first_indentation_lvl
+                + "}"
+            )
+
+        elif comment_options.scope_method == _ScopeMethod.INDENTATION:
+            replacement_txt += "\n"
+            replacement_txt += indentation_char * indentation_count_per_lvl * (
+                first_indentation_lvl + 1
+            ) + comment_options.comment_template.format(
+                cls._get_comment_text(child_node)
+            )
+
+        elif comment_options.scope_method == _ScopeMethod.HTML_END_TAGS:
+            tag_name = child_node.metadata["inclusive_scopes"][-1]["name"]
+            end_tag = f"</{tag_name}>"
+            replacement_txt += "\n"
+            replacement_txt += (
+                indentation_char
+                * indentation_count_per_lvl
+                * (first_indentation_lvl + 1)
+                + comment_options.comment_template.format(
+                    cls._get_comment_text(child_node)
+                )
+                + "\n"
+            )
+            replacement_txt += (
+                indentation_char * indentation_count_per_lvl * first_indentation_lvl
+                + end_tag
+            )
+
+        else:
+            raise KeyError(f"Unrecognized enum value {comment_options.scope_method}")
+
+        return replacement_txt
+
+    @classmethod
+    def _skeletonize(cls, parent_node: TextNode, child_node: TextNode) -> None:
+        """WARNING: In Place Operation."""
+        # Simple protection clauses
+        if child_node.text not in parent_node.text:
+            raise ValueError("The child text is not contained inside the parent text.")
+        if child_node.node_id not in (c.node_id for c in parent_node.child_nodes or []):
+            raise ValueError("The child node is not a child of the parent node.")
+
+        # Now do the replacement
+        replacement_text = cls._get_replacement_text(child_node=child_node)
+        parent_node.text = parent_node.text.replace(child_node.text, replacement_text)
+
+    @classmethod
+    def _skeletonize_list(cls, nodes: List[TextNode]) -> None:
+        # Create a convenient map for mapping node id's to nodes
+        node_id_map = {n.node_id: n for n in nodes}
+
+        def recur(node: TextNode) -> None:
+            # If any children exist, skeletonize ourselves, starting at the root DFS
+            for child in node.child_nodes or []:
+                child_node = node_id_map[child.node_id]
+                cls._skeletonize(parent_node=node, child_node=child_node)
+                recur(child_node)
+
+        # Iterate over root nodes and recur
+        for n in nodes:
+            if n.parent_node is None:
+                recur(n)
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/query_engine.py b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/query_engine.py
new file mode 100644
index 0000000000..d6024abe20
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/llama_index/packs/code_hierarchy/query_engine.py
@@ -0,0 +1,155 @@
+from pathlib import Path
+import re
+from typing import Any, Dict, Sequence, Optional, Set, Tuple, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from llama_index.core.langchain_helpers.agents import LlamaIndexTool
+
+from llama_index.core.query_engine import CustomQueryEngine
+from llama_index.core.schema import BaseNode
+from llama_index.packs.code_hierarchy.code_hierarchy import CodeHierarchyNodeParser
+
+
# Default instructions handed to an agent alongside the repo map. The
# ``{repo_map}`` placeholder is filled in by
# ``CodeHierarchyKeywordQueryEngine.get_tool_instructions``.
DEFAULT_TOOL_INSTRUCTIONS = (
    "Search the tool by any element in this list to get more information about that element.\n"
    "If you see 'Code replaced for brevity' then a uuid, you may also search the tool with that uuid to see the full code.\n"
    "You may need to use the tool multiple times to fully answer the user message.\n"
    "The list is:\n"
    "{repo_map}\n"
)
+
+
class CodeHierarchyKeywordQueryEngine(CustomQueryEngine):
    """A keyword table made specifically to work with the code hierarchy node parser.

    Supports exact-match lookup of a node's text by uuid, module name, or
    scope name, falling back to the nearest ancestor found in the repo map.
    """

    # Nodes produced by the CodeHierarchyNodeParser to index.
    nodes: Sequence[BaseNode]
    # keyword -> (start_byte of the owning node, that node's text).
    node_dict: Optional[Dict[str, Tuple[int, str]]] = None
    # Maximum depth of the generated repo map; -1 includes all levels.
    repo_map_depth: int = -1
    # Whether get_tool_instructions() embeds the repo map text.
    include_repo_map: bool = True
    # (hierarchy dict, human-readable repo map string).
    repo_map: Optional[Tuple[Dict[str, Any], str]] = None
    # Template containing a ``{repo_map}`` placeholder.
    tool_instructions: str = DEFAULT_TOOL_INSTRUCTIONS

    def _setup_node_dict(self) -> None:
        """Initialize the index (keyword table plus repo map); called lazily."""
        self.node_dict = {}
        for node in self.nodes:
            keys = self._extract_keywords_from_node(node)
            for key in keys:
                # Later nodes overwrite earlier entries for the same key; the
                # _extract_* helpers only emit a name when this node starts
                # earlier than the node currently holding it.
                self.node_dict[key] = (node.metadata["start_byte"], node.text)
        self.repo_map = CodeHierarchyNodeParser.get_code_hierarchy_from_nodes(
            self.nodes, max_depth=self.repo_map_depth
        )

    def _extract_keywords_from_node(self, node: BaseNode) -> Set[str]:
        """Determine the keywords associated with the node in the index."""
        keywords = self._extract_uuid_from_node(node)
        keywords |= self._extract_module_from_node(node)
        keywords |= self._extract_name_from_node(node)
        return keywords

    def _extract_uuid_from_node(self, node: BaseNode) -> Set[str]:
        """Extract the uuid from the node."""
        return {node.id_}

    def _extract_module_from_node(self, node: BaseNode) -> Set[str]:
        """Extract the module name from the node.

        Only root nodes (empty ``inclusive_scopes``) represent a module; the
        filename with every extension stripped becomes the keyword.
        """
        keywords = set()
        if not node.metadata["inclusive_scopes"]:
            path = Path(node.metadata["filepath"])
            name = path.name
            # Strip everything from the first dot onward (all extensions).
            name = re.sub(r"\..*$", "", name)
            if name in self.node_dict:
                its_start_byte, _ = self.node_dict[name]
                # Prefer whichever node appears earliest in the file.
                if node.metadata["start_byte"] < its_start_byte:
                    keywords.add(name)
            else:
                keywords.add(name)
        return keywords

    def _extract_name_from_node(self, node: BaseNode) -> Set[str]:
        """Extract the name and signature from the node.

        Uses the innermost scope's name; on a tie, the node with the earliest
        start_byte wins the keyword.
        """
        keywords = set()
        if node.metadata["inclusive_scopes"]:
            name = node.metadata["inclusive_scopes"][-1]["name"]
            start_byte = node.metadata["start_byte"]
            if name in self.node_dict:
                its_start_byte, _ = self.node_dict[name]
                if start_byte < its_start_byte:
                    keywords.add(name)
            else:
                keywords.add(name)
        return keywords

    def custom_query(self, query: str) -> str:
        """Query the index. Only use exact matches.
        If there is no exact match, but there is one for a parent, returns the parent.
        """
        if self.node_dict is None or self.repo_map is None:
            self._setup_node_dict()

        def get_all_dict_recursive(inp: Dict[str, Any]) -> Set[str]:
            """Get all keys and values from a dictionary of dictionaries recursively."""
            kvs = set()
            for key, value in inp.items():
                kvs.add(key)
                if isinstance(value, dict):
                    kvs |= get_all_dict_recursive(value)
                else:
                    kvs.add(value)
            return kvs

        def get_parent_dict_recursive(inp: Dict[str, Any], query: str) -> Optional[str]:
            """Get the parent of a key in a dictionary of dictionaries recursively."""
            for key, value in inp.items():
                if isinstance(value, dict):
                    if query in value:
                        return key
                    else:
                        parent = get_parent_dict_recursive(value, query)
                        if parent is not None:
                            return parent
            return None

        # Exact hit: return the stored node text directly.
        if query in self.node_dict:
            return self.node_dict[query][1]

        # No exact hit: climb the repo-map hierarchy until we reach a known
        # ancestor, or give up with the literal string "None".
        kvs = get_all_dict_recursive(self.repo_map[0])
        parent_query = query
        while parent_query not in kvs:
            parent_query = get_parent_dict_recursive(self.repo_map[0], parent_query)
            if parent_query is None:
                return "None"

        # After finding the parent_query, ensure it's in self.node_dict before accessing
        if parent_query in self.node_dict:
            return self.node_dict[parent_query][1]
        else:
            return "None"

    def get_tool_instructions(self) -> str:
        """Get the tool instructions, with the repo map substituted in if enabled."""
        if self.node_dict is None or self.repo_map is None:
            self._setup_node_dict()
        return self.tool_instructions.format(
            repo_map=self.repo_map[1] if self.include_repo_map else ""
        )

    def as_langchain_tool(
        self,
        **tool_kwargs: Any,
    ) -> "LlamaIndexTool":
        """
        Return the index as a langchain tool.
        Set a repo map depth of -1 to include all nodes.
        Otherwise set the depth to the desired max depth.
        """
        # Imported lazily so langchain stays an optional dependency.
        from llama_index.core.langchain_helpers.agents import LlamaIndexTool

        if self.node_dict is None or self.repo_map is None:
            self._setup_node_dict()
        return LlamaIndexTool(
            name="Code Search",
            description=self.get_tool_instructions(),
            query_engine=self,
            **tool_kwargs,
        )
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/pyproject.toml b/llama-index-packs/llama-index-packs-code-hierarchy/pyproject.toml
new file mode 100644
index 0000000000..b5928b7070
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/pyproject.toml
@@ -0,0 +1,53 @@
+[build-system]
+build-backend = "poetry.core.masonry.api"
+requires = ["poetry-core"]
+
+[tool.codespell]
+check-filenames = true
+check-hidden = true
+skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
+
+[tool.llamahub]
+classes = ["CodeHierarchyPack"]
+contains_example = true
+import_path = "llama_index.packs.code_hierarchy"
+
+[tool.mypy]
+disallow_untyped_defs = true
+exclude = ["_static", "build", "examples", "notebooks", "venv"]
+ignore_missing_imports = true
+python_version = "3.8"
+
+[tool.poetry]
+authors = ["Ryan Peach <rgpeach10@gmail.com>"]
+description = "A node parser which can create a hierarchy of all code scopes in a directory."
+keywords = ["c", "code", "cpp", "hierarchy", "html", "javascript", "python", "repo", "typescript"]
+license = "MIT"
+maintainers = ["ryanpeach"]
+name = "llama-index-packs-code-hierarchy"
+readme = "README.md"
+version = "0.1.0"
+
+[tool.poetry.dependencies]
+python = ">=3.8.1,<3.12"
+llama-index-core = "^0.10.1"
+tree-sitter-languages = "^1.8.0"
+tree-sitter = "^0.20.2"
+llama-index-agent-openai = "^0.1.5"
+llama-index-readers-file = "^0.1.8"
+
+[tool.poetry.group.dev.dependencies]
+ipython = "8.10.0"
+jupyter = "^1.0.0"
+pytest = "7.2.1"
+
+[tool.poetry.group.dev.dependencies.black]
+extras = ["jupyter"]
+version = "<=23.9.1,>=23.7.0"
+
+[tool.poetry.group.dev.dependencies.codespell]
+extras = ["toml"]
+version = ">=v2.2.6"
+
+[[tool.poetry.packages]]
+include = "llama_index/"
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/tests/BUILD b/llama-index-packs/llama-index-packs-code-hierarchy/tests/BUILD
new file mode 100644
index 0000000000..de1e1004db
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/tests/BUILD
@@ -0,0 +1,3 @@
+python_tests(
+  dependencies=['llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file']
+)
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/tests/__init__.py b/llama-index-packs/llama-index-packs-code-hierarchy/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_no_skeleton.py b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_no_skeleton.py
new file mode 100644
index 0000000000..c9d0c8ae32
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_no_skeleton.py
@@ -0,0 +1,705 @@
+"""Test CodeHierarchyNodeParser with skeleton option set to False."""
+import os
+from typing import List, cast
+
+from llama_index.packs.code_hierarchy import CodeHierarchyNodeParser
+from llama_index.core.schema import NodeRelationship, RelatedNodeInfo, TextNode
+
+
def test_python_code_splitter() -> None:
    """Split a small Python module and verify the module/class/method hierarchy."""
    # Skipped in CI — presumably tree-sitter parsers are unavailable there. TODO confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="python", skeleton=False, chunk_min_characters=0
    )

    text = """\
class Foo:
    def bar() -> None:
        print("bar")

    async def baz():
        print("baz")"""

    text_node = TextNode(
        text=text,
        metadata={
            "module": "example.foo",
        },
    )

    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # This is the module scope
    assert chunks[0].text == text
    assert chunks[0].metadata["module"] == "example.foo"
    assert chunks[0].metadata["inclusive_scopes"] == []
    assert NodeRelationship.PARENT not in chunks[0].relationships
    assert [c.node_id for c in chunks[0].relationships[NodeRelationship.CHILD]] == [
        chunks[1].id_
    ]
    assert isinstance(chunks[0].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[0].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[0].relationships
    assert NodeRelationship.NEXT not in chunks[0].relationships

    # This is the class scope (the class spans the whole module, so text matches)
    assert chunks[1].text == text
    assert chunks[1].metadata["module"] == "example.foo"
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"}
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == [
        chunks[2].id_,
        chunks[3].id_,
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[1].relationships
    assert NodeRelationship.NEXT not in chunks[1].relationships

    # This is the first method scope
    assert (
        chunks[2].text
        == """\
    def bar() -> None:
        print("bar")"""
    )
    assert chunks[2].metadata["module"] == "example.foo"
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"},
        {
            "name": "bar",
            "type": "function_definition",
            "signature": "def bar() -> None:",
        },
    ]
    assert isinstance(chunks[2].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )
    assert chunks[2].relationships[NodeRelationship.CHILD] == []
    assert isinstance(chunks[2].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[2].relationships
    assert NodeRelationship.NEXT not in chunks[2].relationships

    # This is the second method scope
    assert (
        chunks[3].text
        == """\
    async def baz():
        print("baz")"""
    )
    assert chunks[3].metadata["module"] == "example.foo"
    assert chunks[3].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"},
        {"name": "baz", "type": "function_definition", "signature": "async def baz():"},
    ]
    assert isinstance(chunks[3].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )
    assert chunks[3].relationships[NodeRelationship.CHILD] == []
    assert isinstance(chunks[3].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[3].relationships
    assert NodeRelationship.NEXT not in chunks[3].relationships
+
+
def test_python_code_splitter_with_decorators() -> None:
    """Split decorated Python definitions.

    Per the assertions below, a scope's own decorators are excluded from its
    chunk text (the class chunk starts at ``class Foo:``; the method chunk
    starts at ``def bar``), while decorators of nested scopes remain in the
    enclosing chunk's text.
    """
    # Skipped in CI — presumably tree-sitter parsers are unavailable there. TODO confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="python", skeleton=False, chunk_min_characters=0
    )

    text = """\
@foo
class Foo:
    @bar
    @barfoo
    def bar() -> None:
        print("bar")"""

    text_node = TextNode(
        text=text,
        metadata={
            "module": "example.foo",
        },
    )

    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # This is the module scope
    assert chunks[0].text == text
    assert chunks[0].metadata["module"] == "example.foo"
    assert chunks[0].metadata["inclusive_scopes"] == []
    assert NodeRelationship.PARENT not in chunks[0].relationships
    assert [c.node_id for c in chunks[0].relationships[NodeRelationship.CHILD]] == [
        chunks[1].id_
    ]
    assert isinstance(chunks[0].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[0].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[0].relationships
    assert NodeRelationship.NEXT not in chunks[0].relationships

    # This is the class scope
    assert (
        chunks[1].text
        == """\
class Foo:
    @bar
    @barfoo
    def bar() -> None:
        print("bar")"""
    )
    assert chunks[1].metadata["module"] == "example.foo"
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"}
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == [
        chunks[2].id_,
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[1].relationships
    assert NodeRelationship.NEXT not in chunks[1].relationships

    # This is the first method scope
    assert (
        chunks[2].text
        == """\
    def bar() -> None:
        print("bar")"""
    )
    assert chunks[2].metadata["module"] == "example.foo"
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"},
        {
            "name": "bar",
            "type": "function_definition",
            "signature": "def bar() -> None:",
        },
    ]
    assert isinstance(chunks[2].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )
    assert chunks[2].relationships[NodeRelationship.CHILD] == []
    assert isinstance(chunks[2].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[2].relationships
    assert NodeRelationship.NEXT not in chunks[2].relationships
+
+
def test_html_code_splitter() -> None:
    """Test case for code splitting using HTML."""
    # Skipped in CI — presumably tree-sitter parsers are unavailable there. TODO confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="html",
        # One character longer than the <title> line, so that element alone is
        # too small to become its own chunk.
        chunk_min_characters=len("    <title>My Example Page</title>") + 1,
        skeleton=False,
    )

    text = """\
<!DOCTYPE html>
<html>
<head>
    <title>My Example Page</title>
</head>
<body>
    <h1>Welcome to My Example Page</h1>
    <p>This is a basic HTML page example.</p>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 3</li>
    </ul>
    <img src="https://example.com/image.jpg" alt="Example Image">
</body>
</html>"""

    text_node = TextNode(
        text=text,
    )
    chunks = code_splitter.get_nodes_from_documents([text_node])

    # This is the DOCTYPE scope
    assert chunks[0].text == text
    assert chunks[0].metadata["inclusive_scopes"] == []
    assert NodeRelationship.PARENT not in chunks[0].relationships
    assert [c.node_id for c in chunks[0].relationships[NodeRelationship.CHILD]] == [
        chunks[1].id_
    ]
    assert (
        cast(RelatedNodeInfo, chunks[0].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[0].relationships
    assert NodeRelationship.NEXT not in chunks[0].relationships

    # This is the html scope
    assert (
        chunks[1].text
        == """\
<html>
<head>
    <title>My Example Page</title>
</head>
<body>
    <h1>Welcome to My Example Page</h1>
    <p>This is a basic HTML page example.</p>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 3</li>
    </ul>
    <img src="https://example.com/image.jpg" alt="Example Image">
</body>
</html>"""
    )
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "html", "type": "element", "signature": "<html>"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == [
        chunks[2].id_,
        chunks[3].id_,
    ]
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[1].relationships
    assert NodeRelationship.NEXT not in chunks[1].relationships

    # Head chunk
    assert (
        chunks[2].text
        == """\
<head>
    <title>My Example Page</title>
</head>"""
    )
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "html", "type": "element", "signature": "<html>"},
        {"name": "head", "type": "element", "signature": "<head>"},
    ]
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )  # Parent should be <html>
    assert [
        c.node_id for c in chunks[2].relationships[NodeRelationship.CHILD]
    ] == []  # <title> is below chunk_min_characters, so it does not get its own chunk
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[2].relationships
    assert NodeRelationship.NEXT not in chunks[2].relationships

    # Test the fourth chunk (<body> tag and its content)
    assert (
        chunks[3].text
        == """\
<body>
    <h1>Welcome to My Example Page</h1>
    <p>This is a basic HTML page example.</p>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 3</li>
    </ul>
    <img src="https://example.com/image.jpg" alt="Example Image">
</body>"""
    )
    assert chunks[3].metadata["inclusive_scopes"] == [
        {"name": "html", "type": "element", "signature": "<html>"},
        {"name": "body", "type": "element", "signature": "<body>"},
    ]
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )  # Parent should be <html>
    assert chunks[5].id_ in [
        c.node_id for c in chunks[3].relationships[NodeRelationship.CHILD]
    ]
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[3].relationships
    assert NodeRelationship.NEXT not in chunks[3].relationships

    # Test the seventh chunk (<ul> tag and its content)
    assert (
        chunks[6].text
        == """\
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 3</li>
    </ul>"""
    )
    assert chunks[6].metadata["inclusive_scopes"] == [
        {"name": "html", "type": "element", "signature": "<html>"},
        {"name": "body", "type": "element", "signature": "<body>"},
        {"name": "ul", "type": "element", "signature": "<ul>"},
    ]
    assert (
        cast(RelatedNodeInfo, chunks[6].relationships[NodeRelationship.PARENT]).node_id
        == chunks[3].id_
    )  # Parent should be <body>
    assert [c.node_id for c in chunks[6].relationships[NodeRelationship.CHILD]] == []
    assert (
        cast(RelatedNodeInfo, chunks[6].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[6].relationships
    assert NodeRelationship.NEXT not in chunks[6].relationships
+
+
def test_typescript_code_splitter() -> None:
    """Test case for code splitting using TypeScript."""
    # Skipped in CI — presumably tree-sitter parsers are unavailable there. TODO confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="typescript", skeleton=False, chunk_min_characters=0
    )

    text = """\
function foo() {
    console.log("bar");
}

class Example {
    exampleMethod() {
        console.log("line1");
    }
}

function baz() {
    console.log("bbq");
}"""

    text_node = TextNode(
        text=text,
    )
    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # Test the second chunk (function foo)
    assert (
        chunks[1].text
        == """\
function foo() {
    console.log("bar");
}"""
    )
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "foo", "type": "function_declaration", "signature": "function foo()"}
    ]
    assert chunks[1].relationships[NodeRelationship.PARENT].node_id == chunks[0].id_
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == []

    # Test the third chunk (class Example)
    assert (
        chunks[2].text
        == """\
class Example {
    exampleMethod() {
        console.log("line1");
    }
}"""
    )
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "Example", "type": "class_declaration", "signature": "class Example"}
    ]
    assert chunks[2].relationships[NodeRelationship.PARENT].node_id == chunks[0].id_
    assert [c.node_id for c in chunks[2].relationships[NodeRelationship.CHILD]] == [
        chunks[3].id_
    ]

    # Test the fourth chunk (exampleMethod in class Example)
    assert (
        chunks[3].text
        == """\
    exampleMethod() {
        console.log("line1");
    }"""
    )
    assert chunks[3].metadata["inclusive_scopes"] == [
        {"name": "Example", "type": "class_declaration", "signature": "class Example"},
        {
            "name": "exampleMethod",
            "type": "method_definition",
            "signature": "exampleMethod()",
        },
    ]
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.PARENT]).node_id
        == chunks[2].id_
    )
    assert chunks[3].relationships[NodeRelationship.CHILD] == []

    # Test the fifth chunk (function baz)
    assert (
        chunks[4].text
        == """\
function baz() {
    console.log("bbq");
}"""
    )
    assert chunks[4].metadata["inclusive_scopes"] == [
        {"name": "baz", "type": "function_declaration", "signature": "function baz()"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[4].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert chunks[4].relationships[NodeRelationship.CHILD] == []
+
+
def test_tsx_code_splitter() -> None:
    """Test case for code splitting using TypeScript JSX (TSX)."""
    # Skipped in CI — presumably tree-sitter parsers are unavailable there. TODO confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="typescript", skeleton=False, chunk_min_characters=0
    )

    text = """\
import React from 'react';

interface Person {
  name: string;
  age: number;
}

const ExampleComponent: React.FC = () => {
  const person: Person = {
    name: 'John Doe',
    age: 30,
  };

  return (
    <div>
      <h1>Hello, {person.name}!</h1>
      <p>You are {person.age} years old.</p>
    </div>
  );
};

export default ExampleComponent;"""

    text_node = TextNode(
        text=text,
    )
    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # Test the first chunk (import statement)
    assert chunks[0].text == text
    assert chunks[0].metadata["inclusive_scopes"] == []

    # Test the second chunk (interface definition)
    assert (
        chunks[1].text
        == """\
interface Person {
  name: string;
  age: number;
}"""
    )
    assert chunks[1].metadata["inclusive_scopes"] == [
        {
            "name": "Person",
            "type": "interface_declaration",
            "signature": "interface Person",
        }
    ]
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert chunks[1].relationships[NodeRelationship.CHILD] == []

    # Test the third chunk (ExampleComponent function definition)
    assert (
        chunks[2].text
        == """\
const ExampleComponent: React.FC = () => {
  const person: Person = {
    name: 'John Doe',
    age: 30,
  };

  return (
    <div>
      <h1>Hello, {person.name}!</h1>
      <p>You are {person.age} years old.</p>
    </div>
  );
};"""
    )
    assert chunks[2].metadata["inclusive_scopes"] == [
        {
            "name": "ExampleComponent",
            "type": "lexical_declaration",
            "signature": "const ExampleComponent: React.FC = () =>",
        }
    ]
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )

    # TODO: Unfortunately tree_splitter errors on the html elements
+
+
def test_cpp_code_splitter() -> None:
    """Test case for code splitting using C++."""
    # NOTE(review): skipped on CI — presumably the tree-sitter parsers the
    # splitter needs are unavailable there; confirm.
    if "CI" in os.environ:
        return

    # Removing chunk_lines, chunk_lines_overlap, and max_chars to focus on scopes
    code_splitter = CodeHierarchyNodeParser(
        language="cpp",
        skeleton=False,
        chunk_min_characters=0,
    )

    text = """\
#include <iostream>

class MyClass {       // The class
  public:             // Access specifier
    int myNum;        // Attribute (int variable)
    string myString;  // Attribute (string variable)
    void myMethod() { // Method/function defined inside the class
        cout << "Hello World!";
    }
};

int main() {
    std::cout << "Hello, World!" << std::endl;
    return 0;
}"""

    text_node = TextNode(
        text=text,
    )
    chunks = code_splitter.get_nodes_from_documents([text_node])

    # Test the first chunk (module scope: the full source text).
    assert chunks[0].text == text
    assert chunks[0].metadata["inclusive_scopes"] == []

    # Test the second chunk (class MyClass).  Note the class's trailing `;`
    # is not part of the chunk text.
    assert (
        chunks[1].text
        == """\
class MyClass {       // The class
  public:             // Access specifier
    int myNum;        // Attribute (int variable)
    string myString;  // Attribute (string variable)
    void myMethod() { // Method/function defined inside the class
        cout << "Hello World!";
    }
}"""
    )
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "MyClass", "type": "class_specifier", "signature": "class MyClass"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == [
        chunks[2].id_
    ]

    # Test the third chunk (myMethod in class MyClass)
    assert (
        chunks[2].text
        == """\
    void myMethod() { // Method/function defined inside the class
        cout << "Hello World!";
    }"""
    )
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "MyClass", "type": "class_specifier", "signature": "class MyClass"},
        {
            "name": "myMethod()",
            "type": "function_definition",
            "signature": "void myMethod()",
        },
    ]
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )
    assert chunks[2].relationships[NodeRelationship.CHILD] == []

    # Test the fourth chunk (main function)
    assert (
        chunks[3].text
        == """\
int main() {
    std::cout << "Hello, World!" << std::endl;
    return 0;
}"""
    )
    assert chunks[3].metadata["inclusive_scopes"] == [
        {"name": "main()", "type": "function_definition", "signature": "int main()"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert chunks[3].relationships[NodeRelationship.CHILD] == []
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_with_skeleton.py b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_with_skeleton.py
new file mode 100644
index 0000000000..6e0b49b93f
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_code_hierarchy_with_skeleton.py
@@ -0,0 +1,526 @@
+"""Test CodeHierarchyNodeParser with skeleton option set to False."""
+import os
+from typing import List, cast
+
+from llama_index.packs.code_hierarchy import CodeHierarchyNodeParser
+from llama_index.core.schema import NodeRelationship, RelatedNodeInfo, TextNode
+
+
def test_python_code_splitter() -> None:
    """Test case for code splitting using python with skeleton=True."""
    # NOTE(review): skipped on CI — presumably the tree-sitter parsers the
    # splitter needs are unavailable there; confirm.
    if "CI" in os.environ:
        return

    # With skeleton=True the assertions below expect each child scope's body
    # to be replaced by a comment line built from _get_comment_text(child).
    code_splitter = CodeHierarchyNodeParser(
        language="python", skeleton=True, chunk_min_characters=0
    )

    text = """\
class Foo:
    def bar() -> None:
        print("bar")

    async def baz():
        print("baz")"""

    text_node = TextNode(
        text=text,
        metadata={
            "module": "example.foo",
        },
    )

    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # This is the module scope; the class body is skeletonized into a comment.
    assert (
        chunks[0].text
        == f"""\
class Foo:
    # {CodeHierarchyNodeParser._get_comment_text(chunks[1])}"""
    )
    # Original document metadata must be propagated to every chunk.
    assert chunks[0].metadata["module"] == "example.foo"
    assert chunks[0].metadata["inclusive_scopes"] == []
    assert NodeRelationship.PARENT not in chunks[0].relationships
    assert [c.node_id for c in chunks[0].relationships[NodeRelationship.CHILD]] == [
        chunks[1].id_
    ]
    assert isinstance(chunks[0].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[0].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[0].relationships
    assert NodeRelationship.NEXT not in chunks[0].relationships

    # This is the class scope; both method bodies are skeletonized.
    assert (
        chunks[1].text
        == f"""\
class Foo:
    def bar() -> None:
        # {CodeHierarchyNodeParser._get_comment_text(chunks[2])}

    async def baz():
        # {CodeHierarchyNodeParser._get_comment_text(chunks[3])}"""
    )
    assert chunks[1].metadata["module"] == "example.foo"
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"}
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == [
        chunks[2].id_,
        chunks[3].id_,
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[1].relationships
    assert NodeRelationship.NEXT not in chunks[1].relationships

    # This is the first method scope (full body, original indentation kept).
    assert (
        chunks[2].text
        == """\
    def bar() -> None:
        print("bar")"""
    )
    assert chunks[2].metadata["module"] == "example.foo"
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"},
        {
            "name": "bar",
            "type": "function_definition",
            "signature": "def bar() -> None:",
        },
    ]
    assert isinstance(chunks[2].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )
    assert chunks[2].relationships[NodeRelationship.CHILD] == []
    assert isinstance(chunks[2].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[2].relationships
    assert NodeRelationship.NEXT not in chunks[2].relationships

    # This is the second method scope (async def is part of the signature).
    assert (
        chunks[3].text
        == """\
    async def baz():
        print("baz")"""
    )
    assert chunks[3].metadata["module"] == "example.foo"
    assert chunks[3].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"},
        {"name": "baz", "type": "function_definition", "signature": "async def baz():"},
    ]
    assert isinstance(chunks[3].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )
    assert chunks[3].relationships[NodeRelationship.CHILD] == []
    assert isinstance(chunks[3].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[3].relationships
    assert NodeRelationship.NEXT not in chunks[3].relationships
+
+
def test_python_code_splitter_with_decorators() -> None:
    """Test that decorators stay attached to the scope they decorate.

    A class decorator belongs to the module-scope skeleton; method decorators
    belong to the class-scope skeleton (but not to the method chunk itself).
    """
    # NOTE(review): skipped on CI — presumably the tree-sitter parsers the
    # splitter needs are unavailable there; confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="python", skeleton=True, chunk_min_characters=0
    )

    text = """\
@foo
class Foo:
    @bar
    @barfoo
    def bar() -> None:
        print("bar")"""

    text_node = TextNode(
        text=text,
        metadata={
            "module": "example.foo",
        },
    )

    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # This is the module scope: @foo stays with the class skeleton here.
    assert (
        chunks[0].text
        == f"""\
@foo
class Foo:
    # {CodeHierarchyNodeParser._get_comment_text(chunks[1])}"""
    )
    assert chunks[0].metadata["module"] == "example.foo"
    assert chunks[0].metadata["inclusive_scopes"] == []
    assert NodeRelationship.PARENT not in chunks[0].relationships
    assert [c.node_id for c in chunks[0].relationships[NodeRelationship.CHILD]] == [
        chunks[1].id_
    ]
    assert isinstance(chunks[0].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[0].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[0].relationships
    assert NodeRelationship.NEXT not in chunks[0].relationships

    # This is the class scope: the method decorators stay with the class
    # skeleton (the class's own @foo decorator does not repeat here).
    assert (
        chunks[1].text
        == f"""\
class Foo:
    @bar
    @barfoo
    def bar() -> None:
        # {CodeHierarchyNodeParser._get_comment_text(chunks[2])}"""
    )
    assert chunks[1].metadata["module"] == "example.foo"
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"}
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == [
        chunks[2].id_,
    ]
    assert isinstance(chunks[1].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[1].relationships
    assert NodeRelationship.NEXT not in chunks[1].relationships

    # This is the method scope: its own decorators are NOT in the chunk text.
    assert (
        chunks[2].text
        == """\
    def bar() -> None:
        print("bar")"""
    )
    assert chunks[2].metadata["module"] == "example.foo"
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "Foo", "type": "class_definition", "signature": "class Foo:"},
        {
            "name": "bar",
            "type": "function_definition",
            "signature": "def bar() -> None:",
        },
    ]
    assert isinstance(chunks[2].relationships[NodeRelationship.PARENT], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )
    assert chunks[2].relationships[NodeRelationship.CHILD] == []
    assert isinstance(chunks[2].relationships[NodeRelationship.SOURCE], RelatedNodeInfo)
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[2].relationships
    assert NodeRelationship.NEXT not in chunks[2].relationships
+
+
def test_html_code_splitter() -> None:
    """Test case for code splitting using HTML."""
    # NOTE(review): skipped on CI — presumably the tree-sitter parsers the
    # splitter needs are unavailable there; confirm.
    if "CI" in os.environ:
        return

    # chunk_min_characters is set just above the <title> element's length so
    # that small elements (like <title>) do NOT become their own chunks.
    code_splitter = CodeHierarchyNodeParser(
        language="html",
        chunk_min_characters=len("    <title>My Example Page</title>") + 1,
        skeleton=True,
    )

    text = """\
<!DOCTYPE html>
<html>
<head>
    <title>My Example Page</title>
</head>
<body>
    <h1>Welcome to My Example Page</h1>
    <p>This is a basic HTML page example.</p>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 3</li>
    </ul>
    <img src="https://example.com/image.jpg" alt="Example Image">
</body>
</html>"""

    text_node = TextNode(
        text=text,
    )
    chunks = code_splitter.get_nodes_from_documents([text_node])

    # This is the document scope; skeleton comments use HTML comment syntax.
    assert (
        chunks[0].text
        == f"""\
<!DOCTYPE html>
<html>
    <!-- {CodeHierarchyNodeParser._get_comment_text(chunks[1])} -->
</html>"""
    )
    assert chunks[0].metadata["inclusive_scopes"] == []
    assert NodeRelationship.PARENT not in chunks[0].relationships
    assert [c.node_id for c in chunks[0].relationships[NodeRelationship.CHILD]] == [
        chunks[1].id_
    ]
    assert (
        cast(RelatedNodeInfo, chunks[0].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[0].relationships
    assert NodeRelationship.NEXT not in chunks[0].relationships

    # This is the html scope with <head> and <body> skeletonized.
    assert (
        chunks[1].text
        == f"""\
<html>
<head>
    <!-- {CodeHierarchyNodeParser._get_comment_text(chunks[2])} -->
</head>
<body>
    <!-- {CodeHierarchyNodeParser._get_comment_text(chunks[3])} -->
</body>
</html>"""
    )
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "html", "type": "element", "signature": "<html>"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == [
        chunks[2].id_,
        chunks[3].id_,
    ]
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[1].relationships
    assert NodeRelationship.NEXT not in chunks[1].relationships

    # Head chunk
    assert (
        chunks[2].text
        == """\
<head>
    <title>My Example Page</title>
</head>"""
    )
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "html", "type": "element", "signature": "<html>"},
        {"name": "head", "type": "element", "signature": "<head>"},
    ]
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[1].id_
    )  # Parent should be <html>
    assert [
        c.node_id for c in chunks[2].relationships[NodeRelationship.CHILD]
    ] == []  # No children: <title> is shorter than chunk_min_characters
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.SOURCE]).node_id
        == text_node.id_
    )
    assert NodeRelationship.PREVIOUS not in chunks[2].relationships
    assert NodeRelationship.NEXT not in chunks[2].relationships
+
+
def test_typescript_code_splitter() -> None:
    """Test case for code splitting using TypeScript with skeleton=True."""
    # NOTE(review): skipped on CI — presumably the tree-sitter parsers the
    # splitter needs are unavailable there; confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="typescript", skeleton=True, chunk_min_characters=0
    )

    text = """\
function foo() {
    console.log("bar");
}

class Example {
    exampleMethod() {
        console.log("line1");
    }
}

function baz() {
    console.log("bbq");
}"""

    text_node = TextNode(
        text=text,
    )
    # The parser returns TextNode chunks; the previous List[RelatedNodeInfo]
    # annotation was a copy/paste slip.
    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # Fstrings don't like forward slash
    double_forward_slash: str = "//"
    # Module scope: each top-level scope's body becomes a `//` skeleton comment.
    assert (
        chunks[0].text
        == f"""\
function foo() {{
    {double_forward_slash} {CodeHierarchyNodeParser._get_comment_text(chunks[1])}
}}

class Example {{
    {double_forward_slash} {CodeHierarchyNodeParser._get_comment_text(chunks[2])}
}}

function baz() {{
    {double_forward_slash} {CodeHierarchyNodeParser._get_comment_text(chunks[4])}
}}"""
    )

    # Test the second chunk (function foo)
    assert (
        chunks[1].text
        == """\
function foo() {
    console.log("bar");
}"""
    )
    assert chunks[1].metadata["inclusive_scopes"] == [
        {"name": "foo", "type": "function_declaration", "signature": "function foo()"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[1].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[1].relationships[NodeRelationship.CHILD]] == []

    # Test the third chunk (class Example, its method skeletonized)
    assert (
        chunks[2].text
        == f"""\
class Example {{
    exampleMethod() {{
        {double_forward_slash} {CodeHierarchyNodeParser._get_comment_text(chunks[3])}
    }}
}}"""
    )
    assert chunks[2].metadata["inclusive_scopes"] == [
        {"name": "Example", "type": "class_declaration", "signature": "class Example"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[2].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert [c.node_id for c in chunks[2].relationships[NodeRelationship.CHILD]] == [
        chunks[3].id_
    ]

    # Test the fourth chunk (exampleMethod in class Example)
    assert (
        chunks[3].text
        == """\
    exampleMethod() {
        console.log("line1");
    }"""
    )
    assert chunks[3].metadata["inclusive_scopes"] == [
        {"name": "Example", "type": "class_declaration", "signature": "class Example"},
        {
            "name": "exampleMethod",
            "type": "method_definition",
            "signature": "exampleMethod()",
        },
    ]
    assert (
        cast(RelatedNodeInfo, chunks[3].relationships[NodeRelationship.PARENT]).node_id
        == chunks[2].id_
    )
    assert chunks[3].relationships[NodeRelationship.CHILD] == []

    # Test the fifth chunk (function baz)
    assert (
        chunks[4].text
        == """\
function baz() {
    console.log("bbq");
}"""
    )
    assert chunks[4].metadata["inclusive_scopes"] == [
        {"name": "baz", "type": "function_declaration", "signature": "function baz()"}
    ]
    assert (
        cast(RelatedNodeInfo, chunks[4].relationships[NodeRelationship.PARENT]).node_id
        == chunks[0].id_
    )
    assert chunks[4].relationships[NodeRelationship.CHILD] == []
+
+
+# No need to test everything that is in test_code_hierarchy_no_skeleton
+
+
def test_typescript_code_splitter_2() -> None:
    """Test that a trailing newline after the last scope is preserved."""
    # NOTE(review): skipped on CI — presumably the tree-sitter parsers the
    # splitter needs are unavailable there; confirm.
    if "CI" in os.environ:
        return

    code_splitter = CodeHierarchyNodeParser(
        language="typescript", skeleton=True, chunk_min_characters=0
    )

    text = """\
class Example {
    exampleMethod() {
        console.log("line1");
    }
}
"""

    text_node = TextNode(
        text=text,
    )
    # The parser returns TextNode chunks; the previous List[RelatedNodeInfo]
    # annotation was a copy/paste slip.
    chunks: List[TextNode] = code_splitter.get_nodes_from_documents([text_node])

    # Fstrings don't like forward slash
    double_forward_slash: str = "//"
    # Module scope keeps the trailing newline after the skeletonized class.
    assert (
        chunks[0].text
        == f"""\
class Example {{
    {double_forward_slash} {CodeHierarchyNodeParser._get_comment_text(chunks[1])}
}}
"""
    )
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py
new file mode 100644
index 0000000000..b389d5054e
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_query_engine.py
@@ -0,0 +1,97 @@
+"""Test CodeHierarchyNodeParser reading itself."""
+from typing import Sequence
+
+import pytest
+from llama_index.core import SimpleDirectoryReader
+from pytest import fixture
+from llama_index.packs.code_hierarchy import CodeHierarchyNodeParser
+from llama_index.core.text_splitter import CodeSplitter
+from pathlib import Path
+from llama_index.core.schema import BaseNode
+import re
+
+from llama_index.packs.code_hierarchy import CodeHierarchyKeywordQueryEngine
+
+
def print_python(python_text: str) -> None:
    """Print *python_text* wrapped in a markdown python code fence (nice in ipynb)."""
    print(f"```python\n{python_text}```")
+
+
@fixture(params=[(80, 1000, 10), (500, 5000, 100)])
def code_hierarchy_nodes(request) -> Sequence[BaseNode]:
    """Parse this pack's own code_hierarchy.py into hierarchy nodes.

    Parametrized as (chunk_min_characters, max_chars, chunk_lines) so the
    tests run against both a fine-grained and a coarse chunking.
    """
    reader = SimpleDirectoryReader(
        input_files=[
            # Resolve the module relative to this test file so the fixture
            # works regardless of the pytest invocation directory.
            Path(__file__).parent
            / Path("../llama_index/packs/code_hierarchy/code_hierarchy.py")
        ],
        file_metadata=lambda x: {"filepath": x},
    )
    nodes = reader.load_data()
    return CodeHierarchyNodeParser(
        language="python",
        chunk_min_characters=request.param[0],
        # You can further parameterize the CodeSplitter to split the code
        # into "chunks" that match your context window size using
        # chunk_lines and max_chars parameters, here we just use the defaults
        code_splitter=CodeSplitter(
            language="python", max_chars=request.param[1], chunk_lines=request.param[2]
        ),
    ).get_nodes_from_documents(nodes)
+
+
def test_code_splitter_NEXT_relationship_indention(
    code_hierarchy_nodes: Sequence[BaseNode],
) -> None:
    """The trailing "Code replaced for brevity" line must not be indented.

    When using jupyter it was found that the final brevity comment was
    indented when it shouldn't be.
    """
    from llama_index.core.schema import NodeRelationship

    for node in code_hierarchy_nodes:
        last_line = node.text.split("\n")[-1]
        # BUG FIX: the original tested `"NEXT" in node.relationships`, but the
        # relationships dict is keyed by the NodeRelationship enum, not by the
        # member-name string, so the guard never matched and the assertions
        # below never executed.
        if (
            "Code replaced for brevity" in last_line
            and NodeRelationship.NEXT in node.relationships
        ):
            assert not last_line.startswith(" ")
            assert not last_line.startswith("\t")
+
+
def test_query_by_module_name(code_hierarchy_nodes: Sequence[BaseNode]) -> None:
    """Test querying the index by filename."""
    engine = CodeHierarchyKeywordQueryEngine(nodes=code_hierarchy_nodes)
    answer = engine.query("code_hierarchy").response
    # A hit returns non-empty text; a miss returns the literal string "None".
    assert len(answer) >= 1
    assert answer != "None"
+
+
@pytest.mark.parametrize(
    "name",
    [
        "CodeHierarchyNodeParser",
        "_parse_node",
        "recur",
        "__init__",
    ],
)
def test_query_by_item_name(
    name: str, code_hierarchy_nodes: Sequence[BaseNode]
) -> None:
    """Test querying the index by item (class/function/method) name."""
    index = CodeHierarchyKeywordQueryEngine(nodes=code_hierarchy_nodes)
    # BUG FIX: the query was hard-coded to "CodeHierarchyNodeParser", so all
    # four parametrized cases ran the identical query. Use the parameter.
    results = index.query(name)
    # A hit returns non-empty text; a miss returns the literal string "None".
    assert len(results.response) >= 1 and results.response != "None"
+
+
def test_get_tool(code_hierarchy_nodes: Sequence[BaseNode]) -> None:
    """Test querying through the langchain tool wrapper."""
    engine = CodeHierarchyKeywordQueryEngine(nodes=code_hierarchy_nodes)
    tool_output = engine.as_langchain_tool().run("CodeHierarchyNodeParser")
    # A hit returns non-empty text; a miss returns the literal string "None".
    assert len(tool_output) >= 1
    assert tool_output != "None"
+
+
def test_query_by_all_uuids(code_hierarchy_nodes: Sequence[BaseNode]) -> None:
    """Every uuid mentioned in any node's text must be resolvable by query."""
    engine = CodeHierarchyKeywordQueryEngine(nodes=code_hierarchy_nodes)
    # Hoist the pattern; uuids render as 36 chars of [A-Za-z0-9_-].
    uuid_pattern = re.compile(r"[\w-]{36}")
    for node in code_hierarchy_nodes:
        for found_uuid in uuid_pattern.findall(node.text):
            answer = engine.query(found_uuid).response
            # A hit returns non-empty text; a miss returns the string "None".
            assert len(answer) >= 1
            assert answer != "None"
diff --git a/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_utility_methods.py b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_utility_methods.py
new file mode 100644
index 0000000000..7784056750
--- /dev/null
+++ b/llama-index-packs/llama-index-packs-code-hierarchy/tests/test_utility_methods.py
@@ -0,0 +1,133 @@
+import pytest
+from llama_index.packs.code_hierarchy import CodeHierarchyNodeParser
+
+
def test_space_indentation() -> None:
    """Four-space indentation is detected as (' ', 4, 0)."""
    sample = (
        "def function():\n"
        '    print("First level of indentation")\n'
        "    if True:\n"
        '        print("Second level of indentation")\n'
    )
    result = CodeHierarchyNodeParser._get_indentation(sample)
    # (indent_char, count_per_indent, first_indent_level)
    assert result == (" ", 4, 0)
+
+
def test_tab_indentation() -> None:
    """Tab indentation is detected as one tab per level, starting at level 0."""
    sample = (
        "def function():\n"
        '\tprint("First level of indentation")\n'
        "\tif True:\n"
        '\t\tprint("Second level of indentation")\n'
    )
    result = CodeHierarchyNodeParser._get_indentation(sample)
    # (indent_char, count_per_indent, first_indent_level)
    assert result == ("\t", 1, 0)
+
+
def test_tab_indentation_2() -> None:
    """A block already indented one tab reports first_indent_level == 1."""
    sample = (
        "\tdef function():\n"
        '\t\tprint("First level of indentation")\n'
        "\t\tif True:\n"
        '\t\t\tprint("Second level of indentation")\n'
    )
    result = CodeHierarchyNodeParser._get_indentation(sample)
    # (indent_char, count_per_indent, first_indent_level)
    assert result == ("\t", 1, 1)
+
+
def test_mixed_indentation() -> None:
    """Mixing a tab-indented line with space-indented lines raises ValueError."""
    sample = (
        "def function():\n"
        '\tprint("First level of indentation")\n'
        "    if True:\n"
        '        print("Second level of indentation")\n'
    )
    with pytest.raises(ValueError, match="Mixed indentation found."):
        CodeHierarchyNodeParser._get_indentation(sample)
+
+
def test_mixed_indentation_2() -> None:
    """Inconsistent indent widths (tab, 2, 4, 8 spaces) also raise ValueError."""
    sample = (
        "\tdef function():\n"
        '  print("First level of indentation")\n'
        "    if True:\n"
        '        print("Second level of indentation")\n'
    )
    with pytest.raises(ValueError, match="Mixed indentation found."):
        CodeHierarchyNodeParser._get_indentation(sample)
+
+
def test_no_indentation() -> None:
    """Unindented code falls back to the default of 4 spaces at level 0."""
    # BUG FIX: the original literal opened with `"""\"` — the `\"` embedded a
    # stray double-quote as the first line of the sample instead of being the
    # intended line-continuation `"""\`.
    text = """\
def function():
print("No indentation")
"""
    (
        indent_char,
        count_per_indent,
        first_indent_level,
    ) = CodeHierarchyNodeParser._get_indentation(text)
    assert indent_char == " "
    assert count_per_indent == 4
    assert first_indent_level == 0
+
+
def test_typescript() -> None:
    """A small brace-delimited TypeScript class detects 4-space indentation."""
    sample = (
        "class Example {\n"
        "    exampleMethod() {\n"
        '        console.log("line1");\n'
        "    }\n"
        "}\n"
    )
    result = CodeHierarchyNodeParser._get_indentation(sample)
    # (indent_char, count_per_indent, first_indent_level)
    assert result == (" ", 4, 0)
+
+
def test_typescript_2() -> None:
    """Multiple top-level TypeScript scopes still detect 4-space indentation."""
    sample = (
        "function foo() {\n"
        '    console.log("bar");\n'
        "}\n"
        "\n"
        "class Example {\n"
        "    exampleMethod() {\n"
        '        console.log("line1");\n'
        "    }\n"
        "}\n"
        "\n"
        "function baz() {\n"
        '    console.log("bbq");\n'
        "}"
    )
    result = CodeHierarchyNodeParser._get_indentation(sample)
    # (indent_char, count_per_indent, first_indent_level)
    assert result == (" ", 4, 0)
-- 
GitLab