From aa6726706476e0f957a8d57a5ca89e519e93bad7 Mon Sep 17 00:00:00 2001
From: R Ostrowski <rostrovsky@users.noreply.github.com>
Date: Mon, 11 Dec 2023 01:34:32 +0100
Subject: [PATCH] Remediate RCE vulnerability CVE-2023-39662 - part 2 (#9423)

---
Review notes (ignored by `git am`):
  * The underscore check is anchored to the start of an identifier, so
    snake_case names such as `sort_values` are no longer rejected.
  * The source is NFKC-normalized before scanning, because Python applies
    the same normalization to identifiers when it parses them.
  * The `index` blob hashes below are those of the original commit.

 llama_index/exec_utils.py         | 32 ++++++++++++++++++++++++++++++++
 tests/query_engine/test_pandas.py | 23 +++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/llama_index/exec_utils.py b/llama_index/exec_utils.py
index 3cdef1cd52..3e8575a3af 100644
--- a/llama_index/exec_utils.py
+++ b/llama_index/exec_utils.py
@@ -1,4 +1,6 @@
 import copy
+import re
+import unicodedata
 from types import CodeType, ModuleType
 from typing import Any, Dict, Mapping, Sequence, Union
 
@@ -90,6 +92,34 @@ def _get_restricted_globals(__globals: Union[dict, None]) -> Any:
     return restricted_globals
 
 
+def _verify_source_safety(__source: Union[str, bytes, CodeType]) -> None:
+    """
+    Reject source that refers to private or dunder names.
+
+    Precompiled ``CodeType`` objects cannot be inspected as text and are
+    always refused; ``bytes`` input is decoded before it is scanned.
+
+    Raises:
+        RuntimeError: if ``__source`` is a ``CodeType`` or contains an
+            identifier that starts with an underscore.
+    """
+    # The lookbehind anchors the match to the start of an identifier, so
+    # snake_case names such as ``sort_values`` are not rejected.
+    pattern = r"(?<!\w)_\w+"
+
+    if isinstance(__source, CodeType):
+        raise RuntimeError("Direct execution of CodeType is forbidden!")
+    if isinstance(__source, bytes):
+        __source = __source.decode()
+
+    # Python NFKC-normalizes identifiers, so compatibility forms of "_"
+    # (e.g. U+FF3F) must be folded before the scan or they slip past it.
+    if re.search(pattern, unicodedata.normalize("NFKC", __source)):
+        raise RuntimeError(
+            "Execution of code containing references to private or dunder methods is forbidden!"
+        )
+
+
 def safe_eval(
     __source: Union[str, bytes, CodeType],
     __globals: Union[Dict[str, Any], None] = None,
@@ -98,6 +128,7 @@ def safe_eval(
     """
     eval within safe global context.
     """
+    _verify_source_safety(__source)
     return eval(__source, _get_restricted_globals(__globals), __locals)
 
 
@@ -109,4 +140,5 @@ def safe_exec(
     """
     eval within safe global context.
     """
+    _verify_source_safety(__source)
     return exec(__source, _get_restricted_globals(__globals), __locals)
diff --git a/tests/query_engine/test_pandas.py b/tests/query_engine/test_pandas.py
index 060d837093..0c7acedd55 100644
--- a/tests/query_engine/test_pandas.py
+++ b/tests/query_engine/test_pandas.py
@@ -84,6 +84,29 @@ def test_default_output_processor_rce(tmp_path: Path) -> None:
     assert not tmp_file.is_file(), "file has been created via RCE!"
 
 
+@pytest.mark.skipif(sys.version_info < (3, 9), reason="Requires Python 3.9 or higher")
+def test_default_output_processor_rce2() -> None:
+    """
+    Test that output processor prevents RCE.
+    https://github.com/run-llama/llama_index/issues/7054#issuecomment-1829141330 .
+    """
+    df = pd.DataFrame(
+        {
+            "city": ["Toronto", "Tokyo", "Berlin"],
+            "population": [2930000, 13960000, 3645000],
+        }
+    )
+
+    injected_code = "().__class__.__mro__[-1].__subclasses__()[137].__init__.__globals__['system']('ls')"
+
+    output = default_output_processor(injected_code, df)
+
+    assert (
+        "Execution of code containing references to private or dunder methods is forbidden!"
+        in output
+    ), "Injected code executed successfully!"
+
+
 @pytest.mark.skipif(sys.version_info < (3, 9), reason="Requires Python 3.9 or higher")
 def test_default_output_processor_e2e(tmp_path: Path) -> None:
     """
-- 
GitLab