Skip to content

Commit e042b8d

Browse files
DeanChensj and copybara-github
authored and committed
fix: Fix Code Generation Template Injection via Filenames
User-supplied or LLM-generated filenames were interpolated into Python code strings using `.format()` without escaping, leading to an arbitrary code execution vulnerability. This change fixes the issue by:

1. Updating the `_DATA_FILE_UTIL_MAP` templates so that they no longer include quotes around `{filename}`.
2. Using `repr(file.name)` when formatting the template in `_get_data_file_preprocessing_code` to ensure the filename is safely escaped as a Python string literal.

Co-authored-by: Shangjie Chen <deanchen@google.com>
PiperOrigin-RevId: 934744761
1 parent 5c8c55a commit e042b8d

2 files changed

Lines changed: 45 additions & 2 deletions

File tree

src/google/adk/flows/llm_flows/_code_execution.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,9 @@ class DataFileUtil:
7070
_DATA_FILE_UTIL_MAP = {
7171
'text/csv': DataFileUtil(
7272
extension='.csv',
73-
loader_code_template="pd.read_csv('{filename}')",
73+
# Note: The template does not quote {filename} because repr() in
74+
# _get_data_file_preprocessing_code supplies quotes and escaping.
75+
loader_code_template='pd.read_csv({filename})',
7476
),
7577
}
7678

@@ -529,7 +531,7 @@ def _get_normalized_file_name(file_name: str) -> str:
529531

530532
var_name = _get_normalized_file_name(file.name)
531533
loader_code = _DATA_FILE_UTIL_MAP[file.mime_type].loader_code_template.format(
532-
filename=file.name
534+
filename=repr(file.name)
533535
)
534536
return f"""
535537
{_DATA_FILE_HELPER_LIB}

tests/unittests/flows/llm_flows/test_code_execution.py

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
"""Unit tests for Code Execution logic."""
1616

17+
import ast
1718
import datetime
1819
from unittest.mock import AsyncMock
1920
from unittest.mock import MagicMock
@@ -23,7 +24,9 @@
2324
from google.adk.code_executors.base_code_executor import BaseCodeExecutor
2425
from google.adk.code_executors.built_in_code_executor import BuiltInCodeExecutor
2526
from google.adk.code_executors.code_execution_utils import CodeExecutionResult
27+
from google.adk.code_executors.code_execution_utils import File
2628
from google.adk.flows.llm_flows._code_execution import _DATA_FILE_HELPER_LIB
29+
from google.adk.flows.llm_flows._code_execution import _get_data_file_preprocessing_code
2730
from google.adk.flows.llm_flows._code_execution import response_processor
2831
from google.adk.models.llm_response import LlmResponse
2932
from google.genai import types
@@ -166,3 +169,41 @@ def test_data_file_helper_lib_defines_crop():
166169

167170
# Regression for #4011: explore_df raised NameError when crop was undefined.
168171
namespace['explore_df'](pd.DataFrame({'a': [1, 2], 'b': ['x', 'y']}))
172+
173+
174+
def test_get_data_file_preprocessing_code_injection_reproduction():
  """A quote-breaking filename must end up as one inert string literal.

  The generated preprocessing code is parsed (never executed) and its AST is
  inspected: the payload must not appear as a real ``print('PWNED')`` call, and
  the first ``pd.read_csv(...)`` call must receive the filename unchanged as a
  single constant argument.
  """
  bad_filename = "'); print('PWNED')#"
  payload_file = File(name=bad_filename, mime_type='text/csv', content=b'')
  generated = _get_data_file_preprocessing_code(payload_file)

  # Collect every call expression once, in ast.walk order, for both checks.
  call_nodes = [
      node
      for node in ast.walk(ast.parse(generated))
      if isinstance(node, ast.Call)
  ]

  # Check 1: the injected print('PWNED') must not have become a real call.
  for call in call_nodes:
    target = call.func
    if not (isinstance(target, ast.Name) and target.id == 'print'):
      continue
    if len(call.args) != 1:
      continue
    only_arg = call.args[0]
    if isinstance(only_arg, ast.Constant) and only_arg.value == 'PWNED':
      pytest.fail(
          "Vulnerability reproduction: print('PWNED') was parsed as"
          ' executable code!'
      )

  # Check 2: the first pd.read_csv(...) call gets the filename as a literal.
  read_csv_arg = None
  for call in call_nodes:
    target = call.func
    if not isinstance(target, ast.Attribute) or target.attr != 'read_csv':
      continue
    owner = target.value
    if not isinstance(owner, ast.Name) or owner.id != 'pd':
      continue
    assert len(call.args) == 1
    assert isinstance(call.args[0], ast.Constant)
    read_csv_arg = call.args[0].value
    break

  # Stays None (and fails here) if no pd.read_csv call was found at all.
  assert read_csv_arg == bad_filename

0 commit comments

Comments
 (0)