Skip to content

Commit ea89579

Browse files
committed
gh-148660: Fix use-after-free in OrderedDict.copy() on re-entrant mutation
Building the copy iterated od's node list while running arbitrary Python (a key's __eq__/__hash__, a subclass' __getitem__/__setitem__, or a value's __del__) that could clear od and free the nodes being walked, causing a use-after-free. Snapshot od_state and raise RuntimeError if it changes during the copy, as is already done for OrderedDict equality (gh-119004).
1 parent 8646385 commit ea89579

3 files changed

Lines changed: 222 additions & 14 deletions

File tree

Lib/test/test_ordered_dict.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,149 @@ def side_effect(self):
879879
self.assertDictEqual(dict1, dict.fromkeys((0, 4.2)))
880880
self.assertDictEqual(dict2, dict.fromkeys((0, Key(), 4.2)))
881881

882+
def check_copy_runtime_error_issue148660(self, od):
883+
msg = re.escape("OrderedDict mutated during iteration")
884+
self.assertRaisesRegex(RuntimeError, msg, od.copy)
885+
self.assertEqual(len(od), 0) # the side effect cleared it
886+
887+
def test_issue148660_copy_clear_in_source_lookup(self):
888+
# gh-148660: a key's __eq__ clears od while od.copy() looks the value
889+
# up in the source dict; the loop must not read the freed nodes.
890+
OrderedDict = self.OrderedDict
891+
armed = False
892+
893+
class Key:
894+
def __hash__(self):
895+
return 42 # force collisions so __eq__ runs during copy
896+
def __eq__(self, other):
897+
if armed:
898+
od.clear()
899+
return self is other
900+
901+
od = OrderedDict([(Key(), 'v1'), (Key(), 'v2')])
902+
armed = True
903+
self.check_copy_runtime_error_issue148660(od)
904+
905+
def test_issue148660_copy_clear_in_dest_insert(self):
906+
# gh-148660: this is the ASan-reported path -- the key's __eq__ clears
907+
# od while od.copy() inserts into the *destination* dict (the collision
908+
# probe into od_copy), not during the source lookup.
909+
OrderedDict = self.OrderedDict
910+
armed = False
911+
eq_calls = 0
912+
913+
class Key:
914+
def __hash__(self):
915+
return 42 # force collisions so __eq__ runs during copy
916+
def __eq__(self, other):
917+
nonlocal eq_calls
918+
if armed:
919+
eq_calls += 1
920+
# 1st armed __eq__ is the source lookup, 2nd is the
921+
# collision probe while inserting into od_copy.
922+
if eq_calls == 2:
923+
od.clear()
924+
return self is other
925+
926+
od = OrderedDict([(Key(), 'v1'), (Key(), 'v2')])
927+
armed = True
928+
self.check_copy_runtime_error_issue148660(od)
929+
930+
def test_issue148660_copy_clear_in_hash(self):
931+
# gh-148660: a key's __hash__ clears od while od.copy() re-hashes it.
932+
OrderedDict = self.OrderedDict
933+
armed = False
934+
935+
class Key:
936+
def __init__(self, n):
937+
self.n = n
938+
def __hash__(self):
939+
if armed:
940+
od.clear()
941+
return 42
942+
def __eq__(self, other):
943+
return self is other
944+
945+
od = OrderedDict([(Key(1), 'v1'), (Key(2), 'v2')])
946+
armed = True
947+
self.check_copy_runtime_error_issue148660(od)
948+
949+
def test_issue148660_copy_key_eq_exception_is_preserved(self):
950+
# gh-148660: if a key's __eq__ both mutates od and raises, the raised
951+
# exception is preserved, matching OrderedDict.__eq__ (gh-119004).
952+
OrderedDict = self.OrderedDict
953+
armed = False
954+
955+
class Boom(Exception):
956+
pass
957+
958+
class Key:
959+
def __hash__(self):
960+
return 42
961+
def __eq__(self, other):
962+
if armed:
963+
od.clear()
964+
raise Boom
965+
return self is other
966+
967+
od = OrderedDict([(Key(), 'v1'), (Key(), 'v2')])
968+
armed = True
969+
self.assertRaises(Boom, od.copy)
970+
971+
def test_issue148660_copy_clear_in_subclass_getitem(self):
972+
# gh-148660: a subclass __getitem__ clears od during od.copy().
973+
OrderedDict = self.OrderedDict
974+
armed = False
975+
976+
class OD(OrderedDict):
977+
def __getitem__(self, key):
978+
if armed:
979+
od.clear()
980+
return 'dummy'
981+
return OrderedDict.__getitem__(self, key)
982+
983+
od = OD([(1, 'v1'), (2, 'v2')])
984+
armed = True
985+
self.check_copy_runtime_error_issue148660(od)
986+
987+
def test_issue148660_copy_clear_in_subclass_setitem(self):
988+
# gh-148660: a subclass __setitem__ clears od during od.copy().
989+
OrderedDict = self.OrderedDict
990+
armed = False
991+
992+
class OD(OrderedDict):
993+
def __setitem__(self, key, value):
994+
if armed:
995+
od.clear()
996+
OrderedDict.__setitem__(self, key, value)
997+
998+
od = OD([(1, 'v1'), (2, 'v2')])
999+
armed = True
1000+
self.check_copy_runtime_error_issue148660(od)
1001+
1002+
def test_issue148660_copy_clear_in_value_del(self):
1003+
# gh-148660: a value's __del__ (run by Py_DECREF) clears od during
1004+
# od.copy().
1005+
OrderedDict = self.OrderedDict
1006+
armed = False
1007+
1008+
class V:
1009+
def __del__(self):
1010+
if armed:
1011+
od.clear()
1012+
1013+
class OD(OrderedDict):
1014+
def __getitem__(self, key):
1015+
return V()
1016+
def __setitem__(self, key, value):
1017+
pass
1018+
1019+
od = OD()
1020+
OrderedDict.__setitem__(od, 1, 'v1')
1021+
OrderedDict.__setitem__(od, 2, 'v2')
1022+
armed = True
1023+
self.check_copy_runtime_error_issue148660(od)
1024+
8821025

8831026
@unittest.skipUnless(c_coll, 'requires the C version of the collections module')
8841027
class CPythonOrderedDictTests(OrderedDictTests,
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a use-after-free in :class:`collections.OrderedDict`'s ``copy()`` method
2+
when the dict is mutated while being copied, for example by a key's ``__eq__``
3+
or ``__hash__``, a subclass' ``__getitem__`` or ``__setitem__``, or a value's
4+
``__del__``. It now raises :exc:`RuntimeError`, like other operations that
5+
mutate the dict during iteration. Patch by Harjoth Khara.

Objects/odictobject.c

Lines changed: 74 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,32 +1251,92 @@ OrderedDict_copy_impl(PyObject *od)
12511251
if (od_copy == NULL)
12521252
return NULL;
12531253

1254+
/* Building the copy runs arbitrary Python code: a key's __hash__/__eq__,
1255+
a subclass' __getitem__/__setitem__, or a value's __del__ may mutate od
1256+
and free the nodes we are iterating over. Keep od's state to detect
1257+
such mutations and raise instead of dereferencing freed memory
1258+
(see gh-148660), mirroring what is already done when comparing
1259+
OrderedDicts (see gh-119004). */
1260+
const size_t state = _PyODictObject_CAST(od)->od_state;
1261+
12541262
if (PyODict_CheckExact(od)) {
1255-
_odict_FOREACH(od, node) {
1256-
PyObject *key = _odictnode_KEY(node);
1257-
PyObject *value = _odictnode_VALUE(node, od);
1263+
node = _odict_FIRST(od);
1264+
while (node != NULL) {
1265+
PyObject *key = Py_NewRef(_odictnode_KEY(node));
1266+
Py_hash_t hash = _odictnode_HASH(node);
1267+
/* The value lookup may run the key's __eq__ or __hash__. */
1268+
PyObject *value = PyODict_GetItemWithError((PyObject *)od, key);
1269+
/* Propagate an exception raised by that re-entrant call before
1270+
reporting a mutation, as OrderedDict.__eq__ does (gh-119004). */
1271+
if (value == NULL && PyErr_Occurred()) {
1272+
Py_DECREF(key);
1273+
goto fail;
1274+
}
1275+
if (_PyODictObject_CAST(od)->od_state != state) {
1276+
Py_DECREF(key);
1277+
PyErr_SetString(PyExc_RuntimeError,
1278+
"OrderedDict mutated during iteration");
1279+
goto fail;
1280+
}
12581281
if (value == NULL) {
1259-
if (!PyErr_Occurred())
1260-
PyErr_SetObject(PyExc_KeyError, key);
1282+
PyErr_SetObject(PyExc_KeyError, key);
1283+
Py_DECREF(key);
12611284
goto fail;
12621285
}
1263-
if (_PyODict_SetItem_KnownHash_LockHeld((PyObject *)od_copy, key, value,
1264-
_odictnode_HASH(node)) != 0)
1286+
/* value is borrowed, but the insert refs it before running any
1287+
__eq__ of its own, so it cannot be freed under us here. */
1288+
int res = _PyODict_SetItem_KnownHash_LockHeld((PyObject *)od_copy,
1289+
key, value, hash);
1290+
Py_DECREF(key);
1291+
if (res != 0)
1292+
goto fail;
1293+
/* The destination insert may run the key's __eq__ and mutate od. */
1294+
if (_PyODictObject_CAST(od)->od_state != state) {
1295+
PyErr_SetString(PyExc_RuntimeError,
1296+
"OrderedDict mutated during iteration");
12651297
goto fail;
1298+
}
1299+
node = _odictnode_NEXT(node);
12661300
}
12671301
}
12681302
else {
1269-
_odict_FOREACH(od, node) {
1270-
int res;
1271-
PyObject *value = PyObject_GetItem((PyObject *)od,
1272-
_odictnode_KEY(node));
1273-
if (value == NULL)
1303+
node = _odict_FIRST(od);
1304+
while (node != NULL) {
1305+
PyObject *key = Py_NewRef(_odictnode_KEY(node));
1306+
/* __getitem__ (or the key's __eq__ during the lookup) may run
1307+
Python code that mutates od. */
1308+
PyObject *value = PyObject_GetItem((PyObject *)od, key);
1309+
if (value == NULL) {
1310+
/* A re-entrant mutation that drops the key makes __getitem__
1311+
raise KeyError; report the mutation instead, but let any
1312+
other exception propagate (as OrderedDict.__eq__ does). */
1313+
if (_PyODictObject_CAST(od)->od_state != state
1314+
&& PyErr_ExceptionMatches(PyExc_KeyError)) {
1315+
PyErr_SetString(PyExc_RuntimeError,
1316+
"OrderedDict mutated during iteration");
1317+
}
1318+
Py_DECREF(key);
1319+
goto fail;
1320+
}
1321+
if (_PyODictObject_CAST(od)->od_state != state) {
1322+
Py_DECREF(value);
1323+
Py_DECREF(key);
1324+
PyErr_SetString(PyExc_RuntimeError,
1325+
"OrderedDict mutated during iteration");
12741326
goto fail;
1275-
res = PyObject_SetItem((PyObject *)od_copy,
1276-
_odictnode_KEY(node), value);
1327+
}
1328+
int res = PyObject_SetItem((PyObject *)od_copy, key, value);
12771329
Py_DECREF(value);
1330+
Py_DECREF(key);
12781331
if (res != 0)
12791332
goto fail;
1333+
/* __setitem__ on the copy, or a value's __del__, may mutate od. */
1334+
if (_PyODictObject_CAST(od)->od_state != state) {
1335+
PyErr_SetString(PyExc_RuntimeError,
1336+
"OrderedDict mutated during iteration");
1337+
goto fail;
1338+
}
1339+
node = _odictnode_NEXT(node);
12801340
}
12811341
}
12821342
return od_copy;

0 commit comments

Comments
 (0)