From 40e9ebc5ac51c8d97afe56fe72d20d9b79f602d2 Mon Sep 17 00:00:00 2001 From: Thomas Wuerthinger Date: Fri, 24 Apr 2026 17:49:29 +0200 Subject: [PATCH 1/2] Pack dict metadata into ObjectHashMap --- .../objects/common/EconomicMapStorage.java | 45 +- .../objects/common/HashingStorageNodes.java | 69 +-- .../objects/common/ObjectHashMap.java | 520 ++++++++++++------ .../nodes/bytecode/PBytecodeRootNode.java | 16 +- .../bytecode_dsl/MakeSetStorageNode.java | 9 +- .../bytecode_dsl/PBytecodeDSLRootNode.java | 6 +- 6 files changed, 412 insertions(+), 253 deletions(-) diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/EconomicMapStorage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/EconomicMapStorage.java index 92c5d0a8a6..75f09f299f 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/EconomicMapStorage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/EconomicMapStorage.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -66,8 +66,7 @@ import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleString.HashCodeNode; -public class EconomicMapStorage extends HashingStorage { - +public class EconomicMapStorage extends ObjectHashMap { public static EconomicMapStorage create() { return new EconomicMapStorage(); } @@ -80,18 +79,16 @@ public static EconomicMapStorage create(int initialCapacity) { return new EconomicMapStorage(initialCapacity); } - final ObjectHashMap map; - private EconomicMapStorage(int initialCapacity) { - this.map = new ObjectHashMap(initialCapacity); + super(initialCapacity); } private EconomicMapStorage() { this(4); } - public EconomicMapStorage(ObjectHashMap original, boolean copy) { - this.map = copy ? original.copy() : original; + private EconomicMapStorage(EconomicMapStorage original) { + super(original); } @TruffleBoundary @@ -109,7 +106,7 @@ public static EconomicMapStorage createGeneric(LinkedHashMap map } public int length() { - return map.size(); + return size(); } static boolean advance(MapCursor cursor) { @@ -128,51 +125,43 @@ static Object getValue(MapCursor cursor) { return cursor.getValue(); } - void clear() { - map.clear(); - } - - public boolean mapIsEqualTo(ObjectHashMap other) { - return other == this.map; - } - public HashingStorage copy() { - return new EconomicMapStorage(this.map, true); + return new EconomicMapStorage(this); } protected void setValueForAllKeys(VirtualFrame frame, Node inliningTarget, Object value, ObjectHashMap.PutNode putNode, InlinedLoopConditionProfile loopProfile) { - MapCursor cursor = map.getEntries(); - final int size = map.size(); + MapCursor cursor = getEntries(); + final int size = size(); loopProfile.profileCounted(inliningTarget, size); LoopNode.reportLoopCount(putNode, size); while (loopProfile.inject(inliningTarget, advance(cursor))) { - putNode.put(frame, inliningTarget, map, getDictKey(cursor), value); + putNode.put(frame, inliningTarget, this, getDictKey(cursor), value); } } @TruffleBoundary public Object removeUncached(Object key, long hash) { - return RemoveNode.removeUncached(map, key, hash); + return RemoveNode.removeUncached(this, key, hash); } @TruffleBoundary public void putUncached(TruffleString key, Object value) { - PutNode.putUncached(map, key, PyObjectHashNode.hash(key, HashCodeNode.getUncached()), value); + PutNode.putUncached(this, key, PyObjectHashNode.hash(key, HashCodeNode.getUncached()), value); } @TruffleBoundary public void putUncached(Object key, Object value) { - PutNode.putUncached(map, key, PyObjectHashNode.executeUncached(key), value); + PutNode.putUncached(this, key, PyObjectHashNode.executeUncached(key), value); } @TruffleBoundary public void putUncached(Object key, long hash, Object value) { - PutNode.putUncached(map, key, hash, value); + PutNode.putUncached(this, key, hash, value); } @TruffleBoundary public void putUncached(int key, Object value) { - PutNode.putUncached(map, key, PyObjectHashNode.hash(key), value); + PutNode.putUncached(this, key, PyObjectHashNode.hash(key), value); } @TruffleBoundary @@ -201,7 +190,7 @@ public String toString() { StringBuilder builder = new StringBuilder(); builder.append("map(size=").append(length()).append(", {"); String sep = ""; - MapCursor cursor = map.getEntries(); + MapCursor cursor = getEntries(); int i = 0; while (advance(cursor)) { i++; @@ -225,7 +214,7 @@ public abstract static class EconomicMapSetStringKey extends SpecializedSetStrin static void doIt(Node inliningTarget, HashingStorage self, TruffleString key, Object value, @Cached PyObjectHashNode hashNode, @Cached ObjectHashMap.PutNode putNode) { - putNode.put(null, inliningTarget, ((EconomicMapStorage) self).map, key, hashNode.execute(null, inliningTarget, key), value); + putNode.put(null, inliningTarget, (EconomicMapStorage) self, key, hashNode.execute(null, inliningTarget, key), value); } } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java index 1311d76254..a34728cd16 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java @@ -118,7 +118,7 @@ public static Object getItemWithHash(HashingStorage self, Object key, long keyHa @Specialization static Object economicMap(Frame frame, Node inliningTarget, EconomicMapStorage self, Object key, long keyHash, @Cached ObjectHashMap.GetNode getNode) { - return getNode.execute(frame, inliningTarget, self.map, key, keyHash); + return getNode.execute(frame, inliningTarget, self, key, keyHash); } @Specialization @@ -157,7 +157,7 @@ public abstract static class HashingStorageGetItemStringKey extends Node { static Object economicMap(Node inliningTarget, EconomicMapStorage self, TruffleString key, @Cached TruffleString.HashCodeNode hashCodeNode, @Cached ObjectHashMap.GetNode getNode) { - return getNode.execute(null, inliningTarget, self.map, key, PyObjectHashNode.hash(key, hashCodeNode)); + return getNode.execute(null, inliningTarget, self, key, PyObjectHashNode.hash(key, hashCodeNode)); } @Specialization @@ -219,13 +219,13 @@ public final Object execute(Node inliningTarget, HashingStorage self, TruffleStr @Specialization(guards = "isEconomicMapOrEmpty(self)") static Object economicMap(Frame frame, Node inliningTarget, HashingStorage self, Object key, - @Cached PyObjectHashNode hashNode, + @Exclusive @Cached PyObjectHashNode hashNode, @Cached InlinedConditionProfile isEconomicMapProfile, @Cached ObjectHashMap.GetNode getNode) { // We must not omit the potentially side-effecting call to __hash__ long hash = hashNode.execute(frame, inliningTarget, key); if (isEconomicMapProfile.profile(inliningTarget, self instanceof EconomicMapStorage)) { - return getNode.execute(frame, inliningTarget, ((EconomicMapStorage) self).map, key, hash); + return getNode.execute(frame, inliningTarget, (EconomicMapStorage) self, key, hash); } else { return null; } @@ -277,7 +277,7 @@ static EconomicMapStorage dynamicObjectStorageToEconomicMap(Node inliningTarget, DynamicObject store = s.store; Object[] keys = getKeyArrayNode.execute(store); EconomicMapStorage result = EconomicMapStorage.create(keys.length); - ObjectHashMap resultMap = result.map; + ObjectHashMap resultMap = result; for (Object k : keys) { if (k instanceof TruffleString) { Object v = getNode.execute(store, k, PNone.NO_VALUE); @@ -308,16 +308,16 @@ public final HashingStorage executeCached(Frame frame, HashingStorage self, Obje @Specialization static HashingStorage economicMap(Frame frame, Node inliningTarget, EconomicMapStorage self, Object key, long keyHash, Object value, @Exclusive @Cached PutNode putNode) { - putNode.execute(frame, inliningTarget, self.map, key, keyHash, value); + putNode.execute(frame, inliningTarget, self, key, keyHash, value); return self; } @Specialization static HashingStorage empty(Frame frame, Node inliningTarget, @SuppressWarnings("unused") EmptyStorage self, Object key, long keyHash, Object value, @Exclusive @Cached PutNode putNode) { - EconomicMapStorage storage = EconomicMapStorage.create(1); - putNode.execute(frame, inliningTarget, storage.map, key, keyHash, value); - return storage; + EconomicMapStorage result = EconomicMapStorage.create(1); + putNode.execute(frame, inliningTarget, result, key, keyHash, value); + return result; } @Specialization(guards = "!self.shouldTransitionOnPut()") @@ -385,7 +385,7 @@ static HashingStorage domTransition(Frame frame, Node inliningTarget, DynamicObj @Cached DynamicObject.GetKeyArrayNode getKeyArrayNode, @Cached DynamicObject.GetNode getNode) { EconomicMapStorage result = dynamicObjectStorageToEconomicMap(inliningTarget, self, getKeyArrayNode, getNode, hashNode, putUnsafeNode); - putNode.execute(frame, inliningTarget, result.map, key, keyHash, value); + putNode.execute(frame, inliningTarget, result, key, keyHash, value); return result; } } @@ -427,7 +427,7 @@ public final HashingStorage execute(Node inliningTarget, HashingStorage self, Tr static HashingStorage economicMap(Frame frame, Node inliningTarget, EconomicMapStorage self, Object key, Object value, @Exclusive @Cached PyObjectHashNode hashNode, @Exclusive @Cached PutNode putNode) { - putNode.execute(frame, inliningTarget, self.map, key, hashNode.execute(frame, inliningTarget, key), value); + putNode.execute(frame, inliningTarget, self, key, hashNode.execute(frame, inliningTarget, key), value); return self; } @@ -435,10 +435,11 @@ static HashingStorage economicMap(Frame frame, Node inliningTarget, EconomicMapS static HashingStorage empty(Frame frame, Node inliningTarget, @SuppressWarnings("unused") EmptyStorage self, Object key, Object value, @Exclusive @Cached PyObjectHashNode hashNode, @Exclusive @Cached PutNode putNode) { - // The ObjectHashMap.PutNode is @Exclusive because profiles for a put into a freshly new - // allocated map can be quite different to profiles in the other situations when we are - // putting into a map that already has or will have some more items in it - // It is also @Cached(inline = false) because inlining it triggers GR-44836 + // Route the first insertion through the EconomicMapStorage specialization so the + // EmptyStorage fast path keeps sharing the regular EconomicMapStorage setup logic. + // The ObjectHashMap.PutNode is @Exclusive because profiles for a put into a freshly + // allocated map can differ from puts into a map that already has or will soon have + // more entries. // TODO: do we want to try DynamicObjectStorage if the key is a string? return economicMap(frame, inliningTarget, EconomicMapStorage.create(1), key, value, hashNode, putNode); } @@ -513,7 +514,7 @@ static HashingStorage domTransition(Frame frame, Node inliningTarget, DynamicObj @Cached DynamicObject.GetKeyArrayNode getKeyArrayNode, @Cached DynamicObject.GetNode getNode) { EconomicMapStorage result = dynamicObjectStorageToEconomicMap(inliningTarget, self, getKeyArrayNode, getNode, hashNode, putUnsafeNode); - putNode.execute(frame, inliningTarget, result.map, key, hashNode.execute(frame, inliningTarget, key), value); + putNode.execute(frame, inliningTarget, result, key, hashNode.execute(frame, inliningTarget, key), value); return result; } } @@ -529,7 +530,7 @@ public static boolean executeUncached(HashingStorage self, Object key, Object to } public static void executeUncachedWithHash(EconomicMapStorage storage, Object key, long hash) { - ObjectHashMapFactory.RemoveNodeGen.getUncached().execute(null, null, storage.map, key, hash); + ObjectHashMapFactory.RemoveNodeGen.getUncached().execute(null, null, storage, key, hash); } public final boolean execute(Node inliningTarget, HashingStorage self, TruffleString key, Object toUpdate) { @@ -571,7 +572,7 @@ static Object economicMap(Frame frame, Node inliningTarget, HashingStorage self, long hash = hashNode.execute(frame, inliningTarget, key); if (self instanceof EconomicMapStorage economicMap) { isEconomicMapProfile.enter(inliningTarget); - Object result = removeNode.execute(frame, inliningTarget, economicMap.map, key, hash); + Object result = removeNode.execute(frame, inliningTarget, economicMap, key, hash); return needsValue ? result : result != null; } return needsValue ? null : false; @@ -614,7 +615,7 @@ static Object keywords(Frame frame, Node inliningTarget, KeywordsStorage self, O EconomicMapStorage newStorage = EconomicMapStorage.create(self.length()); self.addAllTo(inliningTarget, newStorage, specializedPutNode); toUpdate.setDictStorage(newStorage); - Object result = removeNode.execute(frame, inliningTarget, newStorage.map, key, hashNode.execute(frame, inliningTarget, key)); + Object result = removeNode.execute(frame, inliningTarget, newStorage, key, hashNode.execute(frame, inliningTarget, key)); return needsValue ? result : result != null; } @@ -883,7 +884,7 @@ public final HashingStorageIterator execute(Node node, HashingStorage storage) { @Specialization static HashingStorageIterator economicMap(@SuppressWarnings("unused") EconomicMapStorage self) { HashingStorageIterator it = new HashingStorageIterator(true); - it.index = self.map.usedHashes; + it.index = self.usedHashes; return it; } @@ -939,7 +940,7 @@ public static HashingStorageIteratorNext create() { @Specialization(guards = "!it.isReverse") static boolean economicMap(EconomicMapStorage self, HashingStorageIterator it) { - ObjectHashMap map = self.map; + ObjectHashMap map = self; it.index++; while (it.index < map.usedHashes) { Object val = map.getValue(it.index); @@ -955,7 +956,7 @@ static boolean economicMap(EconomicMapStorage self, HashingStorageIterator it) { @Specialization(guards = "it.isReverse") static boolean economicMapReverse(EconomicMapStorage self, HashingStorageIterator it) { - ObjectHashMap map = self.map; + ObjectHashMap map = self; it.index--; while (it.index >= 0) { Object val = map.getValue(it.index); @@ -1119,7 +1120,7 @@ public static HashingStorageIteratorKey create() { @Specialization static Object economicMap(EconomicMapStorage self, HashingStorageIterator it) { - return self.map.getKey(it.index); + return self.getKey(it.index); } @Specialization @@ -1164,7 +1165,7 @@ public static long executeUncached(HashingStorage storage, HashingStorageIterato @Specialization static long economicMap(EconomicMapStorage self, HashingStorageIterator it) { - return self.map.hashes[it.index]; + return self.getHash(it.index); } @Specialization @@ -1212,7 +1213,7 @@ public abstract static class HashingStoragePop extends Node { @Specialization static Object[] economicMap(Node inliningTarget, EconomicMapStorage self, @SuppressWarnings("unused") Object toUpdate, @Cached ObjectHashMap.PopNode popNode) { - return popNode.execute(inliningTarget, self.map); + return popNode.execute(inliningTarget, self); } // Other storages should not have any side effects, it's OK if they call __eq__ @@ -1398,7 +1399,7 @@ static HashingStorage doIt(Frame frame, Node inliningTarget, HashingStorage aSto @Cached HashingStorageXorCallback callbackA, @Cached HashingStorageXorCallback callbackB) { final EconomicMapStorage result = EconomicMapStorage.createWithSideEffects(); - ObjectHashMap resultMap = result.map; + ObjectHashMap resultMap = result; ResultAndOther accA = new ResultAndOther(resultMap, bStorage); forEachA.execute(frame, inliningTarget, aStorage, callbackA, accA); @@ -1450,7 +1451,7 @@ static HashingStorage doIt(Frame frame, Node inliningTarget, HashingStorage aSto @Cached HashingStorageForEach forEachA, @Cached HashingStorageIntersectCallback callback) { final EconomicMapStorage result = EconomicMapStorage.createWithSideEffects(); - ResultAndOther acc = new ResultAndOther(result.map, bStorage); + ResultAndOther acc = new ResultAndOther(result, bStorage); forEachA.execute(frame, inliningTarget, aStorage, callback, acc); return result; } @@ -1497,7 +1498,7 @@ static HashingStorage doIt(Frame frame, Node inliningTarget, HashingStorage aSto @Cached HashingStorageForEach forEachA, @Cached HashingStorageDiffCallback callback) { final EconomicMapStorage result = EconomicMapStorage.createWithSideEffects(); - ResultAndOther acc = new ResultAndOther(result.map, bStorage); + ResultAndOther acc = new ResultAndOther(result, bStorage); forEachA.execute(frame, inliningTarget, aStorage, callback, acc); return result; } @@ -1627,20 +1628,20 @@ public abstract static class HashingStorageTransferItem extends HashingStorageFo @Specialization static EconomicMapStorage economic2Economic(Frame frame, Node inliningTarget, EconomicMapStorage src, HashingStorageIterator it, EconomicMapStorage destStorage, - @Cached PutNode putNode) { - ObjectHashMap srcMap = src.map; - putNode.put(frame, inliningTarget, destStorage.map, srcMap.getKey(it.index), srcMap.hashes[it.index], srcMap.getValue(it.index)); + @Exclusive @Cached PutNode putNode) { + ObjectHashMap srcMap = src; + putNode.put(frame, inliningTarget, destStorage, srcMap.getKey(it.index), srcMap.getHash(it.index), srcMap.getValue(it.index)); return destStorage; } @Specialization(replaces = "economic2Economic") @InliningCutoff static HashingStorage economic2Generic(Frame frame, Node inliningTarget, EconomicMapStorage src, HashingStorageIterator it, HashingStorage destStorage, - @Cached HashingStorageSetItemWithHash setItemWithHash) { + @Exclusive @Cached HashingStorageSetItemWithHash setItemWithHash) { // Note that the point is to avoid side-effecting __hash__ call. Since the source is // economic map, the key may be an arbitrary object. - ObjectHashMap srcMap = src.map; - return setItemWithHash.execute(frame, inliningTarget, destStorage, srcMap.getKey(it.index), srcMap.hashes[it.index], srcMap.getValue(it.index)); + ObjectHashMap srcMap = src; + return setItemWithHash.execute(frame, inliningTarget, destStorage, srcMap.getKey(it.index), srcMap.getHash(it.index), srcMap.getValue(it.index)); } @Fallback diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/ObjectHashMap.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/ObjectHashMap.java index 431598c1ed..4f37786bc4 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/ObjectHashMap.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/ObjectHashMap.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -42,8 +42,6 @@ import static com.oracle.truffle.api.CompilerDirectives.SLOWPATH_PROBABILITY; -import java.util.Arrays; - import com.oracle.graal.python.builtins.objects.common.ObjectHashMapFactory.PutNodeGen; import com.oracle.graal.python.builtins.objects.common.ObjectHashMapFactory.RemoveNodeGen; import com.oracle.graal.python.lib.PyObjectRichCompareBool; @@ -109,14 +107,10 @@ *

* Areas for future improvements: *

*/ -public final class ObjectHashMap { +public class ObjectHashMap extends HashingStorage { + private static final int TIGHT_ENTRY_CAPACITY_LIMIT = 8; + /** * Every hash map will preallocate at least this many buckets (and corresponding # of slots for * the real items). */ - private static final int INITIAL_INDICES_SIZE = 8; + private static final int INITIAL_INDICES_SIZE = 4; /** * We limit the max size of preallocated hash maps. See the comment in the ctor. @@ -137,21 +133,35 @@ public final class ObjectHashMap { private static final int MAX_PREALLOCATED_INDICES_SIZE = 1 << 20; /** - * Indices that participate in a collision chain are marked with the sign bit. + * Indices that participate in a collision chain are marked with the sign bit in the logical + * representation. The physical storage may use a narrower primitive array. */ private static final int COLLISION_MASK = 1 << 31; + private static final int BYTE_COLLISION_MASK = 1 << 7; + private static final int SHORT_COLLISION_MASK = 1 << 15; /** - * We need some placeholders. Those masked with {@link #COLLISION_MASK} give numbers higher than - * our max number of items, which is MAX_INT/2, because we cram they keys and values together - * into one array. + * Sparse table indices use 0 and 1 as reserved markers and store real compact-array indices as + * {@code index + INDEX_OFFSET}. The collision bit is kept separately. */ - private static final int DUMMY_INDEX = -2; - private static final int EMPTY_INDEX = -1; + private static final int EMPTY_INDEX = 0; + private static final int DUMMY_INDEX = 1; + private static final int INDEX_OFFSET = 2; + private static final int MAX_BYTE_INDEX = (BYTE_COLLISION_MASK - 1) - INDEX_OFFSET; + private static final int MAX_SHORT_INDEX = (SHORT_COLLISION_MASK - 1) - INDEX_OFFSET; + + private static void markCollision(byte[] metadata, int entryCapacity, int compactIndex) { + int indexByteSize = getIndexByteSize(entryCapacity); + int indicesOffset = getIndicesOffset(entryCapacity); + markCollision(metadata, indicesOffset, indexByteSize, getPhysicalCollisionMaskForIndexByteSize(indexByteSize), compactIndex); + } - private static void markCollision(int[] indices, int compactIndex) { - assert indices[compactIndex] != EMPTY_INDEX; - indices[compactIndex] = indices[compactIndex] | COLLISION_MASK; + private static void markCollision(byte[] metadata, int indicesOffset, int indexByteSize, int physicalCollisionMask, int compactIndex) { + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); + assert index != EMPTY_INDEX; + if (index != DUMMY_INDEX) { + setIndex(metadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex, index | COLLISION_MASK); + } } private static boolean isCollision(int index) { @@ -159,25 +169,17 @@ private static boolean isCollision(int index) { } private static int unwrapIndex(int value) { - return value & ~COLLISION_MASK; + return (value & ~COLLISION_MASK) - INDEX_OFFSET; } - /** - * This is the factor how much the map grows when new entries are added. Note that we grow - * according to the used slots for real items, not according to the buckets count, because when - * "growing" we also remove dummy entries, so "growing" could mean that we also shrink. - */ - private static final int GROWTH_RATE = 4; - private static final long PERTURB_SHIFT = 5; // It takes at most this many >>> shifts to turn any long into 0 private static final int PERTURB_SHIFTS_COUT = 13; - // Sparse array with indices pointing to hashes and keysAndValues - private int[] indices; + // Packed metadata: hashes first, then sparse indices. + private byte[] metadata; - // Compact arrays with the actual dict items: - long[] hashes; + // Compact array with the actual dict items: Object[] keysAndValues; // How many real items are in the dict @@ -194,12 +196,12 @@ private static int unwrapIndex(int value) { public ObjectHashMap(int capacity) { int allocateSize; - if (capacity <= INITIAL_INDICES_SIZE) { + int entryCapacity; + if (capacity <= 0) { allocateSize = INITIAL_INDICES_SIZE; + entryCapacity = getUsableSize(allocateSize); } else { - // We need the hash table of this size, in order to accommodate "capacity" many entries - int indicesCapacity = capacity + (capacity / 3); - if (indicesCapacity < 0 || indicesCapacity > MAX_PREALLOCATED_INDICES_SIZE) { + if (capacity > getUsableSize(MAX_PREALLOCATED_INDICES_SIZE)) { // This oddity is here because in some cases we are asked to allocate very large // dict in a situation where CPython (probably) does not preallocate at all and // fails later during the actual insertion on something unrelated before it can @@ -207,13 +209,13 @@ public ObjectHashMap(int capacity) { // behavior, so we take it easy if the requested size is too large. Maybe we should // rather revisit all such callsites instead of fixing this here... allocateSize = MAX_PREALLOCATED_INDICES_SIZE; + entryCapacity = getUsableSize(allocateSize); } else { - int pow2 = getNextPow2(indicesCapacity); - assert pow2 > INITIAL_INDICES_SIZE; - allocateSize = pow2; + allocateSize = getMinBucketsCount(capacity); + entryCapacity = getRequestedEntryCapacity(capacity, allocateSize); } } - allocateData(allocateSize); + allocateData(allocateSize, entryCapacity); } public ObjectHashMap() { @@ -222,16 +224,24 @@ public ObjectHashMap() { allocateData(INITIAL_INDICES_SIZE); } - private void allocateData(int newSize) { - assert isPow2(newSize); - indices = new int[newSize]; - Arrays.fill(indices, EMPTY_INDEX); - // since we allow ourselves to fill only up to 3/4 of the hash table, we need this many - // entries for the actual values: (we intentionally over-allocate by a small constant) - int quarter = newSize >> 2; - int usableSize = 3 * quarter + 2; - hashes = new long[usableSize]; - keysAndValues = new Object[usableSize * 2]; + protected ObjectHashMap(ObjectHashMap original) { + size = original.size; + usedHashes = original.usedHashes; + usedIndices = original.usedIndices; + metadata = PythonUtils.arrayCopyOf(original.metadata, original.metadata.length); + keysAndValues = PythonUtils.arrayCopyOf(original.keysAndValues, original.keysAndValues.length); + } + + private void allocateData(int bucketsCount) { + allocateData(bucketsCount, getUsableSize(bucketsCount)); + } + + private void allocateData(int bucketsCount, int entryCapacity) { + assert isPow2(bucketsCount); + assert entryCapacity > 0 && entryCapacity <= getUsableSize(bucketsCount); + ensureArraySizesFit(bucketsCount, entryCapacity); + metadata = createMetadata(bucketsCount, entryCapacity); + keysAndValues = new Object[entryCapacity * 2]; } public void clear() { @@ -241,17 +251,6 @@ public void clear() { allocateData(INITIAL_INDICES_SIZE); } - public ObjectHashMap copy() { - ObjectHashMap result = new ObjectHashMap(); - result.size = size; - result.usedHashes = usedHashes; - result.usedIndices = usedIndices; - result.hashes = PythonUtils.arrayCopyOf(hashes, hashes.length); - result.indices = PythonUtils.arrayCopyOf(indices, indices.length); - result.keysAndValues = PythonUtils.arrayCopyOf(keysAndValues, keysAndValues.length); - return result; - } - public MapCursor getEntries() { return new MapCursor(); } @@ -291,7 +290,7 @@ public boolean advance() { } public DictKey getKey() { - return new DictKey(ObjectHashMap.this.getKey(index), hashes[index]); + return new DictKey(ObjectHashMap.this.getKey(index), ObjectHashMap.this.getHash(index)); } public Object getValue() { @@ -299,15 +298,146 @@ public Object getValue() { } } - private static int getBucketsCount(int[] indices) { - return indices.length; + private int getEntryCapacity() { + return keysAndValues.length >> 1; + } + + private int getBucketsCount() { + return getBucketsCount(metadata, getEntryCapacity()); + } + + private static byte[] createMetadata(int bucketsCount, int usableSize) { + return new byte[getMetadataLength(bucketsCount, usableSize)]; + } + + private static int getBucketsCount(byte[] metadata, int entryCapacity) { + return (metadata.length - getIndicesOffset(entryCapacity)) / getIndexByteSize(entryCapacity); + } + + private static int getIndexByteSize(int entryCapacity) { + if (entryCapacity - 1 <= MAX_BYTE_INDEX) { + return Byte.BYTES; + } else if (entryCapacity - 1 <= MAX_SHORT_INDEX) { + return Short.BYTES; + } else { + return Integer.BYTES; + } + } + + private static int getIndicesOffset(int entryCapacity) { + return castMetadataInt(getIndicesOffsetLong(entryCapacity)); + } + + private static int getMetadataLength(int bucketsCount, int entryCapacity) { + return castMetadataInt(getMetadataLengthLong(bucketsCount, entryCapacity)); + } + + private static int getHashOffset(int index) { + return castMetadataInt((long) index * Long.BYTES); + } + + private static int getIndexOffset(int entryCapacity, int compactIndex) { + return castMetadataInt((long) getIndicesOffset(entryCapacity) + ((long) compactIndex * getIndexByteSize(entryCapacity))); + } + + private static int getIndexOffset(int indicesOffset, int indexByteSize, int compactIndex) { + return indicesOffset + compactIndex * indexByteSize; + } + + private static long getIndicesOffsetLong(int entryCapacity) { + return (long) entryCapacity * Long.BYTES; + } + + private static long getMetadataLengthLong(int bucketsCount, int entryCapacity) { + return getIndicesOffsetLong(entryCapacity) + ((long) bucketsCount * getIndexByteSize(entryCapacity)); + } + + private static void ensureArraySizesFit(int bucketsCount, int entryCapacity) { + long metadataLength = getMetadataLengthLong(bucketsCount, entryCapacity); + if (metadataLength > Integer.MAX_VALUE || ((long) entryCapacity << 1) > Integer.MAX_VALUE) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + throw new OutOfMemoryError(); + } + } + + private static int castMetadataInt(long value) { + assert value >= 0 && value <= Integer.MAX_VALUE; + return (int) value; + } + + private static int getPhysicalCollisionMaskForIndexByteSize(int indexByteSize) { + if (indexByteSize == Byte.BYTES) { + return BYTE_COLLISION_MASK; + } else if (indexByteSize == Short.BYTES) { + return SHORT_COLLISION_MASK; + } else { + return COLLISION_MASK; + } + } + + private static int getPhysicalCollisionMask(int entryCapacity) { + return getPhysicalCollisionMaskForIndexByteSize(getIndexByteSize(entryCapacity)); + } + + private static int getIndex(byte[] metadata, int entryCapacity, int compactIndex) { + return getIndex(metadata, getIndicesOffset(entryCapacity), getIndexByteSize(entryCapacity), compactIndex); + } + + private static int getIndex(byte[] metadata, int indicesOffset, int indexByteSize, int compactIndex) { + int offset = getIndexOffset(indicesOffset, indexByteSize, compactIndex); + if (indexByteSize == Byte.BYTES) { + return decodeIndex(metadata[offset] & 0xFF, BYTE_COLLISION_MASK); + } else if (indexByteSize == Short.BYTES) { + return decodeIndex(PythonUtils.ARRAY_ACCESSOR.getShort(metadata, offset) & 0xFFFF, SHORT_COLLISION_MASK); + } else { + return decodeIndex(PythonUtils.ARRAY_ACCESSOR.getInt(metadata, offset), COLLISION_MASK); + } + } + + private static int decodeIndex(int encodedValue, int physicalCollisionMask) { + int value = encodedValue & (physicalCollisionMask - 1); + if ((encodedValue & physicalCollisionMask) != 0) { + value |= COLLISION_MASK; + } + return value; + } + + private static void setIndex(byte[] metadata, int entryCapacity, int compactIndex, int logicalValue) { + setIndex(metadata, getIndicesOffset(entryCapacity), getIndexByteSize(entryCapacity), getPhysicalCollisionMask(entryCapacity), compactIndex, logicalValue); + } + + private static void setIndex(byte[] metadata, int indicesOffset, int indexByteSize, int physicalCollisionMask, int compactIndex, int logicalValue) { + int encodedValue = logicalValue & ~COLLISION_MASK; + if ((logicalValue & COLLISION_MASK) != 0) { + encodedValue |= physicalCollisionMask; + } + int offset = getIndexOffset(indicesOffset, indexByteSize, compactIndex); + if (indexByteSize == Byte.BYTES) { + metadata[offset] = (byte) encodedValue; + } else if (indexByteSize == Short.BYTES) { + PythonUtils.ARRAY_ACCESSOR.putShort(metadata, offset, (short) encodedValue); + } else { + PythonUtils.ARRAY_ACCESSOR.putInt(metadata, offset, encodedValue); + } + } + + private static long getHash(byte[] metadata, int index) { + return PythonUtils.ARRAY_ACCESSOR.getLong(metadata, getHashOffset(index)); + } + + private static void setHash(byte[] metadata, int index, long hash) { + PythonUtils.ARRAY_ACCESSOR.putLong(metadata, getHashOffset(index), hash); } - private boolean needsResize(int[] localIndices) { - // when the hash table is 3/4 full, we resize on insertion - int bucketsCount = getBucketsCount(localIndices); - int bucketsCntQuarter = Math.max(1, bucketsCount >> 2); - return usedIndices + bucketsCntQuarter > bucketsCount; + long getHash(int index) { + return getHash(metadata, index); + } + + private boolean needsResize(byte[] localMetadata) { + // Keep one slot empty at all times. For the smallest table, that means resizing once 2 of + // the 4 buckets are already in use instead of allowing the table to become completely full. + int bucketsCount = getBucketsCount(localMetadata, getEntryCapacity()); + return usedHashes >= getEntryCapacity() || usedIndices >= getUsableSize(bucketsCount); } public int size() { @@ -328,7 +458,7 @@ public static Object[] doPopWithRestart(Node inliningTarget, ObjectHashMap map, @Cached InlinedBranchProfile lookupRestart) { while (true) { try { - return doPop(inliningTarget, map, map.indices, emptyMapProfile, hasValueProfile, hasCollisionProfile); + return doPop(inliningTarget, map, map.metadata, emptyMapProfile, hasValueProfile, hasCollisionProfile); } catch (RestartLookupException ignore) { lookupRestart.enter(inliningTarget); } @@ -339,7 +469,7 @@ private static boolean isIndex(int indexInIndices, int indexToFind) { return indexInIndices != DUMMY_INDEX && indexInIndices != EMPTY_INDEX && indexToFind == unwrapIndex(indexInIndices); } - private static Object[] doPop(Node inliningTarget, ObjectHashMap map, int[] indices, + private static Object[] doPop(Node inliningTarget, ObjectHashMap map, byte[] metadata, @Cached InlinedConditionProfile emptyMapProfile, @Cached InlinedCountingConditionProfile hasValueProfile, @Cached InlinedCountingConditionProfile hasCollisionProfile) throws RestartLookupException { @@ -347,9 +477,14 @@ private static Object[] doPop(Node inliningTarget, ObjectHashMap map, int[] indi return null; } Object[] localKeysAndValues = map.keysAndValues; + int entryCapacity = map.getEntryCapacity(); + int indicesLen = getBucketsCount(metadata, entryCapacity); + int indexByteSize = getIndexByteSize(entryCapacity); + int indicesOffset = getIndicesOffset(entryCapacity); + int physicalCollisionMask = getPhysicalCollisionMaskForIndexByteSize(indexByteSize); int usedHashes = map.usedHashes; for (int i = usedHashes - 1; i >= 0; i--) { - if (indices != map.indices) { + if (metadata != map.metadata) { // restart, can happen after Truffle safepoint on backedge throw RestartLookupException.INSTANCE; } @@ -358,13 +493,13 @@ private static Object[] doPop(Node inliningTarget, ObjectHashMap map, int[] indi // We can remove the item from the compact arrays var result = new Object[]{map.getKey(i), value}; // We need to find the slot in the sparse indices array - long hash = map.hashes[i]; - int compactIndex = getIndex(indices.length, hash); - int index = indices[compactIndex]; + long hash = map.getHash(i); + int compactIndex = getIndex(indicesLen, hash); + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (hasCollisionProfile.profile(inliningTarget, isIndex(index, i))) { - indices[compactIndex] = DUMMY_INDEX; + setIndex(metadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex, DUMMY_INDEX); } else { - removeBucketWithIndex(map, indices, hash, compactIndex, i); + removeBucketWithIndex(map, metadata, indicesOffset, indexByteSize, physicalCollisionMask, indicesLen, hash, compactIndex, i); } // Only remove the slot now, removeBucketWithIndex can restart the search map.setValue(i, null); @@ -376,20 +511,23 @@ private static Object[] doPop(Node inliningTarget, ObjectHashMap map, int[] indi throw CompilerDirectives.shouldNotReachHere(); } - private static void removeBucketWithIndex(ObjectHashMap map, int[] indices, long hash, int initialCompactIndex, int indexToFind) throws RestartLookupException { - int searchLimit = getBucketsCount(map.indices) + PERTURB_SHIFTS_COUT; + private static void removeBucketWithIndex(ObjectHashMap map, byte[] metadata, int indicesOffset, int indexByteSize, int physicalCollisionMask, int indicesLen, long hash, + int initialCompactIndex, + int indexToFind) + throws RestartLookupException { + int searchLimit = indicesLen + PERTURB_SHIFTS_COUT; long perturb = hash; int compactIndex = initialCompactIndex; for (int i = 0; i < searchLimit; i++) { - if (indices != map.indices) { + if (metadata != map.metadata) { // guards against things happening in the safepoint on the backedge throw RestartLookupException.INSTANCE; } perturb >>>= PERTURB_SHIFT; - compactIndex = nextIndex(indices.length, compactIndex, perturb); - int index = indices[compactIndex]; + compactIndex = nextIndex(indicesLen, compactIndex, perturb); + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (isIndex(index, indexToFind)) { - indices[compactIndex] = DUMMY_INDEX; + setIndex(metadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex, DUMMY_INDEX); return; } } @@ -433,56 +571,60 @@ static Object doGet(Frame frame, ObjectHashMap map, Object key, long keyHash, InlinedCountingConditionProfile collisionFoundEqKey, PyObjectRichCompareBool eqNode) throws RestartLookupException { assert map.checkInternalState(); - int[] indices = map.indices; - int indicesLen = indices.length; + byte[] metadata = map.metadata; + int entryCapacity = map.getEntryCapacity(); + int indicesLen = getBucketsCount(metadata, entryCapacity); + int indexByteSize = getIndexByteSize(entryCapacity); + int indicesOffset = getIndicesOffset(entryCapacity); int compactIndex = getIndex(indicesLen, keyHash); - int index = indices[compactIndex]; + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (foundNullKey.profile(inliningTarget, index == EMPTY_INDEX)) { return null; } if (foundSameHashKey.profile(inliningTarget, index != DUMMY_INDEX)) { int unwrappedIndex = unwrapIndex(index); - if (foundEqKey.profile(inliningTarget, map.keysEqual(indices, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { + if (foundEqKey.profile(inliningTarget, map.keysEqual(metadata, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { return map.getValue(unwrappedIndex); - } else if (!isCollision(indices[compactIndex])) { - // ^ note: we need to re-read indices[compactIndex], + } else if (!isCollision(getIndex(metadata, indicesOffset, indexByteSize, compactIndex))) { + // ^ note: we need to re-read the bucket, // it may have been changed during __eq__ return null; } } - return getCollision(frame, map, key, keyHash, inliningTarget, collisionFoundNoValue, collisionFoundEqKey, eqNode, indices, indicesLen, compactIndex); + return getCollision(frame, map, key, keyHash, inliningTarget, collisionFoundNoValue, collisionFoundEqKey, eqNode, metadata, indicesOffset, indexByteSize, indicesLen, + compactIndex); } @InliningCutoff private static Object getCollision(Frame frame, ObjectHashMap map, Object key, long keyHash, Node inliningTarget, InlinedCountingConditionProfile collisionFoundNoValue, InlinedCountingConditionProfile collisionFoundEqKey, - PyObjectRichCompareBool eqNode, int[] indices, int indicesLen, int compactIndex) throws RestartLookupException { + PyObjectRichCompareBool eqNode, byte[] metadata, int indicesOffset, int indexByteSize, int indicesLen, int compactIndex) throws RestartLookupException { int index; // collision: intentionally counted loop long perturb = keyHash; - int searchLimit = getBucketsCount(indices) + PERTURB_SHIFTS_COUT; + int searchLimit = indicesLen + PERTURB_SHIFTS_COUT; int i = 0; try { for (; i < searchLimit; i++) { - if (indices != map.indices) { + if (metadata != map.metadata) { // guards against things happening in the safepoint on the backedge throw RestartLookupException.INSTANCE; } perturb >>>= PERTURB_SHIFT; compactIndex = nextIndex(indicesLen, compactIndex, perturb); - index = map.indices[compactIndex]; + index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (collisionFoundNoValue.profile(inliningTarget, index == EMPTY_INDEX)) { return null; } if (index != DUMMY_INDEX) { int unwrappedIndex = unwrapIndex(index); - if (collisionFoundEqKey.profile(inliningTarget, map.keysEqual(indices, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { + if (collisionFoundEqKey.profile(inliningTarget, map.keysEqual(metadata, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { return map.getValue(unwrappedIndex); - } else if (!isCollision(indices[compactIndex])) { - // ^ note: we need to re-read indices[compactIndex], + } else if (!isCollision(getIndex(metadata, indicesOffset, indexByteSize, compactIndex))) { + // ^ note: we need to re-read the bucket, // it may have been changed during __eq__ return null; } @@ -555,54 +697,60 @@ static void doPut(Frame frame, ObjectHashMap map, Object key, long keyHash, Obje InlinedBranchProfile rehash2Profile, PyObjectRichCompareBool eqNode) throws RestartLookupException { assert map.checkInternalState(); - int[] indices = map.indices; - int indicesLen = indices.length; + byte[] metadata = map.metadata; + int entryCapacity = map.getEntryCapacity(); + int indicesLen = getBucketsCount(metadata, entryCapacity); + int indexByteSize = getIndexByteSize(entryCapacity); + int indicesOffset = getIndicesOffset(entryCapacity); + int physicalCollisionMask = getPhysicalCollisionMaskForIndexByteSize(indexByteSize); int compactIndex = getIndex(indicesLen, keyHash); - int index = indices[compactIndex]; + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (foundNullKey.profile(inliningTarget, index == EMPTY_INDEX)) { - map.putInNewSlot(indices, inliningTarget, rehash1Profile, key, keyHash, value, compactIndex); + map.putInNewSlot(metadata, entryCapacity, inliningTarget, rehash1Profile, key, keyHash, value, compactIndex); return; } - if (foundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(indices, frame, inliningTarget, unwrapIndex(index), key, keyHash, eqNode))) { + if (foundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(metadata, frame, inliningTarget, unwrapIndex(index), key, keyHash, eqNode))) { // we found the key, override the value, Python does not override the key though map.setValue(unwrapIndex(index), value); return; } - putCollision(frame, map, key, keyHash, value, inliningTarget, collisionFoundNoValue, collisionFoundEqKey, rehash2Profile, eqNode, indices, indicesLen, compactIndex); + putCollision(frame, map, key, keyHash, value, inliningTarget, collisionFoundNoValue, collisionFoundEqKey, rehash2Profile, eqNode, metadata, indicesOffset, indexByteSize, + physicalCollisionMask, entryCapacity, indicesLen, compactIndex); } @InliningCutoff private static void putCollision(Frame frame, ObjectHashMap map, Object key, long keyHash, Object value, Node inliningTarget, InlinedCountingConditionProfile collisionFoundNoValue, InlinedCountingConditionProfile collisionFoundEqKey, InlinedBranchProfile rehash2Profile, PyObjectRichCompareBool eqNode, - int[] indices, int indicesLen, int compactIndex) throws RestartLookupException { - markCollision(indices, compactIndex); + byte[] metadata, int indicesOffset, int indexByteSize, int physicalCollisionMask, int entryCapacity, int indicesLen, int compactIndex) + throws RestartLookupException { + markCollision(metadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex); long perturb = keyHash; - int searchLimit = getBucketsCount(indices) + PERTURB_SHIFTS_COUT; + int searchLimit = indicesLen + PERTURB_SHIFTS_COUT; int i = 0; try { for (; i < searchLimit; i++) { - if (indices != map.indices) { + if (metadata != map.metadata) { // guards against things happening in the safepoint on the backedge throw RestartLookupException.INSTANCE; } perturb >>>= PERTURB_SHIFT; compactIndex = nextIndex(indicesLen, compactIndex, perturb); - int index = indices[compactIndex]; + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (collisionFoundNoValue.profile(inliningTarget, index == EMPTY_INDEX)) { - map.putInNewSlot(indices, inliningTarget, rehash2Profile, key, keyHash, value, compactIndex); + map.putInNewSlot(metadata, entryCapacity, inliningTarget, rehash2Profile, key, keyHash, value, compactIndex); return; } - if (collisionFoundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(indices, frame, inliningTarget, unwrapIndex(index), key, keyHash, eqNode))) { + if (collisionFoundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(metadata, frame, inliningTarget, unwrapIndex(index), key, keyHash, eqNode))) { // we found the key, override the value, Python does not override the key // though map.setValue(unwrapIndex(index), value); return; } - markCollision(indices, compactIndex); + markCollision(metadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex); TruffleSafepoint.poll(inliningTarget); } } finally { @@ -618,28 +766,33 @@ private static void putCollision(Frame frame, ObjectHashMap map, Object key, lon // Internal helper: it is not profiling, never rehashes, and it assumes that the hash map never // contains the key that we are inserting - private void insertNewKey(int[] localIndices, Object key, long keyHash, Object value) { - assert localIndices == this.indices; - int compactIndex = getIndex(localIndices.length, keyHash); - int index = localIndices[compactIndex]; + private void insertNewKey(byte[] localMetadata, Object key, long keyHash, Object value) { + assert localMetadata == this.metadata; + int entryCapacity = getEntryCapacity(); + int indicesLen = getBucketsCount(localMetadata, entryCapacity); + int indexByteSize = getIndexByteSize(entryCapacity); + int indicesOffset = getIndicesOffset(entryCapacity); + int physicalCollisionMask = getPhysicalCollisionMaskForIndexByteSize(indexByteSize); + int compactIndex = getIndex(indicesLen, keyHash); + int index = getIndex(localMetadata, indicesOffset, indexByteSize, compactIndex); if (index == EMPTY_INDEX) { - putInNewSlot(localIndices, key, keyHash, value, compactIndex); + putInNewSlot(localMetadata, entryCapacity, key, keyHash, value, compactIndex); return; } // collision - markCollision(localIndices, compactIndex); + markCollision(localMetadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex); long perturb = keyHash; - int searchLimit = getBucketsCount(localIndices) + PERTURB_SHIFTS_COUT; + int searchLimit = indicesLen + PERTURB_SHIFTS_COUT; for (int i = 0; i < searchLimit; i++) { perturb >>>= PERTURB_SHIFT; - compactIndex = nextIndex(localIndices.length, compactIndex, perturb); - index = localIndices[compactIndex]; + compactIndex = nextIndex(indicesLen, compactIndex, perturb); + index = getIndex(localMetadata, indicesOffset, indexByteSize, compactIndex); if (index == EMPTY_INDEX) { - putInNewSlot(localIndices, key, keyHash, value, compactIndex); + putInNewSlot(localMetadata, entryCapacity, key, keyHash, value, compactIndex); return; } - markCollision(localIndices, compactIndex); + markCollision(localMetadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex); } // all values are dummies? Not possible, since we should have compacted the // hashes/keysAndValues arrays in "remove". Also, there must be an unused slot available, @@ -647,29 +800,30 @@ private void insertNewKey(int[] localIndices, Object key, long keyHash, Object v throw CompilerDirectives.shouldNotReachHere(); } - private void putInNewSlot(int[] localIndices, Node inliningTarget, InlinedBranchProfile rehashProfile, Object key, long keyHash, Object value, int compactIndex) { - assert indices == localIndices; - if (CompilerDirectives.injectBranchProbability(SLOWPATH_PROBABILITY, needsResize(localIndices))) { + private void putInNewSlot(byte[] localMetadata, int entryCapacity, Node inliningTarget, InlinedBranchProfile rehashProfile, Object key, long keyHash, Object value, int compactIndex) { + assert metadata == localMetadata; + assert entryCapacity == getEntryCapacity(); + if (CompilerDirectives.injectBranchProbability(SLOWPATH_PROBABILITY, needsResize(localMetadata))) { rehashProfile.enter(inliningTarget); rehashAndPut(key, keyHash, value); return; } - putInNewSlot(localIndices, key, keyHash, value, compactIndex); + putInNewSlot(localMetadata, entryCapacity, key, keyHash, value, compactIndex); } - private void putInNewSlot(int[] localIndices, Object key, long keyHash, Object value, int compactIndex) { + private void putInNewSlot(byte[] localMetadata, int entryCapacity, Object key, long keyHash, Object value, int compactIndex) { size++; usedIndices++; int newIndex = usedHashes++; - localIndices[compactIndex] = newIndex; + setIndex(localMetadata, entryCapacity, compactIndex, newIndex + INDEX_OFFSET); setValue(newIndex, value); setKey(newIndex, key); - hashes[newIndex] = keyHash; + setHash(localMetadata, newIndex, keyHash); } private boolean needsCompaction() { // if more than quarter of all the slots are occupied by dummy values -> compact - int quarterOfUsable = hashes.length >> 2; + int quarterOfUsable = getEntryCapacity() >> 2; int dummyCnt = usedHashes - size; return dummyCnt > quarterOfUsable; } @@ -718,21 +872,25 @@ static Object doRemove(Frame frame, Node inliningTarget, ObjectHashMap map, Obje compactProfile.enter(inliningTarget); map.compact(); } - int[] indices = map.indices; - int indicesLen = indices.length; + byte[] metadata = map.metadata; + int entryCapacity = map.getEntryCapacity(); + int indicesLen = getBucketsCount(metadata, entryCapacity); + int indexByteSize = getIndexByteSize(entryCapacity); + int indicesOffset = getIndicesOffset(entryCapacity); + int physicalCollisionMask = getPhysicalCollisionMaskForIndexByteSize(indexByteSize); // Note: CPython is not shrinking the capacity of the hash table on delete, we do the // same int compactIndex = getIndex(indicesLen, keyHash); - int index = indices[compactIndex]; + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (foundNullKey.profile(inliningTarget, index == EMPTY_INDEX)) { return null; // not found } int unwrappedIndex = unwrapIndex(index); - if (foundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(indices, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { + if (foundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(metadata, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { Object result = map.getValue(unwrappedIndex); - indices[compactIndex] = DUMMY_INDEX; + setIndex(metadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex, DUMMY_INDEX); map.setValue(unwrappedIndex, null); map.setKey(unwrappedIndex, null); map.size--; @@ -740,33 +898,35 @@ static Object doRemove(Frame frame, Node inliningTarget, ObjectHashMap map, Obje } // collision: intentionally counted loop - return removeCollision(frame, inliningTarget, map, key, keyHash, collisionFoundNoValue, collisionFoundEqKey, eqNode, indices, indicesLen, compactIndex); + return removeCollision(frame, inliningTarget, map, key, keyHash, collisionFoundNoValue, collisionFoundEqKey, eqNode, metadata, indicesOffset, indexByteSize, + physicalCollisionMask, indicesLen, compactIndex); } @InliningCutoff private static Object removeCollision(Frame frame, Node inliningTarget, ObjectHashMap map, Object key, long keyHash, InlinedCountingConditionProfile collisionFoundNoValue, InlinedCountingConditionProfile collisionFoundEqKey, - PyObjectRichCompareBool eqNode, int[] indices, int indicesLen, int compactIndex) throws RestartLookupException { + PyObjectRichCompareBool eqNode, byte[] metadata, int indicesOffset, int indexByteSize, int physicalCollisionMask, int indicesLen, int compactIndex) + throws RestartLookupException { int unwrappedIndex; long perturb = keyHash; - int searchLimit = getBucketsCount(indices) + PERTURB_SHIFTS_COUT; + int searchLimit = indicesLen + PERTURB_SHIFTS_COUT; int i = 0; try { for (; i < searchLimit; i++) { - if (indices != map.indices) { + if (metadata != map.metadata) { // guards against things happening in the safepoint on the backedge throw RestartLookupException.INSTANCE; } perturb >>>= PERTURB_SHIFT; compactIndex = nextIndex(indicesLen, compactIndex, perturb); - int index = indices[compactIndex]; + int index = getIndex(metadata, indicesOffset, indexByteSize, compactIndex); if (collisionFoundNoValue.profile(inliningTarget, index == EMPTY_INDEX)) { return null; } unwrappedIndex = unwrapIndex(index); - if (collisionFoundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(indices, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { + if (collisionFoundEqKey.profile(inliningTarget, index != DUMMY_INDEX && map.keysEqual(metadata, frame, inliningTarget, unwrappedIndex, key, keyHash, eqNode))) { Object result = map.getValue(unwrappedIndex); - indices[compactIndex] = DUMMY_INDEX; + setIndex(metadata, indicesOffset, indexByteSize, physicalCollisionMask, compactIndex, DUMMY_INDEX); map.setValue(unwrappedIndex, null); map.setKey(unwrappedIndex, null); map.size--; @@ -797,9 +957,9 @@ public Throwable fillInStackTrace() { } } - private boolean keysEqual(int[] originalIndices, Frame frame, Node inliningTarget, int index, Object key, long keyHash, + private boolean keysEqual(byte[] originalMetadata, Frame frame, Node inliningTarget, int index, Object key, long keyHash, PyObjectRichCompareBool eqNode) throws RestartLookupException { - if (hashes[index] != keyHash) { + if (getHash(index) != keyHash) { return false; } Object originalKey = getKey(index); @@ -807,7 +967,7 @@ private boolean keysEqual(int[] originalIndices, Frame frame, Node inliningTarge return true; } boolean result = eqNode.executeEq(frame, inliningTarget, originalKey, key); - if (indices != originalIndices || getKey(index) != originalKey) { + if (metadata != originalMetadata || getKey(index) != originalKey) { // Either someone overridden the slot we are just examining, or rehasing reallocated the // indices array. We need to restart the lookup. Other situations are OK: // @@ -832,42 +992,39 @@ private boolean keysEqual(int[] originalIndices, Frame frame, Node inliningTarge */ @TruffleBoundary private void rehashAndPut(Object newKey, long newKeyHash, Object newValue) { - int requiredIndicesSize = usedHashes * GROWTH_RATE; - // We need the hash table of this size, in order to accommodate "requiredIndicesSize" items - int indicesCapacity = requiredIndicesSize + (requiredIndicesSize / 3); - if (indicesCapacity < INITIAL_INDICES_SIZE) { - indicesCapacity = INITIAL_INDICES_SIZE; - } else { - indicesCapacity = getNextPow2(indicesCapacity); - if (indicesCapacity << 1 < 0) { - // some arrays we allocate are 2 times the size - throw new OutOfMemoryError(); - } - } - long[] oldHashes = hashes; + int newSize = size + 1; + int indicesCapacity = getMinBucketsCount(newSize); + byte[] oldMetadata = metadata; Object[] oldKeysAndValues = keysAndValues; int oldUsedSize = usedHashes; int oldSize = size; - allocateData(indicesCapacity); + allocateData(indicesCapacity, getRequestedEntryCapacity(newSize, indicesCapacity)); size = 0; usedHashes = 0; usedIndices = 0; - int[] localIndices = this.indices; + byte[] localMetadata = this.metadata; for (int i = 0; i < oldUsedSize; i++) { if (getValue(i, oldKeysAndValues) != null) { final Object key = getKey(i, oldKeysAndValues); - insertNewKey(localIndices, key, oldHashes[i], getValue(i, oldKeysAndValues)); + insertNewKey(localMetadata, key, getHash(oldMetadata, i), getValue(i, oldKeysAndValues)); } } assert size == oldSize : String.format("size=%d, oldSize=%d, oldUsedSize=%d, usedHashes=%d, usedIndices=%d", size, oldSize, oldUsedSize, usedHashes, usedIndices); - insertNewKey(localIndices, newKey, newKeyHash, newValue); + insertNewKey(localMetadata, newKey, newKeyHash, newValue); + } + + private static int getRequestedEntryCapacity(int requestedCapacity, int bucketsCount) { + if (requestedCapacity <= TIGHT_ENTRY_CAPACITY_LIMIT) { + return requestedCapacity; + } + return getUsableSize(bucketsCount); } @TruffleBoundary private void compact() { // shuffle[X] will tell us by how much value X found in 'indices' should be shuffled to left - int[] shuffle = new int[hashes.length]; + int[] shuffle = new int[getEntryCapacity()]; int currentShuffle = 0; int dummyCount = 0; for (int i = 0; i < usedHashes; i++) { @@ -882,21 +1039,23 @@ private void compact() { setKey(i - currentShuffle, getKey(i)); setValue(i, null); setKey(i, null); - hashes[i - currentShuffle] = hashes[i]; + setHash(metadata, i - currentShuffle, getHash(i)); shuffle[i] = currentShuffle; } } usedHashes -= dummyCount; // We've "removed" the dummy entries - int[] localIndices = indices; - for (int i = 0; i < localIndices.length; i++) { - int index = localIndices[i]; + byte[] localMetadata = metadata; + int entryCapacity = getEntryCapacity(); + int localIndicesLength = getBucketsCount(localMetadata, entryCapacity); + for (int i = 0; i < localIndicesLength; i++) { + int index = getIndex(localMetadata, entryCapacity, i); if (index != EMPTY_INDEX && index != DUMMY_INDEX) { boolean collision = isCollision(index); int unwrapped = unwrapIndex(index); int newIndex = unwrapped - shuffle[unwrapped]; - localIndices[i] = newIndex; + setIndex(localMetadata, entryCapacity, i, newIndex + INDEX_OFFSET); if (collision) { - markCollision(localIndices, i); + markCollision(localMetadata, entryCapacity, i); } } else if (index == DUMMY_INDEX) { dummyCount--; @@ -916,6 +1075,24 @@ private static int getIndex(int indicesLen, long hash) { return (int) (hash & (indicesLen - 1)); } + private static int getUsableSize(int bucketsCount) { + int minFreeBuckets = Math.max(2, bucketsCount >> 2); + return bucketsCount - minFreeBuckets + 1; + } + + private static int getMinBucketsCount(int requiredEntries) { + int bucketsCount = INITIAL_INDICES_SIZE; + while (getUsableSize(bucketsCount) < requiredEntries) { + if (bucketsCount > Integer.MAX_VALUE >> 1) { + // The backing arrays cannot grow past Java array indexing limits. The exact packed + // metadata bound is checked in allocateData. + throw new OutOfMemoryError(); + } + bucketsCount <<= 1; + } + return bucketsCount; + } + public static Object getKey(int index, Object[] keysAndValues) { return keysAndValues[index << 1]; } @@ -943,17 +1120,10 @@ public void setKey(int index, Object key) { private boolean checkInternalState() { // We must have at least one empty slot, collision resolution relies on the fact that it is // always going to find an empty slot - assert usedIndices < indices.length : usedIndices; + assert usedIndices < getBucketsCount() : usedIndices; return true; } - private static int getNextPow2(int n) { - if (isPow2(n)) { - return n; - } - return 1 << (Integer.SIZE - Integer.numberOfLeadingZeros(n)); - } - private static boolean isPow2(int n) { return Integer.bitCount(n) == 1; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java index fef50c8cca..5159496d63 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/PBytecodeRootNode.java @@ -5857,10 +5857,10 @@ static void doIt(VirtualFrame frame, ObjectHashMap map, Object key, Object value } @ExplodeLoop - private static ObjectHashMap moveFromStackToSetHashMap(VirtualFrame virtualFrame, int start, int stop, ObjHashMapPutNode putNode) { + private static EconomicMapStorage moveFromStackToSetHashMap(VirtualFrame virtualFrame, int start, int stop, ObjHashMapPutNode putNode) { CompilerAsserts.partialEvaluationConstant(start); CompilerAsserts.partialEvaluationConstant(stop); - var result = new ObjectHashMap(stop - start); + EconomicMapStorage result = EconomicMapStorage.create(stop - start); for (int i = start; i < stop; i++) { putNode.execute(virtualFrame, result, virtualFrame.getObject(i), PNone.NONE); virtualFrame.clear(i); @@ -5869,10 +5869,10 @@ private static ObjectHashMap moveFromStackToSetHashMap(VirtualFrame virtualFrame } @ExplodeLoop - private static ObjectHashMap moveFromStackToDictHashMap(VirtualFrame virtualFrame, int start, int stop, ObjHashMapPutNode putNode) { + private static EconomicMapStorage moveFromStackToDictHashMap(VirtualFrame virtualFrame, int start, int stop, ObjHashMapPutNode putNode) { CompilerAsserts.partialEvaluationConstant(start); CompilerAsserts.partialEvaluationConstant(stop); - var result = new ObjectHashMap((stop - start) / 2); + EconomicMapStorage result = EconomicMapStorage.create((stop - start) / 2); for (int i = start; i + 1 < stop; i += 2) { putNode.execute(virtualFrame, result, virtualFrame.getObject(i), virtualFrame.getObject(i + 1)); virtualFrame.clear(i); @@ -5901,16 +5901,16 @@ private int bytecodeCollectionFromStack(VirtualFrame virtualFrame, int type, int case CollectionBits.KIND_SET: { ObjHashMapPutNode putNode = insertChildNode(localNodes, nodeIndex, UNCACHED_OBJ_HASHMAP_PUT, ObjHashMapPutNodeGen.class, NODE_OBJ_HASHMAP_PUT, useCachedNodes); - ObjectHashMap storage = moveFromStackToSetHashMap(virtualFrame, stackTop - count + 1, stackTop + 1, putNode); - res = PFactory.createSet(getLanguage(), new EconomicMapStorage(storage, false)); + EconomicMapStorage storage = moveFromStackToSetHashMap(virtualFrame, stackTop - count + 1, stackTop + 1, putNode); + res = PFactory.createSet(getLanguage(), storage); break; } case CollectionBits.KIND_DICT: { ObjHashMapPutNode putNode = insertChildNode(localNodes, nodeIndex, UNCACHED_OBJ_HASHMAP_PUT, ObjHashMapPutNodeGen.class, NODE_OBJ_HASHMAP_PUT, useCachedNodes); assert count % 2 == 0; - ObjectHashMap storage = moveFromStackToDictHashMap(virtualFrame, stackTop - count + 1, stackTop + 1, putNode); - res = PFactory.createDict(getLanguage(), new EconomicMapStorage(storage, false)); + EconomicMapStorage storage = moveFromStackToDictHashMap(virtualFrame, stackTop - count + 1, stackTop + 1, putNode); + res = PFactory.createDict(getLanguage(), storage); break; } case CollectionBits.KIND_KWORDS: { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/MakeSetStorageNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/MakeSetStorageNode.java index a773a0b633..471c7bb672 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/MakeSetStorageNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/MakeSetStorageNode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2025, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -42,7 +42,6 @@ import com.oracle.graal.python.builtins.objects.PNone; import com.oracle.graal.python.builtins.objects.common.EconomicMapStorage; -import com.oracle.graal.python.builtins.objects.common.ObjectHashMap; import com.oracle.graal.python.builtins.objects.common.ObjectHashMap.PutNode; import com.oracle.graal.python.lib.PyObjectHashNode; import com.oracle.truffle.api.dsl.Cached; @@ -66,12 +65,12 @@ public static EconomicMapStorage doNonEmpty(VirtualFrame frame, Node inliningTar @Cached PyObjectHashNode hashNode, @Cached PutNode putNode) { int profiledLen = lengthProfile.profile(inliningTarget, elements.length); - ObjectHashMap map = new ObjectHashMap(profiledLen); + EconomicMapStorage storage = EconomicMapStorage.create(profiledLen); for (int i = 0; i < profiledLen; i++) { Object key = elements[i]; long keyHash = hashNode.execute(frame, inliningTarget, key); - putNode.put(frame, inliningTarget, map, key, keyHash, PNone.NONE); + putNode.put(frame, inliningTarget, storage, key, keyHash, PNone.NONE); } - return new EconomicMapStorage(map, false); + return storage; } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java index 9ebc46b456..8fae5bfca3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode_dsl/PBytecodeDSLRootNode.java @@ -2075,15 +2075,15 @@ public static PDict empty(VirtualFrame frame, int entries, @Variadic Object[] ke if (keysAndValues.length != entries * 2) { throw CompilerDirectives.shouldNotReachHere(); } - ObjectHashMap map = new ObjectHashMap(keysAndValues.length / 2); - PDict dict = PFactory.createDict(rootNode.getLanguage(), new EconomicMapStorage(map, false)); + EconomicMapStorage map = EconomicMapStorage.create(entries); + PDict dict = PFactory.createDict(rootNode.getLanguage(), map); for (int i = 0; i < entries; i++) { Object key = keysAndValues[i * 2]; Object value = keysAndValues[i * 2 + 1]; // Each entry represents either a k: v pair or a **splats. splats have no key. if (key == PNone.NO_VALUE) { updateNode.execute(frame, dict, value); - assert dict.getDictStorage() instanceof EconomicMapStorage es && es.mapIsEqualTo(map); + assert dict.getDictStorage() == map; } else { long hash = hashNode.execute(frame, inliningTarget, key); putNode.put(frame, inliningTarget, map, key, hash, value); From 5a53be7bc4826845d906e6d85b0d7e309f7941a7 Mon Sep 17 00:00:00 2001 From: Thomas Wuerthinger Date: Fri, 24 Apr 2026 17:49:36 +0200 Subject: [PATCH 2/2] Add regression coverage for dict storage changes --- .../test/builtin/objects/dict/PDictTest.java | 14 +- .../test/objects/ObjectHashMapTests.java | 28 +- .../src/tests/test_dict.py | 305 +++++++++++++++++- 3 files changed, 338 insertions(+), 9 deletions(-) diff --git a/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/builtin/objects/dict/PDictTest.java b/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/builtin/objects/dict/PDictTest.java index 30450f9d61..be6ce8d515 100644 --- a/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/builtin/objects/dict/PDictTest.java +++ b/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/builtin/objects/dict/PDictTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -98,6 +98,7 @@ public void economicMapStorageSet() { dict.setItem(ts("key1"), 42); assertEquals(2, length(dict)); + assertTrue(dict.getDictStorage() instanceof EconomicMapStorage); assertEquals(42, dict.getItem(ts("key1"))); } @@ -113,9 +114,20 @@ public void economicMapStorageDel() { delItem(dict, ts("key2")); assertEquals(2, length(dict)); + assertTrue(dict.getDictStorage() instanceof EconomicMapStorage); assertEquals(42, dict.getItem(ts("key1"))); assertNull(dict.getItem(ts("key2"))); } + + @Test + public void economicMapStorageEightEntries() { + PDict dict = PFactory.createDict(PythonLanguage.get(null)); + for (int i = 0; i < 8; i++) { + dict.setItem(i, i); + } + assertTrue(dict.getDictStorage() instanceof EconomicMapStorage); + assertEquals(8, length(dict)); + } } diff --git a/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/objects/ObjectHashMapTests.java b/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/objects/ObjectHashMapTests.java index 18dcf485a3..510d275c03 100644 --- a/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/objects/ObjectHashMapTests.java +++ b/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/objects/ObjectHashMapTests.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -173,7 +173,7 @@ public void testLongHashMapStressTest() { // Basic tests of other methods Object[] oldKeys = keysToArray(map); - ObjectHashMap copy = map.copy(); + ObjectHashMap copy = copyMap(map); assertEquals(map.size(), copy.size()); for (Object key : oldKeys) { assertEquals(key.toString(), // @@ -315,7 +315,7 @@ static void assertEqual(String message, LinkedHashMap expected, O Collections.reverse(keysValuesReversed); assertArrayEquals(message, keysValuesReversed.toArray(), reverseKeysToArray(actual)); - EconomicMapStorage storage = new EconomicMapStorage(actual, false); + EconomicMapStorage storage = toEconomicMapStorage(actual); int[] size = new int[]{0}; HashingStorageForEach.executeUncached(storage, new HashingStorageForEachCallback<>() { @Override @@ -330,12 +330,12 @@ public Object execute(Frame frame, Node inliningTarget, HashingStorage s, Hashin } private static Object[] keysToArray(ObjectHashMap m) { - EconomicMapStorage s = new EconomicMapStorage(m, false); + EconomicMapStorage s = toEconomicMapStorage(m); return iteratorToArray(s, HashingStorageGetIterator.executeUncached(s)); } private static Object[] reverseKeysToArray(ObjectHashMap m) { - EconomicMapStorage s = new EconomicMapStorage(m, false); + EconomicMapStorage s = toEconomicMapStorage(m); return iteratorToArray(s, HashingStorageGetReverseIterator.executeUncached(s)); } @@ -349,6 +349,24 @@ private static Object[] iteratorToArray(HashingStorage s, HashingStorageIterator private static int valueCounter = 0; + private static ObjectHashMap copyMap(ObjectHashMap original) { + EconomicMapStorage copy = EconomicMapStorage.create(original.size()); + MapCursor cursor = original.getEntries(); + while (cursor.advance()) { + put(copy, cursor.getKey().getValue(), cursor.getKey().getPythonHash(), cursor.getValue()); + } + return copy; + } + + private static EconomicMapStorage toEconomicMapStorage(ObjectHashMap map) { + EconomicMapStorage storage = EconomicMapStorage.create(map.size()); + MapCursor cursor = map.getEntries(); + while (cursor.advance()) { + put(storage, cursor.getKey().getValue(), cursor.getKey().getPythonHash(), cursor.getValue()); + } + return storage; + } + public static Object newValue() { return "Val: " + (valueCounter++); } diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_dict.py b/graalpython/com.oracle.graal.python.test/src/tests/test_dict.py index e96723941d..b87a4082f6 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_dict.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_dict.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # The Universal Permissive License (UPL), Version 1.0 @@ -39,6 +39,8 @@ import unittest, sys +graalpy_only = unittest.skipUnless(sys.implementation.name == "graalpy", "GraalPy-specific dict storage test") + def assert_raises(err, fn, *args, **kwargs): raised = False try: @@ -47,7 +49,6 @@ def assert_raises(err, fn, *args, **kwargs): raised = True assert raised - def test_equality(): class EqualTo: @@ -272,6 +273,225 @@ def __len__(self): assert set(d.keys()) == key_set, "unexpected keys: %s" % str(d.keys()) assert set(d.values()) == {97, 98, 99, 100}, "unexpected values: %s" % str(d.values()) + +@graalpy_only +def test_economic_map_storage_small_dict_grows_cleanly(): + d = {} + for i in range(4): + d[i] = i + + assert list(d.items()) == [(0, 0), (1, 1), (2, 2), (3, 3)] + + d[4] = 4 + + assert list(d.items()) == [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)] + + del d[1] + d[1] = 10 + assert list(d.items()) == [(0, 0), (2, 2), (3, 3), (4, 4), (1, 10)] + + +def test_economic_map_storage_small_dict_hash_and_eq_counts(): + count_hash = 0 + count_eq = 0 + + class Key: + def __init__(self, x): + self.x = x + + def __hash__(self): + nonlocal count_hash + count_hash += 1 + return 12345 + + def __eq__(self, other): + nonlocal count_eq + count_eq += 1 + return isinstance(other, Key) and self.x == other.x + + a = Key(1) + d = {0: 0, 1: 1, 2: 2, 3: 3, a: 42} + b = Key(1) + count_hash = 0 + count_eq = 0 + + assert b in d + assert count_hash == 1, count_hash + assert count_eq == 1, count_eq + + assert d[b] == 42 + assert count_hash == 2, count_hash + assert count_eq == 2, count_eq + + d[b] = 112 + assert count_hash == 3, count_hash + assert count_eq == 3, count_eq + + del d[b] + assert count_hash == 4, count_hash + assert count_eq == 4, count_eq + assert a not in d + + +def test_economic_map_storage_small_dict_copy_does_not_rehash_keys(): + count_hash = 0 + count_eq = 0 + + class Key: + def __init__(self, x): + self.x = x + + def __hash__(self): + nonlocal count_hash + count_hash += 1 + return self.x + + def __eq__(self, other): + nonlocal count_eq + count_eq += 1 + return isinstance(other, Key) and self.x == other.x + + d = {Key(i): i for i in range(5)} + count_hash = 0 + count_eq = 0 + + copied = d.copy() + assert copied == d + assert count_hash == 0, count_hash + assert count_eq == 0, count_eq + + rebuilt = dict(d) + assert rebuilt == d + assert count_hash == 0, count_hash + assert count_eq == 0, count_eq + + +def test_economic_map_storage_hash_and_eq_counts(): + count_hash = 0 + count_eq = 0 + + class Key: + def __init__(self, x): + self.x = x + + def __hash__(self): + nonlocal count_hash + count_hash += 1 + return 1234567 + + def __eq__(self, other): + nonlocal count_eq + count_eq += 1 + return isinstance(other, Key) and self.x == other.x + + a = Key(1) + d = {i: i for i in range(8)} + d[a] = 42 + + b = Key(1) + count_hash = 0 + count_eq = 0 + + assert b in d + assert count_hash == 1, count_hash + assert count_eq == 1, count_eq + + assert d[b] == 42 + assert count_hash == 2, count_hash + assert count_eq == 2, count_eq + + d[b] = 112 + assert count_hash == 3, count_hash + assert count_eq == 3, count_eq + + del d[b] + assert count_hash == 4, count_hash + assert count_eq == 4, count_eq + assert a not in d + + +def test_economic_map_storage_copy_does_not_rehash_keys(): + count_hash = 0 + count_eq = 0 + + class Key: + def __init__(self, x): + self.x = x + + def __hash__(self): + nonlocal count_hash + count_hash += 1 + return self.x + 1000 + + def __eq__(self, other): + nonlocal count_eq + count_eq += 1 + return isinstance(other, Key) and self.x == other.x + + d = {Key(i): i for i in range(9)} + + count_hash = 0 + count_eq = 0 + + copied = d.copy() + assert copied == d + assert count_hash == 0, count_hash + assert count_eq == 0, count_eq + + rebuilt = dict(d) + assert rebuilt == d + assert count_hash == 0, count_hash + assert count_eq == 0, count_eq + + +def test_economic_map_storage_compaction_preserves_order_and_hashes(): + count_hash = 0 + count_eq = 0 + + class Key: + def __init__(self, x): + self.x = x + + def __hash__(self): + nonlocal count_hash + count_hash += 1 + return 987654321 + + def __eq__(self, other): + nonlocal count_eq + count_eq += 1 + return isinstance(other, Key) and self.x == other.x + + original = Key(1) + d = {i: i for i in range(11)} + d[original] = 99 + + for i in range(4): + del d[i] + + count_hash = 0 + count_eq = 0 + del d[Key(1)] + assert count_hash == 1, count_hash + assert count_eq == 1, count_eq + + replacement = Key(1) + count_hash = 0 + count_eq = 0 + d[replacement] = 100 + assert count_hash == 1, count_hash + assert count_eq == 0, count_eq + + assert list(d.items()) == [(4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (replacement, 100)] + + probe = Key(1) + count_hash = 0 + count_eq = 0 + assert d[probe] == 100 + assert count_hash == 1, count_hash + assert count_eq == 1, count_eq + + def test_init6(): try: dict(1) @@ -893,6 +1113,45 @@ def __eq__(self, other): assert count_eq == 1, count_eq +def test_hash_and_eq_for_keywords_storage(): + count_hash = 0 + count_eq = 0 + + class Probe: + def __hash__(self): + nonlocal count_hash + count_hash += 1 + return hash("a") + + def __eq__(self, other): + nonlocal count_eq + count_eq += 1 + return other == "a" + + def check(**kwargs): + probe = Probe() + + assert probe in kwargs + assert count_hash == 1, count_hash + assert count_eq == 1, count_eq + + assert kwargs[probe] == 1 + assert count_hash == 2, count_hash + assert count_eq == 2, count_eq + + kwargs[probe] = 2 + assert kwargs["a"] == 2 + assert count_hash == 3, count_hash + assert count_eq == 3, count_eq + + del kwargs[probe] + assert "a" not in kwargs + assert count_hash == 4, count_hash + assert count_eq == 4, count_eq + + check(a=1) + + def test_hash_and_eq_for_dynamic_object_storage(): class MyObject: def __init__(self, string): @@ -923,6 +1182,47 @@ def __hash__(self): del d2[MyObject("1")] assert "1" not in d2 + +def test_hash_and_eq_count_for_dynamic_object_storage(): + count_hash = 0 + count_eq = 0 + + class Probe: + def __hash__(self): + nonlocal count_hash + count_hash += 1 + return hash("a") + + def __eq__(self, other): + nonlocal count_eq + count_eq += 1 + return other == "a" + + class MyObject: + pass + + d = MyObject().__dict__ + d["a"] = 1 + probe = Probe() + + assert probe in d + assert count_hash == 1, count_hash + assert count_eq == 1, count_eq + + assert d[probe] == 1 + assert count_hash == 2, count_hash + assert count_eq == 2, count_eq + + d[probe] = 2 + assert d["a"] == 2 + assert count_hash == 3, count_hash + assert count_eq == 3, count_eq + + del d[probe] + assert "a" not in d + assert count_hash == 4, count_hash + assert count_eq == 4, count_eq + def test_update_side_effect_on_other(): class X: def __hash__(self): @@ -1290,4 +1590,3 @@ class Test: del o.foo assert "foo" not in o.__dict__ -