| /* Dictionary object implementation using a hash table */ |
| |
| /* The distribution includes a separate file, Objects/dictnotes.txt, |
| describing explorations into dictionary design and optimization. |
| It covers typical dictionary use patterns, the parameters for |
| tuning dictionaries, and several ideas for possible optimizations. |
| */ |
| |
| /* PyDictKeysObject |
| |
| This implements the dictionary's hashtable. |
| |
As of Python 3.6, this is compact and ordered. The basic idea is described here:
| * https://mail.python.org/pipermail/python-dev/2012-December/123028.html |
| * https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html |
| |
| layout: |
| |
| +---------------------+ |
| | dk_refcnt | |
| | dk_log2_size | |
| | dk_log2_index_bytes | |
| | dk_kind | |
| | dk_version | |
| | dk_usable | |
| | dk_nentries | |
| +---------------------+ |
| | dk_indices[] | |
| | | |
| +---------------------+ |
| | dk_entries[] | |
| | | |
| +---------------------+ |
| |
dk_indices is the actual hashtable. It holds indices into dk_entries, or
DKIX_EMPTY (-1) or DKIX_DUMMY (-2).
The size of dk_indices is dk_size. The type of each index varies with dk_size:
| |
| * int8 for dk_size <= 128 |
| * int16 for 256 <= dk_size <= 2**15 |
| * int32 for 2**16 <= dk_size <= 2**31 |
| * int64 for 2**32 <= dk_size |
| |
dk_entries is an array of PyDictKeyEntry when dk_kind == DICT_KEYS_GENERAL,
or of PyDictUnicodeEntry otherwise. Its length is USABLE_FRACTION(dk_size).
| |
NOTE: Since negative values are used for DKIX_EMPTY and DKIX_DUMMY, the type
of each dk_indices entry is a signed integer, and int16 is used for a table
with dk_size == 256.
| */ |
| |
| |
| /* |
| The DictObject can be in one of two forms. |
| |
| Either: |
| A combined table: |
| ma_values == NULL, dk_refcnt == 1. |
| Values are stored in the me_value field of the PyDictKeyEntry. |
| Or: |
| A split table: |
| ma_values != NULL, dk_refcnt >= 1 |
| Values are stored in the ma_values array. |
| Only string (unicode) keys are allowed. |
| |
There are four kinds of slots in the table (a slot is an index in dk_indices,
together with DK_ENTRIES(keys)[index] when index >= 0):
| |
| 1. Unused. index == DKIX_EMPTY |
| Does not hold an active (key, value) pair now and never did. Unused can |
| transition to Active upon key insertion. This is each slot's initial state. |
| |
| 2. Active. index >= 0, me_key != NULL and me_value != NULL |
| Holds an active (key, value) pair. Active can transition to Dummy or |
| Pending upon key deletion (for combined and split tables respectively). |
| This is the only case in which me_value != NULL. |
| |
| 3. Dummy. index == DKIX_DUMMY (combined only) |
| Previously held an active (key, value) pair, but that was deleted and an |
| active pair has not yet overwritten the slot. Dummy can transition to |
| Active upon key insertion. Dummy slots cannot be made Unused again |
| else the probe sequence in case of collision would have no way to know |
| they were once active. |
| In free-threaded builds dummy slots are not re-used to allow lock-free |
| lookups to proceed safely. |
| |
| 4. Pending. index >= 0, key != NULL, and value == NULL (split only) |
   Not yet inserted in the split table: the key exists in the shared keys
   object, but this dict holds no value for it yet.
| */ |
| |
| /* |
| Preserving insertion order |
| |
It's simple for a combined table. Since dk_entries is mostly append-only, we
can recover the insertion order by just iterating over dk_entries.

One exception is .popitem(): it removes the last item in dk_entries and
decrements dk_nentries to achieve amortized O(1). Since DKIX_DUMMY entries
remain in dk_indices, we can't increment dk_usable even though dk_nentries is
decremented.
| |
| To preserve the order in a split table, a bit vector is used to record the |
| insertion order. When a key is inserted the bit vector is shifted up by 4 bits |
| and the index of the key is stored in the low 4 bits. |
| As a consequence of this, split keys have a maximum size of 16. |
| */ |
| |
| /* PyDict_MINSIZE is the starting size for any new dict. |
| * 8 allows dicts with no more than 5 active entries; experiments suggested |
| * this suffices for the majority of dicts (consisting mostly of usually-small |
| * dicts created to pass keyword arguments). |
 * Making this 8 rather than 4 reduces the number of resizes for most
 * dictionaries, without any significant extra memory use.
| */ |
| #define PyDict_LOG_MINSIZE 3 |
| #define PyDict_MINSIZE 8 |
| |
| #include "Python.h" |
| #include "pycore_bitutils.h" // _Py_bit_length |
| #include "pycore_call.h" // _PyObject_CallNoArgs() |
| #include "pycore_ceval.h" // _PyEval_GetBuiltin() |
| #include "pycore_code.h" // stats |
| #include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION |
| #include "pycore_dict.h" // export _PyDict_SizeOf() |
| #include "pycore_freelist.h" // _PyFreeListState_GET() |
| #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() |
| #include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() |
| #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_RELAXED |
| #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() |
| #include "pycore_pystate.h" // _PyThreadState_GET() |
| #include "pycore_setobject.h" // _PySet_NextEntry() |
| #include "stringlib/eq.h" // unicode_eq() |
| |
| #include <stdbool.h> |
| |
| /*[clinic input] |
| class dict "PyDictObject *" "&PyDict_Type" |
| [clinic start generated code]*/ |
| /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/ |
| |
| |
| /* |
| To ensure the lookup algorithm terminates, there must be at least one Unused |
| slot (NULL key) in the table. |
| To avoid slowing down lookups on a near-full table, we resize the table when |
| it's USABLE_FRACTION (currently two-thirds) full. |
| */ |
| |
| #ifdef Py_GIL_DISABLED |
| |
| static inline void |
| ASSERT_DICT_LOCKED(PyObject *op) |
| { |
| _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); |
| } |
| #define ASSERT_DICT_LOCKED(op) ASSERT_DICT_LOCKED(_Py_CAST(PyObject*, op)) |
| #define ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op) \ |
| if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \ |
| ASSERT_DICT_LOCKED(op); \ |
| } |
| #define ASSERT_WORLD_STOPPED_OR_OBJ_LOCKED(op) \ |
| if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \ |
| _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); \ |
| } |
| |
| #define IS_DICT_SHARED(mp) _PyObject_GC_IS_SHARED(mp) |
| #define SET_DICT_SHARED(mp) _PyObject_GC_SET_SHARED(mp) |
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)keys->dk_indices)[idx])
#define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)keys->dk_indices)[idx], (int##size##_t)value)
| #define ASSERT_OWNED_OR_SHARED(mp) \ |
| assert(_Py_IsOwnedByCurrentThread((PyObject *)mp) || IS_DICT_SHARED(mp)); |
| |
| #define LOCK_KEYS_IF_SPLIT(keys, kind) \ |
| if (kind == DICT_KEYS_SPLIT) { \ |
| LOCK_KEYS(keys); \ |
| } |
| |
| #define UNLOCK_KEYS_IF_SPLIT(keys, kind) \ |
| if (kind == DICT_KEYS_SPLIT) { \ |
| UNLOCK_KEYS(keys); \ |
| } |
| |
| static inline Py_ssize_t |
| load_keys_nentries(PyDictObject *mp) |
| { |
| PyDictKeysObject *keys = _Py_atomic_load_ptr(&mp->ma_keys); |
| return _Py_atomic_load_ssize(&keys->dk_nentries); |
| } |
| |
| static inline void |
| set_keys(PyDictObject *mp, PyDictKeysObject *keys) |
| { |
| ASSERT_OWNED_OR_SHARED(mp); |
| _Py_atomic_store_ptr_release(&mp->ma_keys, keys); |
| } |
| |
| static inline void |
| set_values(PyDictObject *mp, PyDictValues *values) |
| { |
| ASSERT_OWNED_OR_SHARED(mp); |
| _Py_atomic_store_ptr_release(&mp->ma_values, values); |
| } |
| |
| #define LOCK_KEYS(keys) PyMutex_LockFlags(&keys->dk_mutex, _Py_LOCK_DONT_DETACH) |
| #define UNLOCK_KEYS(keys) PyMutex_Unlock(&keys->dk_mutex) |
| |
| #define ASSERT_KEYS_LOCKED(keys) assert(PyMutex_IsLocked(&keys->dk_mutex)) |
| #define LOAD_SHARED_KEY(key) _Py_atomic_load_ptr_acquire(&key) |
| #define STORE_SHARED_KEY(key, value) _Py_atomic_store_ptr_release(&key, value) |
| // Inc refs the keys object, giving the previous value |
| #define INCREF_KEYS(dk) _Py_atomic_add_ssize(&dk->dk_refcnt, 1) |
| // Dec refs the keys object, giving the previous value |
| #define DECREF_KEYS(dk) _Py_atomic_add_ssize(&dk->dk_refcnt, -1) |
| #define LOAD_KEYS_NENTRIES(keys) _Py_atomic_load_ssize_relaxed(&keys->dk_nentries) |
| |
| #define INCREF_KEYS_FT(dk) dictkeys_incref(dk) |
| #define DECREF_KEYS_FT(dk, shared) dictkeys_decref(_PyInterpreterState_GET(), dk, shared) |
| |
| static inline void split_keys_entry_added(PyDictKeysObject *keys) |
| { |
| ASSERT_KEYS_LOCKED(keys); |
| |
| // We increase before we decrease so we never get too small of a value |
| // when we're racing with reads |
| _Py_atomic_store_ssize_relaxed(&keys->dk_nentries, keys->dk_nentries + 1); |
| _Py_atomic_store_ssize_release(&keys->dk_usable, keys->dk_usable - 1); |
| } |
| |
| #else /* Py_GIL_DISABLED */ |
| |
| #define ASSERT_DICT_LOCKED(op) |
| #define ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op) |
| #define ASSERT_WORLD_STOPPED_OR_OBJ_LOCKED(op) |
| #define LOCK_KEYS(keys) |
| #define UNLOCK_KEYS(keys) |
| #define ASSERT_KEYS_LOCKED(keys) |
| #define LOAD_SHARED_KEY(key) key |
| #define STORE_SHARED_KEY(key, value) key = value |
| #define INCREF_KEYS(dk) dk->dk_refcnt++ |
| #define DECREF_KEYS(dk) dk->dk_refcnt-- |
| #define LOAD_KEYS_NENTRIES(keys) keys->dk_nentries |
| #define INCREF_KEYS_FT(dk) |
| #define DECREF_KEYS_FT(dk, shared) |
| #define LOCK_KEYS_IF_SPLIT(keys, kind) |
| #define UNLOCK_KEYS_IF_SPLIT(keys, kind) |
| #define IS_DICT_SHARED(mp) (false) |
| #define SET_DICT_SHARED(mp) |
| #define LOAD_INDEX(keys, size, idx) ((const int##size##_t*)(keys->dk_indices))[idx] |
| #define STORE_INDEX(keys, size, idx, value) ((int##size##_t*)(keys->dk_indices))[idx] = (int##size##_t)value |
| |
| static inline void split_keys_entry_added(PyDictKeysObject *keys) |
| { |
| keys->dk_usable--; |
| keys->dk_nentries++; |
| } |
| |
| static inline void |
| set_keys(PyDictObject *mp, PyDictKeysObject *keys) |
| { |
| mp->ma_keys = keys; |
| } |
| |
| static inline void |
| set_values(PyDictObject *mp, PyDictValues *values) |
| { |
| mp->ma_values = values; |
| } |
| |
| static inline Py_ssize_t |
| load_keys_nentries(PyDictObject *mp) |
| { |
| return mp->ma_keys->dk_nentries; |
| } |
| |
| |
| #endif |
| |
| #define STORE_KEY(ep, key) FT_ATOMIC_STORE_PTR_RELEASE(ep->me_key, key) |
| #define STORE_VALUE(ep, value) FT_ATOMIC_STORE_PTR_RELEASE(ep->me_value, value) |
| #define STORE_SPLIT_VALUE(mp, idx, value) FT_ATOMIC_STORE_PTR_RELEASE(mp->ma_values->values[idx], value) |
| #define STORE_HASH(ep, hash) FT_ATOMIC_STORE_SSIZE_RELAXED(ep->me_hash, hash) |
| #define STORE_KEYS_USABLE(keys, usable) FT_ATOMIC_STORE_SSIZE_RELAXED(keys->dk_usable, usable) |
| #define STORE_KEYS_NENTRIES(keys, nentries) FT_ATOMIC_STORE_SSIZE_RELAXED(keys->dk_nentries, nentries) |
| #define STORE_USED(mp, used) FT_ATOMIC_STORE_SSIZE_RELAXED(mp->ma_used, used) |
| |
| #define PERTURB_SHIFT 5 |
| |
| /* |
| Major subtleties ahead: Most hash schemes depend on having a "good" hash |
| function, in the sense of simulating randomness. Python doesn't: its most |
| important hash functions (for ints) are very regular in common |
| cases: |
| |
>>> [hash(i) for i in range(4)]
| [0, 1, 2, 3] |
| |
| This isn't necessarily bad! To the contrary, in a table of size 2**i, taking |
| the low-order i bits as the initial table index is extremely fast, and there |
| are no collisions at all for dicts indexed by a contiguous range of ints. So |
| this gives better-than-random behavior in common cases, and that's very |
| desirable. |
| |
| OTOH, when collisions occur, the tendency to fill contiguous slices of the |
| hash table makes a good collision resolution strategy crucial. Taking only |
| the last i bits of the hash code is also vulnerable: for example, consider |
| the list [i << 16 for i in range(20000)] as a set of keys. Since ints are |
| their own hash codes, and this fits in a dict of size 2**15, the last 15 bits |
| of every hash code are all 0: they *all* map to the same table index. |
| |
| But catering to unusual cases should not slow the usual ones, so we just take |
| the last i bits anyway. It's up to collision resolution to do the rest. If |
| we *usually* find the key we're looking for on the first try (and, it turns |
| out, we usually do -- the table load factor is kept under 2/3, so the odds |
| are solidly in our favor), then it makes best sense to keep the initial index |
| computation dirt cheap. |
| |
| The first half of collision resolution is to visit table indices via this |
| recurrence: |
| |
| j = ((5*j) + 1) mod 2**i |
| |
| For any initial j in range(2**i), repeating that 2**i times generates each |
| int in range(2**i) exactly once (see any text on random-number generation for |
| proof). By itself, this doesn't help much: like linear probing (setting |
| j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed |
| order. This would be bad, except that's not the only thing we do, and it's |
| actually *good* in the common cases where hash keys are consecutive. In an |
| example that's really too small to make this entirely clear, for a table of |
| size 2**3 the order of indices is: |
| |
| 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating] |
| |
| If two things come in at index 5, the first place we look after is index 2, |
| not 6, so if another comes in at index 6 the collision at 5 didn't hurt it. |
| Linear probing is deadly in this case because there the fixed probe order |
| is the *same* as the order consecutive keys are likely to arrive. But it's |
| extremely unlikely hash codes will follow a 5*j+1 recurrence by accident, |
| and certain that consecutive hash codes do not. |
| |
| The other half of the strategy is to get the other bits of the hash code |
into play. This is done by initializing an (unsigned) variable "perturb" to the
| full hash code, and changing the recurrence to: |
| |
| perturb >>= PERTURB_SHIFT; |
| j = (5*j) + 1 + perturb; |
| use j % 2**i as the next table index; |
| |
| Now the probe sequence depends (eventually) on every bit in the hash code, |
| and the pseudo-scrambling property of recurring on 5*j+1 is more valuable, |
| because it quickly magnifies small differences in the bits that didn't affect |
| the initial index. Note that because perturb is unsigned, if the recurrence |
| is executed often enough perturb eventually becomes and remains 0. At that |
| point (very rarely reached) the recurrence is on (just) 5*j+1 again, and |
| that's certain to find an empty slot eventually (since it generates every int |
| in range(2**i), and we make sure there's always at least one empty slot). |
| |
| Selecting a good value for PERTURB_SHIFT is a balancing act. You want it |
| small so that the high bits of the hash code continue to affect the probe |
| sequence across iterations; but you want it large so that in really bad cases |
| the high-order hash bits have an effect on early iterations. 5 was "the |
| best" in minimizing total collisions across experiments Tim Peters ran (on |
| both normal and pathological cases), but 4 and 6 weren't significantly worse. |
| |
| Historical: Reimer Behrends contributed the idea of using a polynomial-based |
| approach, using repeated multiplication by x in GF(2**n) where an irreducible |
| polynomial for each table size was chosen such that x was a primitive root. |
| Christian Tismer later extended that to use division by x instead, as an |
| efficient way to get the high bits of the hash code into play. This scheme |
| also gave excellent collision statistics, but was more expensive: two |
| if-tests were required inside the loop; computing "the next" index took about |
| the same number of operations but without as much potential parallelism |
| (e.g., computing 5*j can go on at the same time as computing 1+perturb in the |
| above, and then shifting perturb can be done while the table index is being |
| masked); and the PyDictObject struct required a member to hold the table's |
| polynomial. In Tim's experiments the current scheme ran faster, produced |
| equally good collision statistics, needed less code & used less memory. |
| |
| */ |
| |
| static int dictresize(PyInterpreterState *interp, PyDictObject *mp, |
| uint8_t log_newsize, int unicode); |
| |
| static PyObject* dict_iter(PyObject *dict); |
| |
| static int |
| setitem_lock_held(PyDictObject *mp, PyObject *key, PyObject *value); |
| static int |
| dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_value, |
| PyObject **result, int incref_result); |
| |
| #ifndef NDEBUG |
| static int _PyObject_InlineValuesConsistencyCheck(PyObject *obj); |
| #endif |
| |
| #include "clinic/dictobject.c.h" |
| |
| |
| static inline Py_hash_t |
| unicode_get_hash(PyObject *o) |
| { |
| assert(PyUnicode_CheckExact(o)); |
| return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(o)->hash); |
| } |
| |
| /* Print summary info about the state of the optimized allocator */ |
| void |
| _PyDict_DebugMallocStats(FILE *out) |
| { |
| _PyDebugAllocatorStats(out, "free PyDictObject", |
| _Py_FREELIST_SIZE(dicts), |
| sizeof(PyDictObject)); |
| _PyDebugAllocatorStats(out, "free PyDictKeysObject", |
| _Py_FREELIST_SIZE(dictkeys), |
| sizeof(PyDictKeysObject)); |
| } |
| |
| #define DK_MASK(dk) (DK_SIZE(dk)-1) |
| |
| #define _Py_DICT_IMMORTAL_INITIAL_REFCNT PY_SSIZE_T_MIN |
| |
| static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr); |
| |
| /* PyDictKeysObject has refcounts like PyObject does, so we have the |
| following two functions to mirror what Py_INCREF() and Py_DECREF() do. |
| (Keep in mind that PyDictKeysObject isn't actually a PyObject.) |
| Likewise a PyDictKeysObject can be immortal (e.g. Py_EMPTY_KEYS), |
| so we apply a naive version of what Py_INCREF() and Py_DECREF() do |
| for immortal objects. */ |
| |
| static inline void |
| dictkeys_incref(PyDictKeysObject *dk) |
| { |
| if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { |
| assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); |
| return; |
| } |
| #ifdef Py_REF_DEBUG |
| _Py_IncRefTotal(_PyThreadState_GET()); |
| #endif |
| INCREF_KEYS(dk); |
| } |
| |
| static inline void |
| dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr) |
| { |
| if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { |
| assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); |
| return; |
| } |
| assert(FT_ATOMIC_LOAD_SSIZE(dk->dk_refcnt) > 0); |
| #ifdef Py_REF_DEBUG |
| _Py_DecRefTotal(_PyThreadState_GET()); |
| #endif |
| if (DECREF_KEYS(dk) == 1) { |
| if (DK_IS_UNICODE(dk)) { |
| PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dk); |
| Py_ssize_t i, n; |
| for (i = 0, n = dk->dk_nentries; i < n; i++) { |
| Py_XDECREF(entries[i].me_key); |
| Py_XDECREF(entries[i].me_value); |
| } |
| } |
| else { |
| PyDictKeyEntry *entries = DK_ENTRIES(dk); |
| Py_ssize_t i, n; |
| for (i = 0, n = dk->dk_nentries; i < n; i++) { |
| Py_XDECREF(entries[i].me_key); |
| Py_XDECREF(entries[i].me_value); |
| } |
| } |
| free_keys_object(dk, use_qsbr); |
| } |
| } |
| |
/* Look up dk_indices[i]. Returns DKIX_EMPTY, DKIX_DUMMY, or ix >= 0. */
| static inline Py_ssize_t |
| dictkeys_get_index(const PyDictKeysObject *keys, Py_ssize_t i) |
| { |
| int log2size = DK_LOG_SIZE(keys); |
| Py_ssize_t ix; |
| |
| if (log2size < 8) { |
| ix = LOAD_INDEX(keys, 8, i); |
| } |
| else if (log2size < 16) { |
| ix = LOAD_INDEX(keys, 16, i); |
| } |
| #if SIZEOF_VOID_P > 4 |
| else if (log2size >= 32) { |
| ix = LOAD_INDEX(keys, 64, i); |
| } |
| #endif |
| else { |
| ix = LOAD_INDEX(keys, 32, i); |
| } |
| assert(ix >= DKIX_DUMMY); |
| return ix; |
| } |
| |
/* Write to dk_indices. */
| static inline void |
| dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix) |
| { |
| int log2size = DK_LOG_SIZE(keys); |
| |
| assert(ix >= DKIX_DUMMY); |
| assert(keys->dk_version == 0); |
| |
| if (log2size < 8) { |
| assert(ix <= 0x7f); |
| STORE_INDEX(keys, 8, i, ix); |
| } |
| else if (log2size < 16) { |
| assert(ix <= 0x7fff); |
| STORE_INDEX(keys, 16, i, ix); |
| } |
| #if SIZEOF_VOID_P > 4 |
| else if (log2size >= 32) { |
| STORE_INDEX(keys, 64, i, ix); |
| } |
| #endif |
| else { |
| assert(ix <= 0x7fffffff); |
| STORE_INDEX(keys, 32, i, ix); |
| } |
| } |
| |
| |
| /* USABLE_FRACTION is the maximum dictionary load. |
 * Increasing this ratio makes dictionaries denser, resulting in more
 * collisions.  Decreasing it improves sparseness at the expense of spreading
 * indices over more cache lines and at the cost of total memory consumed.
| * |
| * USABLE_FRACTION must obey the following: |
| * (0 < USABLE_FRACTION(n) < n) for all n >= 2 |
| * |
| * USABLE_FRACTION should be quick to calculate. |
| * Fractions around 1/2 to 2/3 seem to work well in practice. |
| */ |
| #define USABLE_FRACTION(n) (((n) << 1)/3) |
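
/* For example: USABLE_FRACTION(8) == 5, USABLE_FRACTION(16) == 10, and
 * USABLE_FRACTION(32) == 21, so a fresh 8-slot dict holds up to 5 entries
 * before its first resize.
 */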
| |
/* Find the log2 of the smallest dk_size >= minsize. */
| static inline uint8_t |
| calculate_log2_keysize(Py_ssize_t minsize) |
| { |
| #if SIZEOF_LONG == SIZEOF_SIZE_T |
| minsize = (minsize | PyDict_MINSIZE) - 1; |
| return _Py_bit_length(minsize | (PyDict_MINSIZE-1)); |
| #elif defined(_MSC_VER) |
| // On 64bit Windows, sizeof(long) == 4. |
| minsize = (minsize | PyDict_MINSIZE) - 1; |
| unsigned long msb; |
| _BitScanReverse64(&msb, (uint64_t)minsize); |
| return (uint8_t)(msb + 1); |
| #else |
| uint8_t log2_size; |
| for (log2_size = PyDict_LOG_MINSIZE; |
| (((Py_ssize_t)1) << log2_size) < minsize; |
| log2_size++) |
| ; |
| return log2_size; |
| #endif |
| } |
| |
/* estimate_log2_keysize() is the inverse of USABLE_FRACTION().
 *
 * It can be used to reserve enough space to insert n entries without
 * resizing.
 */
| static inline uint8_t |
| estimate_log2_keysize(Py_ssize_t n) |
| { |
| return calculate_log2_keysize((n*3 + 1) / 2); |
| } |
| |
| |
/* GROWTH_RATE. Growth rate upon hitting maximum load.
 * Currently set to used*3.
 * This means that dicts double in size when growing without deletions,
 * but have more headroom when the number of deletions is on a par with the
 * number of insertions.  See also bpo-17563 and bpo-33205.
 *
 * GROWTH_RATE was set to used*4 up to version 3.2.
 * GROWTH_RATE was set to used*2 in version 3.3.0.
 * GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0.
 */
| #define GROWTH_RATE(d) ((d)->ma_used*3) |
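
/* For example, a combined dict that filled up with ma_used == 5 resizes to
 * calculate_log2_keysize(GROWTH_RATE(d)) == calculate_log2_keysize(15) == 4,
 * i.e. from 8 slots to 16: a doubling, since there were no deletions.
 */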
| |
| /* This immutable, empty PyDictKeysObject is used for PyDict_Clear() |
| * (which cannot fail and thus can do no allocation). |
| */ |
| static PyDictKeysObject empty_keys_struct = { |
| _Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */ |
| 0, /* dk_log2_size */ |
| 0, /* dk_log2_index_bytes */ |
| DICT_KEYS_UNICODE, /* dk_kind */ |
| #ifdef Py_GIL_DISABLED |
| {0}, /* dk_mutex */ |
| #endif |
| 1, /* dk_version */ |
| 0, /* dk_usable (immutable) */ |
| 0, /* dk_nentries */ |
| {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, |
| DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */ |
| }; |
| |
| #define Py_EMPTY_KEYS &empty_keys_struct |
| |
| /* Uncomment to check the dict content in _PyDict_CheckConsistency() */ |
| // #define DEBUG_PYDICT |
| |
| #ifdef DEBUG_PYDICT |
| # define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 1)) |
| #else |
| # define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 0)) |
| #endif |
| |
| static inline int |
| get_index_from_order(PyDictObject *mp, Py_ssize_t i) |
| { |
| assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE); |
| assert(i < mp->ma_values->size); |
| uint8_t *array = get_insertion_order_array(mp->ma_values); |
| return array[i]; |
| } |
| |
| #ifdef DEBUG_PYDICT |
| static void |
| dump_entries(PyDictKeysObject *dk) |
| { |
| for (Py_ssize_t i = 0; i < dk->dk_nentries; i++) { |
| if (DK_IS_UNICODE(dk)) { |
| PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(dk)[i]; |
| printf("key=%p value=%p\n", ep->me_key, ep->me_value); |
| } |
| else { |
| PyDictKeyEntry *ep = &DK_ENTRIES(dk)[i]; |
| printf("key=%p hash=%lx value=%p\n", ep->me_key, ep->me_hash, ep->me_value); |
| } |
| } |
| } |
| #endif |
| |
| int |
| _PyDict_CheckConsistency(PyObject *op, int check_content) |
| { |
| ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op); |
| |
| #define CHECK(expr) \ |
| do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0) |
| |
| assert(op != NULL); |
| CHECK(PyDict_Check(op)); |
| PyDictObject *mp = (PyDictObject *)op; |
| |
| PyDictKeysObject *keys = mp->ma_keys; |
| int splitted = _PyDict_HasSplitTable(mp); |
| Py_ssize_t usable = USABLE_FRACTION(DK_SIZE(keys)); |
| |
| // In the free-threaded build, shared keys may be concurrently modified, |
| // so use atomic loads. |
| Py_ssize_t dk_usable = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_usable); |
| Py_ssize_t dk_nentries = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_nentries); |
| |
| CHECK(0 <= mp->ma_used && mp->ma_used <= usable); |
| CHECK(0 <= dk_usable && dk_usable <= usable); |
| CHECK(0 <= dk_nentries && dk_nentries <= usable); |
| CHECK(dk_usable + dk_nentries <= usable); |
| |
| if (!splitted) { |
| /* combined table */ |
| CHECK(keys->dk_kind != DICT_KEYS_SPLIT); |
| CHECK(keys->dk_refcnt == 1 || keys == Py_EMPTY_KEYS); |
| } |
| else { |
| CHECK(keys->dk_kind == DICT_KEYS_SPLIT); |
| CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE); |
| if (mp->ma_values->embedded) { |
| CHECK(mp->ma_values->embedded == 1); |
| CHECK(mp->ma_values->valid == 1); |
| } |
| } |
| |
| if (check_content) { |
| LOCK_KEYS_IF_SPLIT(keys, keys->dk_kind); |
| for (Py_ssize_t i=0; i < DK_SIZE(keys); i++) { |
| Py_ssize_t ix = dictkeys_get_index(keys, i); |
| CHECK(DKIX_DUMMY <= ix && ix <= usable); |
| } |
| |
| if (keys->dk_kind == DICT_KEYS_GENERAL) { |
| PyDictKeyEntry *entries = DK_ENTRIES(keys); |
| for (Py_ssize_t i=0; i < usable; i++) { |
| PyDictKeyEntry *entry = &entries[i]; |
| PyObject *key = entry->me_key; |
| |
| if (key != NULL) { |
| /* test_dict fails if PyObject_Hash() is called again */ |
| CHECK(entry->me_hash != -1); |
| CHECK(entry->me_value != NULL); |
| |
| if (PyUnicode_CheckExact(key)) { |
| Py_hash_t hash = unicode_get_hash(key); |
| CHECK(entry->me_hash == hash); |
| } |
| } |
| } |
| } |
| else { |
| PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); |
| for (Py_ssize_t i=0; i < usable; i++) { |
| PyDictUnicodeEntry *entry = &entries[i]; |
| PyObject *key = entry->me_key; |
| |
| if (key != NULL) { |
| CHECK(PyUnicode_CheckExact(key)); |
| Py_hash_t hash = unicode_get_hash(key); |
| CHECK(hash != -1); |
| if (!splitted) { |
| CHECK(entry->me_value != NULL); |
| } |
| } |
| |
| if (splitted) { |
| CHECK(entry->me_value == NULL); |
| } |
| } |
| } |
| |
| if (splitted) { |
| CHECK(mp->ma_used <= SHARED_KEYS_MAX_SIZE); |
        /* split table */
| int duplicate_check = 0; |
| for (Py_ssize_t i=0; i < mp->ma_used; i++) { |
| int index = get_index_from_order(mp, i); |
| CHECK((duplicate_check & (1<<index)) == 0); |
| duplicate_check |= (1<<index); |
| CHECK(mp->ma_values->values[index] != NULL); |
| } |
| } |
| UNLOCK_KEYS_IF_SPLIT(keys, keys->dk_kind); |
| } |
| return 1; |
| |
| #undef CHECK |
| } |
| |
| |
| static PyDictKeysObject* |
| new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) |
| { |
| Py_ssize_t usable; |
| int log2_bytes; |
| size_t entry_size = unicode ? sizeof(PyDictUnicodeEntry) : sizeof(PyDictKeyEntry); |
| |
| assert(log2_size >= PyDict_LOG_MINSIZE); |
| |
| usable = USABLE_FRACTION((size_t)1<<log2_size); |
| if (log2_size < 8) { |
| log2_bytes = log2_size; |
| } |
| else if (log2_size < 16) { |
| log2_bytes = log2_size + 1; |
| } |
| #if SIZEOF_VOID_P > 4 |
| else if (log2_size >= 32) { |
| log2_bytes = log2_size + 3; |
| } |
| #endif |
| else { |
| log2_bytes = log2_size + 2; |
| } |
| |
| PyDictKeysObject *dk = NULL; |
| if (log2_size == PyDict_LOG_MINSIZE && unicode) { |
| dk = _Py_FREELIST_POP_MEM(dictkeys); |
| } |
| if (dk == NULL) { |
| dk = PyMem_Malloc(sizeof(PyDictKeysObject) |
| + ((size_t)1 << log2_bytes) |
| + entry_size * usable); |
| if (dk == NULL) { |
| PyErr_NoMemory(); |
| return NULL; |
| } |
| } |
| #ifdef Py_REF_DEBUG |
| _Py_IncRefTotal(_PyThreadState_GET()); |
| #endif |
| dk->dk_refcnt = 1; |
| dk->dk_log2_size = log2_size; |
| dk->dk_log2_index_bytes = log2_bytes; |
| dk->dk_kind = unicode ? DICT_KEYS_UNICODE : DICT_KEYS_GENERAL; |
| #ifdef Py_GIL_DISABLED |
| dk->dk_mutex = (PyMutex){0}; |
| #endif |
| dk->dk_nentries = 0; |
| dk->dk_usable = usable; |
| dk->dk_version = 0; |
| memset(&dk->dk_indices[0], 0xff, ((size_t)1 << log2_bytes)); |
| memset(&dk->dk_indices[(size_t)1 << log2_bytes], 0, entry_size * usable); |
| return dk; |
| } |
| |
| static void |
| free_keys_object(PyDictKeysObject *keys, bool use_qsbr) |
| { |
| #ifdef Py_GIL_DISABLED |
| if (use_qsbr) { |
| _PyMem_FreeDelayed(keys); |
| return; |
| } |
| #endif |
| if (DK_LOG_SIZE(keys) == PyDict_LOG_MINSIZE && keys->dk_kind == DICT_KEYS_UNICODE) { |
| _Py_FREELIST_FREE(dictkeys, keys, PyMem_Free); |
| } |
| else { |
| PyMem_Free(keys); |
| } |
| } |
| |
| static size_t |
| values_size_from_count(size_t count) |
| { |
| assert(count >= 1); |
| size_t suffix_size = _Py_SIZE_ROUND_UP(count, sizeof(PyObject *)); |
| assert(suffix_size < 128); |
| assert(suffix_size % sizeof(PyObject *) == 0); |
| return (count + 1) * sizeof(PyObject *) + suffix_size; |
| } |
| |
| #define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys) |
| |
| static inline PyDictValues* |
| new_values(size_t size) |
| { |
| size_t n = values_size_from_count(size); |
| PyDictValues *res = (PyDictValues *)PyMem_Malloc(n); |
| if (res == NULL) { |
| return NULL; |
| } |
| res->embedded = 0; |
| res->size = 0; |
| assert(size < 256); |
| res->capacity = (uint8_t)size; |
| return res; |
| } |
| |
| static inline void |
| free_values(PyDictValues *values, bool use_qsbr) |
| { |
| assert(values->embedded == 0); |
| #ifdef Py_GIL_DISABLED |
| if (use_qsbr) { |
| _PyMem_FreeDelayed(values); |
| return; |
| } |
| #endif |
| PyMem_Free(values); |
| } |
| |
| /* Consumes a reference to the keys object */ |
| static PyObject * |
| new_dict(PyInterpreterState *interp, |
| PyDictKeysObject *keys, PyDictValues *values, |
| Py_ssize_t used, int free_values_on_failure) |
| { |
| assert(keys != NULL); |
| PyDictObject *mp = _Py_FREELIST_POP(PyDictObject, dicts); |
| if (mp == NULL) { |
| mp = PyObject_GC_New(PyDictObject, &PyDict_Type); |
| if (mp == NULL) { |
| dictkeys_decref(interp, keys, false); |
| if (free_values_on_failure) { |
| free_values(values, false); |
| } |
| return NULL; |
| } |
| } |
| assert(Py_IS_TYPE(mp, &PyDict_Type)); |
| mp->ma_keys = keys; |
| mp->ma_values = values; |
| mp->ma_used = used; |
| mp->_ma_watcher_tag = 0; |
| ASSERT_CONSISTENT(mp); |
| return (PyObject *)mp; |
| } |
| |
| static PyObject * |
| new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) |
| { |
| size_t size = shared_keys_usable_size(keys); |
| PyDictValues *values = new_values(size); |
| if (values == NULL) { |
| return PyErr_NoMemory(); |
| } |
| dictkeys_incref(keys); |
| for (size_t i = 0; i < size; i++) { |
| values->values[i] = NULL; |
| } |
| return new_dict(interp, keys, values, 0, 1); |
| } |
| |
| |
| static PyDictKeysObject * |
| clone_combined_dict_keys(PyDictObject *orig) |
| { |
| assert(PyDict_Check(orig)); |
| assert(Py_TYPE(orig)->tp_iter == dict_iter); |
| assert(orig->ma_values == NULL); |
| assert(orig->ma_keys != Py_EMPTY_KEYS); |
| assert(orig->ma_keys->dk_refcnt == 1); |
| |
| ASSERT_DICT_LOCKED(orig); |
| |
| size_t keys_size = _PyDict_KeysSize(orig->ma_keys); |
| PyDictKeysObject *keys = PyMem_Malloc(keys_size); |
| if (keys == NULL) { |
| PyErr_NoMemory(); |
| return NULL; |
| } |
| |
| memcpy(keys, orig->ma_keys, keys_size); |
| |
    /* After copying the key/value pairs, we need to incref all
       keys and values, as they are about to be co-owned by a
       new dict object. */
| PyObject **pkey, **pvalue; |
| size_t offs; |
| if (DK_IS_UNICODE(orig->ma_keys)) { |
| PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(keys); |
| pkey = &ep0->me_key; |
| pvalue = &ep0->me_value; |
| offs = sizeof(PyDictUnicodeEntry) / sizeof(PyObject*); |
| } |
| else { |
| PyDictKeyEntry *ep0 = DK_ENTRIES(keys); |
| pkey = &ep0->me_key; |
| pvalue = &ep0->me_value; |
| offs = sizeof(PyDictKeyEntry) / sizeof(PyObject*); |
| } |
| |
| Py_ssize_t n = keys->dk_nentries; |
| for (Py_ssize_t i = 0; i < n; i++) { |
| PyObject *value = *pvalue; |
| if (value != NULL) { |
| Py_INCREF(value); |
| Py_INCREF(*pkey); |
| } |
| pvalue += offs; |
| pkey += offs; |
| } |
| |
    /* Since we copied the keys table, we now have an extra reference
       in the system. Manually increment _Py_RefTotal to signal that
       we have it now; calling dictkeys_incref would be an error, as
       keys->dk_refcnt is already set to 1 (after the memcpy). */
| #ifdef Py_REF_DEBUG |
| _Py_IncRefTotal(_PyThreadState_GET()); |
| #endif |
| return keys; |
| } |
| |
| PyObject * |
| PyDict_New(void) |
| { |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| /* We don't incref Py_EMPTY_KEYS here because it is immortal. */ |
| return new_dict(interp, Py_EMPTY_KEYS, NULL, 0, 0); |
| } |
| |
/* Find the hashtable index whose slot refers to the given entry-table offset */
| static Py_ssize_t |
| lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) |
| { |
| size_t mask = DK_MASK(k); |
| size_t perturb = (size_t)hash; |
| size_t i = (size_t)hash & mask; |
| |
| for (;;) { |
| Py_ssize_t ix = dictkeys_get_index(k, i); |
| if (ix == index) { |
| return i; |
| } |
| if (ix == DKIX_EMPTY) { |
| return DKIX_EMPTY; |
| } |
| perturb >>= PERTURB_SHIFT; |
| i = mask & (i*5 + perturb + 1); |
| } |
| Py_UNREACHABLE(); |
| } |
| |
| static inline Py_ALWAYS_INLINE Py_ssize_t |
| do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash, |
| int (*check_lookup)(PyDictObject *, PyDictKeysObject *, void *, Py_ssize_t ix, PyObject *key, Py_hash_t)) |
| { |
| void *ep0 = _DK_ENTRIES(dk); |
| size_t mask = DK_MASK(dk); |
| size_t perturb = hash; |
| size_t i = (size_t)hash & mask; |
| Py_ssize_t ix; |
| for (;;) { |
| ix = dictkeys_get_index(dk, i); |
| if (ix >= 0) { |
| int cmp = check_lookup(mp, dk, ep0, ix, key, hash); |
| if (cmp < 0) { |
| return cmp; |
| } else if (cmp) { |
| return ix; |
| } |
| } |
| else if (ix == DKIX_EMPTY) { |
| return DKIX_EMPTY; |
| } |
| perturb >>= PERTURB_SHIFT; |
| i = mask & (i*5 + perturb + 1); |
| |
| // Manual loop unrolling |
| ix = dictkeys_get_index(dk, i); |
| if (ix >= 0) { |
| int cmp = check_lookup(mp, dk, ep0, ix, key, hash); |
| if (cmp < 0) { |
| return cmp; |
| } else if (cmp) { |
| return ix; |
| } |
| } |
| else if (ix == DKIX_EMPTY) { |
| return DKIX_EMPTY; |
| } |
| perturb >>= PERTURB_SHIFT; |
| i = mask & (i*5 + perturb + 1); |
| } |
| Py_UNREACHABLE(); |
| } |
| |
| static inline int |
| compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk, |
| void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) |
| { |
| PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; |
| assert(ep->me_key != NULL); |
| assert(PyUnicode_CheckExact(ep->me_key)); |
| assert(!PyUnicode_CheckExact(key)); |
| |
| if (unicode_get_hash(ep->me_key) == hash) { |
| PyObject *startkey = ep->me_key; |
| Py_INCREF(startkey); |
| int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); |
| Py_DECREF(startkey); |
| if (cmp < 0) { |
| return DKIX_ERROR; |
| } |
| if (dk == mp->ma_keys && ep->me_key == startkey) { |
| return cmp; |
| } |
| else { |
| /* The dict was mutated, restart */ |
| return DKIX_KEY_CHANGED; |
| } |
| } |
| return 0; |
| } |
| |
// Search for a non-Unicode key in a table of Unicode keys
| static Py_ssize_t |
| unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) |
| { |
| return do_lookup(mp, dk, key, hash, compare_unicode_generic); |
| } |
| |
| static inline int |
| compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk, |
| void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) |
| { |
| PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; |
| PyObject *ep_key = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_key); |
| assert(ep_key != NULL); |
| assert(PyUnicode_CheckExact(ep_key)); |
| if (ep_key == key || |
| (unicode_get_hash(ep_key) == hash && unicode_eq(ep_key, key))) { |
| return 1; |
| } |
| return 0; |
| } |
| |
| static Py_ssize_t _Py_HOT_FUNCTION |
| unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) |
| { |
| return do_lookup(NULL, dk, key, hash, compare_unicode_unicode); |
| } |
| |
| static inline int |
| compare_generic(PyDictObject *mp, PyDictKeysObject *dk, |
| void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) |
| { |
| PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix]; |
| assert(ep->me_key != NULL); |
| if (ep->me_key == key) { |
| return 1; |
| } |
| if (ep->me_hash == hash) { |
| PyObject *startkey = ep->me_key; |
| Py_INCREF(startkey); |
| int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); |
| Py_DECREF(startkey); |
| if (cmp < 0) { |
| return DKIX_ERROR; |
| } |
| if (dk == mp->ma_keys && ep->me_key == startkey) { |
| return cmp; |
| } |
| else { |
| /* The dict was mutated, restart */ |
| return DKIX_KEY_CHANGED; |
| } |
| } |
| return 0; |
| } |
| |
| static Py_ssize_t |
| dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) |
| { |
| return do_lookup(mp, dk, key, hash, compare_generic); |
| } |
| |
/* Look up a string in an all-unicode dict keys.
 * Returns DKIX_ERROR if the key is not a string,
 * or if the dict keys are not all strings.
 * If the key is present, returns its index.
 * If the key is not present, returns DKIX_EMPTY.
 */
| Py_ssize_t |
| _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key) |
| { |
| DictKeysKind kind = dk->dk_kind; |
| if (!PyUnicode_CheckExact(key) || kind == DICT_KEYS_GENERAL) { |
| return DKIX_ERROR; |
| } |
| Py_hash_t hash = unicode_get_hash(key); |
| if (hash == -1) { |
| hash = PyUnicode_Type.tp_hash(key); |
| if (hash == -1) { |
| PyErr_Clear(); |
| return DKIX_ERROR; |
| } |
| } |
| return unicodekeys_lookup_unicode(dk, key, hash); |
| } |
| |
| #ifdef Py_GIL_DISABLED |
| |
| static Py_ssize_t |
| unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key, |
| Py_hash_t hash); |
| |
| #endif |
| |
| /* |
| The basic lookup function used by all operations. |
| This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. |
| Open addressing is preferred over chaining since the link overhead for |
| chaining would be substantial (100% with typical malloc overhead). |
| |
| The initial probe index is computed as hash mod the table size. Subsequent |
| probe indices are computed as explained earlier. |
| |
| All arithmetic on hash should ignore overflow. |
| |
_Py_dict_lookup() is general-purpose, and may return DKIX_ERROR if (and only
if) a comparison raises an exception.
When the key isn't found, DKIX_EMPTY is returned.
| */ |
| Py_ssize_t |
| _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr) |
| { |
| PyDictKeysObject *dk; |
| DictKeysKind kind; |
| Py_ssize_t ix; |
| |
| _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); |
| start: |
| dk = mp->ma_keys; |
| kind = dk->dk_kind; |
| |
| if (kind != DICT_KEYS_GENERAL) { |
| if (PyUnicode_CheckExact(key)) { |
| #ifdef Py_GIL_DISABLED |
| if (kind == DICT_KEYS_SPLIT) { |
                // A split dictionary's keys can be mutated by other
                // dictionaries, but if we have a unicode key we can
                // avoid locking the shared keys.
| ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); |
| if (ix == DKIX_KEY_CHANGED) { |
| LOCK_KEYS(dk); |
| ix = unicodekeys_lookup_unicode(dk, key, hash); |
| UNLOCK_KEYS(dk); |
| } |
| } |
| else { |
| ix = unicodekeys_lookup_unicode(dk, key, hash); |
| } |
| #else |
| ix = unicodekeys_lookup_unicode(dk, key, hash); |
| #endif |
| } |
| else { |
| INCREF_KEYS_FT(dk); |
| LOCK_KEYS_IF_SPLIT(dk, kind); |
| |
| ix = unicodekeys_lookup_generic(mp, dk, key, hash); |
| |
| UNLOCK_KEYS_IF_SPLIT(dk, kind); |
| DECREF_KEYS_FT(dk, IS_DICT_SHARED(mp)); |
| if (ix == DKIX_KEY_CHANGED) { |
| goto start; |
| } |
| } |
| |
| if (ix >= 0) { |
| if (kind == DICT_KEYS_SPLIT) { |
| *value_addr = mp->ma_values->values[ix]; |
| } |
| else { |
| *value_addr = DK_UNICODE_ENTRIES(dk)[ix].me_value; |
| } |
| } |
| else { |
| *value_addr = NULL; |
| } |
| } |
| else { |
| ix = dictkeys_generic_lookup(mp, dk, key, hash); |
| if (ix == DKIX_KEY_CHANGED) { |
| goto start; |
| } |
| if (ix >= 0) { |
| *value_addr = DK_ENTRIES(dk)[ix].me_value; |
| } |
| else { |
| *value_addr = NULL; |
| } |
| } |
| |
| return ix; |
| } |
| |
| #ifdef Py_GIL_DISABLED |
| static inline void |
| ensure_shared_on_read(PyDictObject *mp) |
| { |
| if (!_Py_IsOwnedByCurrentThread((PyObject *)mp) && !IS_DICT_SHARED(mp)) { |
        // The first time we access a dict from a non-owning thread we mark it
        // as shared. This ensures that a concurrent resize operation will
        // delay freeing the old keys or values using QSBR, which is necessary
        // for safe lock-free concurrent reads.
| Py_BEGIN_CRITICAL_SECTION(mp); |
| if (!IS_DICT_SHARED(mp)) { |
| SET_DICT_SHARED(mp); |
| } |
| Py_END_CRITICAL_SECTION(); |
| } |
| } |
| #endif |
| |
| static inline void |
| ensure_shared_on_resize(PyDictObject *mp) |
| { |
| #ifdef Py_GIL_DISABLED |
| _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); |
| |
| if (!_Py_IsOwnedByCurrentThread((PyObject *)mp) && !IS_DICT_SHARED(mp)) { |
        // We are writing to the dict from a thread other than the one that
        // owns it, and it hasn't been marked as shared yet. Marking it as
        // shared ensures that when we re-size ma_keys or ma_values we will
        // free the old ones using QSBR. We need to lock the dictionary to
        // contend with writes from the owning thread, mark it as shared,
        // and then we can continue with lock-free reads.
        // Technically this is a little heavy-handed; we could just free the
        // individual old keys / old values using QSBR.
| SET_DICT_SHARED(mp); |
| } |
| #endif |
| } |
| |
| #ifdef Py_GIL_DISABLED |
| |
| static inline Py_ALWAYS_INLINE int |
| compare_unicode_generic_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, |
| void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) |
| { |
| PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; |
| PyObject *startkey = _Py_atomic_load_ptr_relaxed(&ep->me_key); |
    assert(startkey == NULL || PyUnicode_CheckExact(startkey));
| assert(!PyUnicode_CheckExact(key)); |
| |
| if (startkey != NULL) { |
| if (!_Py_TryIncrefCompare(&ep->me_key, startkey)) { |
| return DKIX_KEY_CHANGED; |
| } |
| |
| if (unicode_get_hash(startkey) == hash) { |
| int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); |
| Py_DECREF(startkey); |
| if (cmp < 0) { |
| return DKIX_ERROR; |
| } |
| if (dk == _Py_atomic_load_ptr_relaxed(&mp->ma_keys) && |
| startkey == _Py_atomic_load_ptr_relaxed(&ep->me_key)) { |
| return cmp; |
| } |
| else { |
| /* The dict was mutated, restart */ |
| return DKIX_KEY_CHANGED; |
| } |
| } |
| else { |
| Py_DECREF(startkey); |
| } |
| } |
| return 0; |
| } |
| |
// Search for a non-Unicode key in a table of Unicode keys
| static Py_ssize_t |
| unicodekeys_lookup_generic_threadsafe(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) |
| { |
| return do_lookup(mp, dk, key, hash, compare_unicode_generic_threadsafe); |
| } |
| |
| static inline Py_ALWAYS_INLINE int |
| compare_unicode_unicode_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, |
| void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) |
| { |
| PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix]; |
| PyObject *startkey = _Py_atomic_load_ptr_relaxed(&ep->me_key); |
| assert(startkey == NULL || PyUnicode_CheckExact(startkey)); |
| if (startkey == key) { |
| return 1; |
| } |
| if (startkey != NULL) { |
| if (_Py_IsImmortal(startkey)) { |
| return unicode_get_hash(startkey) == hash && unicode_eq(startkey, key); |
| } |
| else { |
| if (!_Py_TryIncrefCompare(&ep->me_key, startkey)) { |
| return DKIX_KEY_CHANGED; |
| } |
| if (unicode_get_hash(startkey) == hash && unicode_eq(startkey, key)) { |
| Py_DECREF(startkey); |
| return 1; |
| } |
| Py_DECREF(startkey); |
| } |
| } |
| return 0; |
| } |
| |
| static Py_ssize_t _Py_HOT_FUNCTION |
| unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) |
| { |
| return do_lookup(NULL, dk, key, hash, compare_unicode_unicode_threadsafe); |
| } |
| |
| static inline Py_ALWAYS_INLINE int |
| compare_generic_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, |
| void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash) |
| { |
| PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix]; |
| PyObject *startkey = _Py_atomic_load_ptr_relaxed(&ep->me_key); |
| if (startkey == key) { |
| return 1; |
| } |
| Py_ssize_t ep_hash = _Py_atomic_load_ssize_relaxed(&ep->me_hash); |
| if (ep_hash == hash) { |
| if (startkey == NULL || !_Py_TryIncrefCompare(&ep->me_key, startkey)) { |
| return DKIX_KEY_CHANGED; |
| } |
| int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); |
| Py_DECREF(startkey); |
| if (cmp < 0) { |
| return DKIX_ERROR; |
| } |
| if (dk == _Py_atomic_load_ptr_relaxed(&mp->ma_keys) && |
| startkey == _Py_atomic_load_ptr_relaxed(&ep->me_key)) { |
| return cmp; |
| } |
| else { |
| /* The dict was mutated, restart */ |
| return DKIX_KEY_CHANGED; |
| } |
| } |
| return 0; |
| } |
| |
| static Py_ssize_t |
| dictkeys_generic_lookup_threadsafe(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) |
| { |
| return do_lookup(mp, dk, key, hash, compare_generic_threadsafe); |
| } |
| |
| Py_ssize_t |
| _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr) |
| { |
| PyDictKeysObject *dk; |
| DictKeysKind kind; |
| Py_ssize_t ix; |
| PyObject *value; |
| |
| ensure_shared_on_read(mp); |
| |
| dk = _Py_atomic_load_ptr(&mp->ma_keys); |
| kind = dk->dk_kind; |
| |
| if (kind != DICT_KEYS_GENERAL) { |
| if (PyUnicode_CheckExact(key)) { |
| ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); |
| } |
| else { |
| ix = unicodekeys_lookup_generic_threadsafe(mp, dk, key, hash); |
| } |
| if (ix == DKIX_KEY_CHANGED) { |
| goto read_failed; |
| } |
| |
| if (ix >= 0) { |
| if (kind == DICT_KEYS_SPLIT) { |
| PyDictValues *values = _Py_atomic_load_ptr(&mp->ma_values); |
| if (values == NULL) |
| goto read_failed; |
| |
| uint8_t capacity = _Py_atomic_load_uint8_relaxed(&values->capacity); |
| if (ix >= (Py_ssize_t)capacity) |
| goto read_failed; |
| |
| value = _Py_TryXGetRef(&values->values[ix]); |
| if (value == NULL) |
| goto read_failed; |
| |
| if (values != _Py_atomic_load_ptr(&mp->ma_values)) { |
| Py_DECREF(value); |
| goto read_failed; |
| } |
| } |
| else { |
| value = _Py_TryXGetRef(&DK_UNICODE_ENTRIES(dk)[ix].me_value); |
| if (value == NULL) { |
| goto read_failed; |
| } |
| |
| if (dk != _Py_atomic_load_ptr(&mp->ma_keys)) { |
| Py_DECREF(value); |
| goto read_failed; |
| } |
| } |
| } |
| else { |
| value = NULL; |
| } |
| } |
| else { |
| ix = dictkeys_generic_lookup_threadsafe(mp, dk, key, hash); |
| if (ix == DKIX_KEY_CHANGED) { |
| goto read_failed; |
| } |
| if (ix >= 0) { |
| value = _Py_TryXGetRef(&DK_ENTRIES(dk)[ix].me_value); |
| if (value == NULL) |
| goto read_failed; |
| |
| if (dk != _Py_atomic_load_ptr(&mp->ma_keys)) { |
| Py_DECREF(value); |
| goto read_failed; |
| } |
| } |
| else { |
| value = NULL; |
| } |
| } |
| |
| *value_addr = value; |
| return ix; |
| |
| read_failed: |
    // In addition to the normal races from the dict being modified, the
    // _Py_TryXGetRef calls can fail if the value doesn't yet have a shared
    // ref count. That can happen here or in the *_lookup_* helper. In that
    // case we need to take the lock to avoid mutation and do a normal
    // incref, which will make the reference shared.
| Py_BEGIN_CRITICAL_SECTION(mp); |
| ix = _Py_dict_lookup(mp, key, hash, &value); |
| *value_addr = value; |
| if (value != NULL) { |
| assert(ix >= 0); |
| _Py_NewRefWithLock(value); |
| } |
| Py_END_CRITICAL_SECTION(); |
| return ix; |
| } |
| |
| Py_ssize_t |
| _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr) |
| { |
| PyDictKeysObject *dk = _Py_atomic_load_ptr(&mp->ma_keys); |
| if (dk->dk_kind == DICT_KEYS_UNICODE && PyUnicode_CheckExact(key)) { |
| Py_ssize_t ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash); |
| if (ix == DKIX_EMPTY) { |
| *value_addr = PyStackRef_NULL; |
| return ix; |
| } |
| else if (ix >= 0) { |
| PyObject **addr_of_value = &DK_UNICODE_ENTRIES(dk)[ix].me_value; |
| PyObject *value = _Py_atomic_load_ptr(addr_of_value); |
| if (value == NULL) { |
| *value_addr = PyStackRef_NULL; |
| return DKIX_EMPTY; |
| } |
| if (_Py_IsImmortal(value) || _PyObject_HasDeferredRefcount(value)) { |
| *value_addr = (_PyStackRef){ .bits = (uintptr_t)value | Py_TAG_DEFERRED }; |
| return ix; |
| } |
| if (_Py_TryIncrefCompare(addr_of_value, value)) { |
| *value_addr = PyStackRef_FromPyObjectSteal(value); |
| return ix; |
| } |
| } |
| } |
| |
| PyObject *obj; |
| Py_ssize_t ix = _Py_dict_lookup_threadsafe(mp, key, hash, &obj); |
| if (ix >= 0 && obj != NULL) { |
| *value_addr = PyStackRef_FromPyObjectSteal(obj); |
| } |
| else { |
| *value_addr = PyStackRef_NULL; |
| } |
| return ix; |
| } |
| |
| #else // Py_GIL_DISABLED |
| |
| Py_ssize_t |
| _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr) |
| { |
| Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, value_addr); |
| Py_XNewRef(*value_addr); |
| return ix; |
| } |
| |
| Py_ssize_t |
| _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject *key, Py_hash_t hash, _PyStackRef *value_addr) |
| { |
| PyObject *val; |
| Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &val); |
| if (val == NULL) { |
| *value_addr = PyStackRef_NULL; |
| } |
| else { |
| *value_addr = PyStackRef_FromPyObjectNew(val); |
| } |
| return ix; |
| } |
| |
| #endif |
| |
| int |
| _PyDict_HasOnlyStringKeys(PyObject *dict) |
| { |
| Py_ssize_t pos = 0; |
| PyObject *key, *value; |
| assert(PyDict_Check(dict)); |
| /* Shortcut */ |
| if (((PyDictObject *)dict)->ma_keys->dk_kind != DICT_KEYS_GENERAL) |
| return 1; |
| while (PyDict_Next(dict, &pos, &key, &value)) |
| if (!PyUnicode_Check(key)) |
| return 0; |
| return 1; |
| } |
| |
| #define MAINTAIN_TRACKING(mp, key, value) \ |
| do { \ |
| if (!_PyObject_GC_IS_TRACKED(mp)) { \ |
| if (_PyObject_GC_MAY_BE_TRACKED(key) || \ |
| _PyObject_GC_MAY_BE_TRACKED(value)) { \ |
| _PyObject_GC_TRACK(mp); \ |
| } \ |
| } \ |
| } while(0) |
| |
| void |
| _PyDict_MaybeUntrack(PyObject *op) |
| { |
| PyDictObject *mp; |
| PyObject *value; |
| Py_ssize_t i, numentries; |
| |
| ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op); |
| |
| if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) |
| return; |
| |
| mp = (PyDictObject *) op; |
| ASSERT_CONSISTENT(mp); |
| numentries = mp->ma_keys->dk_nentries; |
| if (_PyDict_HasSplitTable(mp)) { |
| for (i = 0; i < numentries; i++) { |
| if ((value = mp->ma_values->values[i]) == NULL) |
| continue; |
| if (_PyObject_GC_MAY_BE_TRACKED(value)) { |
| return; |
| } |
| } |
| } |
| else { |
| if (DK_IS_UNICODE(mp->ma_keys)) { |
| PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(mp->ma_keys); |
| for (i = 0; i < numentries; i++) { |
| if ((value = ep0[i].me_value) == NULL) |
| continue; |
| if (_PyObject_GC_MAY_BE_TRACKED(value)) |
| return; |
| } |
| } |
| else { |
| PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); |
| for (i = 0; i < numentries; i++) { |
| if ((value = ep0[i].me_value) == NULL) |
| continue; |
| if (_PyObject_GC_MAY_BE_TRACKED(value) || |
| _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key)) |
| return; |
| } |
| } |
| } |
| _PyObject_GC_UNTRACK(op); |
| } |
| |
| void |
| _PyDict_EnablePerThreadRefcounting(PyObject *op) |
| { |
| assert(PyDict_Check(op)); |
| #ifdef Py_GIL_DISABLED |
| Py_ssize_t id = _PyObject_AssignUniqueId(op); |
| if ((uint64_t)id >= (uint64_t)DICT_UNIQUE_ID_MAX) { |
| _PyObject_ReleaseUniqueId(id); |
| return; |
| } |
| |
| PyDictObject *mp = (PyDictObject *)op; |
| assert((mp->_ma_watcher_tag >> DICT_UNIQUE_ID_SHIFT) == 0); |
| // Plus 1 so that _ma_watcher_tag=0 represents an unassigned id |
| mp->_ma_watcher_tag += ((uint64_t)id + 1) << DICT_UNIQUE_ID_SHIFT; |
| #endif |
| } |
| |
| static inline int |
| is_unusable_slot(Py_ssize_t ix) |
| { |
| #ifdef Py_GIL_DISABLED |
| return ix >= 0 || ix == DKIX_DUMMY; |
| #else |
| return ix >= 0; |
| #endif |
| } |
| |
/* Internal function to find a slot for an item, given its hash,
   when it is known that the key is not present in the dict.
 */
| static Py_ssize_t |
| find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash) |
| { |
| assert(keys != NULL); |
| |
| const size_t mask = DK_MASK(keys); |
| size_t i = hash & mask; |
| Py_ssize_t ix = dictkeys_get_index(keys, i); |
| for (size_t perturb = hash; is_unusable_slot(ix);) { |
| perturb >>= PERTURB_SHIFT; |
| i = (i*5 + perturb + 1) & mask; |
| ix = dictkeys_get_index(keys, i); |
| } |
| return i; |
| } |
| |
| static int |
| insertion_resize(PyInterpreterState *interp, PyDictObject *mp, int unicode) |
| { |
| return dictresize(interp, mp, calculate_log2_keysize(GROWTH_RATE(mp)), unicode); |
| } |
| |
| static inline int |
| insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, |
| Py_hash_t hash, PyObject *key, PyObject *value) |
| { |
| if (mp->ma_keys->dk_usable <= 0) { |
| /* Need to resize. */ |
| if (insertion_resize(interp, mp, 1) < 0) { |
| return -1; |
| } |
| } |
| |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); |
| mp->ma_keys->dk_version = 0; |
| |
| Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); |
| dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries); |
| |
| if (DK_IS_UNICODE(mp->ma_keys)) { |
| PyDictUnicodeEntry *ep; |
| ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries]; |
| STORE_KEY(ep, key); |
| STORE_VALUE(ep, value); |
| } |
| else { |
| PyDictKeyEntry *ep; |
| ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries]; |
| STORE_KEY(ep, key); |
| STORE_VALUE(ep, value); |
| STORE_HASH(ep, hash); |
| } |
| STORE_KEYS_USABLE(mp->ma_keys, mp->ma_keys->dk_usable - 1); |
| STORE_KEYS_NENTRIES(mp->ma_keys, mp->ma_keys->dk_nentries + 1); |
| assert(mp->ma_keys->dk_usable >= 0); |
| return 0; |
| } |
| |
| static Py_ssize_t |
| insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) |
| { |
| assert(PyUnicode_CheckExact(key)); |
    Py_ssize_t ix;

| #ifdef Py_GIL_DISABLED |
| ix = unicodekeys_lookup_unicode_threadsafe(keys, key, hash); |
| if (ix >= 0) { |
| return ix; |
| } |
| #endif |
| |
| LOCK_KEYS(keys); |
| ix = unicodekeys_lookup_unicode(keys, key, hash); |
| if (ix == DKIX_EMPTY && keys->dk_usable > 0) { |
| // Insert into new slot |
| keys->dk_version = 0; |
| Py_ssize_t hashpos = find_empty_slot(keys, hash); |
| ix = keys->dk_nentries; |
| dictkeys_set_index(keys, hashpos, ix); |
| PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(keys)[ix]; |
| STORE_SHARED_KEY(ep->me_key, Py_NewRef(key)); |
| split_keys_entry_added(keys); |
| } |
    assert(ix < SHARED_KEYS_MAX_SIZE);
| UNLOCK_KEYS(keys); |
| return ix; |
| } |
| |
| static void |
| insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, PyObject *value, Py_ssize_t ix) |
| { |
| assert(PyUnicode_CheckExact(key)); |
| ASSERT_DICT_LOCKED(mp); |
| MAINTAIN_TRACKING(mp, key, value); |
| PyObject *old_value = mp->ma_values->values[ix]; |
| if (old_value == NULL) { |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); |
| STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); |
| _PyDictValues_AddToInsertionOrder(mp->ma_values, ix); |
| STORE_USED(mp, mp->ma_used + 1); |
| } |
| else { |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_MODIFIED, mp, key, value); |
| STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); |
        // old_value must be DECREFed after the GC tracking check above;
        // otherwise, when the dict holds the only strong reference to the
        // value, the DECREF could free it and trigger a segmentation fault.
| Py_DECREF(old_value); |
| } |
| ASSERT_CONSISTENT(mp); |
| } |
| |
| /* |
| Internal routine to insert a new item into the table. |
| Used both by the internal resize routine and by the public insert routine. |
| Returns -1 if an error occurred, or 0 on success. |
| Consumes key and value references. |
| */ |
| static int |
| insertdict(PyInterpreterState *interp, PyDictObject *mp, |
| PyObject *key, Py_hash_t hash, PyObject *value) |
| { |
| PyObject *old_value; |
| |
| ASSERT_DICT_LOCKED(mp); |
| |
| if (DK_IS_UNICODE(mp->ma_keys) && !PyUnicode_CheckExact(key)) { |
| if (insertion_resize(interp, mp, 0) < 0) |
| goto Fail; |
| assert(mp->ma_keys->dk_kind == DICT_KEYS_GENERAL); |
| } |
| |
| if (_PyDict_HasSplitTable(mp)) { |
| Py_ssize_t ix = insert_split_key(mp->ma_keys, key, hash); |
| if (ix != DKIX_EMPTY) { |
| insert_split_value(interp, mp, key, value, ix); |
| Py_DECREF(key); |
| Py_DECREF(value); |
| return 0; |
| } |
| |
| /* No space in shared keys. Resize and continue below. */ |
| if (insertion_resize(interp, mp, 1) < 0) { |
| goto Fail; |
| } |
| } |
| |
| Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); |
| if (ix == DKIX_ERROR) |
| goto Fail; |
| |
| MAINTAIN_TRACKING(mp, key, value); |
| |
| if (ix == DKIX_EMPTY) { |
| assert(!_PyDict_HasSplitTable(mp)); |
| /* Insert into new slot. */ |
| assert(old_value == NULL); |
| if (insert_combined_dict(interp, mp, hash, key, value) < 0) { |
| goto Fail; |
| } |
| STORE_USED(mp, mp->ma_used + 1); |
| ASSERT_CONSISTENT(mp); |
| return 0; |
| } |
| |
| if (old_value != value) { |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_MODIFIED, mp, key, value); |
| assert(old_value != NULL); |
| assert(!_PyDict_HasSplitTable(mp)); |
| if (DK_IS_UNICODE(mp->ma_keys)) { |
| PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; |
| STORE_VALUE(ep, value); |
| } |
| else { |
| PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix]; |
| STORE_VALUE(ep, value); |
| } |
| } |
| Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */ |
| ASSERT_CONSISTENT(mp); |
| Py_DECREF(key); |
| return 0; |
| |
| Fail: |
| Py_DECREF(value); |
| Py_DECREF(key); |
| return -1; |
| } |
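| |
| /* Minimal caller sketch: insertdict() consumes both references even on |
|  * failure, so callers hand over owned references and never release them |
|  * again (compare setitem_take2_lock_held() below): |
|  * |
|  *     Py_hash_t hash = PyObject_Hash(key); |
|  *     if (hash == -1) { |
|  *         return -1; |
|  *     } |
|  *     if (insertdict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value)) < 0) { |
|  *         return -1;  // key and value were already released by insertdict() |
|  *     } |
|  */ |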
| |
| // Same as insertdict but specialized for ma_keys == Py_EMPTY_KEYS. |
| // Consumes key and value references. |
| static int |
| insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, |
| PyObject *key, Py_hash_t hash, PyObject *value) |
| { |
| assert(mp->ma_keys == Py_EMPTY_KEYS); |
| ASSERT_DICT_LOCKED(mp); |
| |
| int unicode = PyUnicode_CheckExact(key); |
| PyDictKeysObject *newkeys = new_keys_object( |
| interp, PyDict_LOG_MINSIZE, unicode); |
| if (newkeys == NULL) { |
| Py_DECREF(key); |
| Py_DECREF(value); |
| return -1; |
| } |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); |
| |
| /* We don't decref Py_EMPTY_KEYS here because it is immortal. */ |
| assert(mp->ma_values == NULL); |
| |
| MAINTAIN_TRACKING(mp, key, value); |
| |
| size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1); |
| dictkeys_set_index(newkeys, hashpos, 0); |
| if (unicode) { |
| PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(newkeys); |
| ep->me_key = key; |
| STORE_VALUE(ep, value); |
| } |
| else { |
| PyDictKeyEntry *ep = DK_ENTRIES(newkeys); |
| ep->me_key = key; |
| ep->me_hash = hash; |
| STORE_VALUE(ep, value); |
| } |
| STORE_USED(mp, mp->ma_used + 1); |
| newkeys->dk_usable--; |
| newkeys->dk_nentries++; |
| // Store the keys last so that no thread can observe them in a partially |
| // initialized state, and so that we don't yet need to mark the keys as |
| // shared when inserting from a non-owning thread. We don't use set_keys |
| // here because the transition from empty to non-empty is safe: the empty |
| // keys object is immortal and will never be freed. |
| FT_ATOMIC_STORE_PTR_RELEASE(mp->ma_keys, newkeys); |
| return 0; |
| } |
| |
| /* |
| Internal routine used by dictresize() to build a hashtable of entries. |
| */ |
| static void |
| build_indices_generic(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n) |
| { |
| size_t mask = DK_MASK(keys); |
| for (Py_ssize_t ix = 0; ix != n; ix++, ep++) { |
| Py_hash_t hash = ep->me_hash; |
| size_t i = hash & mask; |
| for (size_t perturb = hash; dictkeys_get_index(keys, i) != DKIX_EMPTY;) { |
| perturb >>= PERTURB_SHIFT; |
| i = mask & (i*5 + perturb + 1); |
| } |
| dictkeys_set_index(keys, i, ix); |
| } |
| } |
| |
| static void |
| build_indices_unicode(PyDictKeysObject *keys, PyDictUnicodeEntry *ep, Py_ssize_t n) |
| { |
| size_t mask = DK_MASK(keys); |
| for (Py_ssize_t ix = 0; ix != n; ix++, ep++) { |
| Py_hash_t hash = unicode_get_hash(ep->me_key); |
| assert(hash != -1); |
| size_t i = hash & mask; |
| for (size_t perturb = hash; dictkeys_get_index(keys, i) != DKIX_EMPTY;) { |
| perturb >>= PERTURB_SHIFT; |
| i = mask & (i*5 + perturb + 1); |
| } |
| dictkeys_set_index(keys, i, ix); |
| } |
| } |
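| |
| /* Worked example of the probe recurrence used above (illustrative; assumes |
|  * PERTURB_SHIFT == 5): with mask == 7 and hash == 11, the initial slot is |
|  * 11 & 7 == 3. On each collision, perturb (initially the hash) is shifted |
|  * right by PERTURB_SHIFT and i = (i*5 + perturb + 1) & mask, giving |
|  * |
|  *     3, 0, 1, 6, 7, 4, 5, 2 |
|  * |
|  * Once perturb reaches zero, i = (i*5 + 1) & mask has full period, so |
|  * every slot is eventually probed. |
|  */ |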
| |
| /* |
| Restructure the table by allocating a new table and reinserting all |
| items again. When entries have been deleted, the new table may |
| actually be smaller than the old one. |
| If a table is split (its keys and hashes are shared, its values are not), |
| then the values are moved into a new combined table, and the old values |
| array is freed (or, if embedded, marked invalid). |
| After resizing, a table is always combined. |
| |
| This function supports: |
| - Unicode split -> Unicode combined or Generic |
| - Unicode combined -> Unicode combined or Generic |
| - Generic -> Generic |
| */ |
| static int |
| dictresize(PyInterpreterState *interp, PyDictObject *mp, |
| uint8_t log2_newsize, int unicode) |
| { |
| PyDictKeysObject *oldkeys, *newkeys; |
| PyDictValues *oldvalues; |
| |
| ASSERT_DICT_LOCKED(mp); |
| |
| if (log2_newsize >= SIZEOF_SIZE_T*8) { |
| PyErr_NoMemory(); |
| return -1; |
| } |
| assert(log2_newsize >= PyDict_LOG_MINSIZE); |
| |
| oldkeys = mp->ma_keys; |
| oldvalues = mp->ma_values; |
| |
| if (!DK_IS_UNICODE(oldkeys)) { |
| unicode = 0; |
| } |
| |
| ensure_shared_on_resize(mp); |
| /* NOTE: The current odict implementation checks mp->ma_keys to detect |
|  * that a resize has happened, so we can't reuse oldkeys even if |
|  * oldkeys->dk_size == newsize. |
|  * TODO: Try reusing oldkeys when odict is reimplemented. |
|  */ |
| |
| /* Allocate a new table. */ |
| newkeys = new_keys_object(interp, log2_newsize, unicode); |
| if (newkeys == NULL) { |
| return -1; |
| } |
| // New table must be large enough. |
| assert(newkeys->dk_usable >= mp->ma_used); |
| |
| Py_ssize_t numentries = mp->ma_used; |
| |
| if (oldvalues != NULL) { |
| LOCK_KEYS(oldkeys); |
| PyDictUnicodeEntry *oldentries = DK_UNICODE_ENTRIES(oldkeys); |
| /* Convert split table into new combined table. |
| * We must incref keys; we can transfer values. |
| */ |
| if (newkeys->dk_kind == DICT_KEYS_GENERAL) { |
| // split -> generic |
| PyDictKeyEntry *newentries = DK_ENTRIES(newkeys); |
| |
| for (Py_ssize_t i = 0; i < numentries; i++) { |
| int index = get_index_from_order(mp, i); |
| PyDictUnicodeEntry *ep = &oldentries[index]; |
| assert(oldvalues->values[index] != NULL); |
| newentries[i].me_key = Py_NewRef(ep->me_key); |
| newentries[i].me_hash = unicode_get_hash(ep->me_key); |
| newentries[i].me_value = oldvalues->values[index]; |
| } |
| build_indices_generic(newkeys, newentries, numentries); |
| } |
| else { // split -> combined unicode |
| PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(newkeys); |
| |
| for (Py_ssize_t i = 0; i < numentries; i++) { |
| int index = get_index_from_order(mp, i); |
| PyDictUnicodeEntry *ep = &oldentries[index]; |
| assert(oldvalues->values[index] != NULL); |
| newentries[i].me_key = Py_NewRef(ep->me_key); |
| newentries[i].me_value = oldvalues->values[index]; |
| } |
| build_indices_unicode(newkeys, newentries, numentries); |
| } |
| UNLOCK_KEYS(oldkeys); |
| set_keys(mp, newkeys); |
| dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); |
| set_values(mp, NULL); |
| if (oldvalues->embedded) { |
| assert(oldvalues->embedded == 1); |
| assert(oldvalues->valid == 1); |
| FT_ATOMIC_STORE_UINT8(oldvalues->valid, 0); |
| } |
| else { |
| free_values(oldvalues, IS_DICT_SHARED(mp)); |
| } |
| } |
| else { // oldkeys is combined. |
| if (oldkeys->dk_kind == DICT_KEYS_GENERAL) { |
| // generic -> generic |
| assert(newkeys->dk_kind == DICT_KEYS_GENERAL); |
| PyDictKeyEntry *oldentries = DK_ENTRIES(oldkeys); |
| PyDictKeyEntry *newentries = DK_ENTRIES(newkeys); |
| if (oldkeys->dk_nentries == numentries) { |
| memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry)); |
| } |
| else { |
| PyDictKeyEntry *ep = oldentries; |
| for (Py_ssize_t i = 0; i < numentries; i++) { |
| while (ep->me_value == NULL) |
| ep++; |
| newentries[i] = *ep++; |
| } |
| } |
| build_indices_generic(newkeys, newentries, numentries); |
| } |
| else { // oldkeys is combined unicode |
| PyDictUnicodeEntry *oldentries = DK_UNICODE_ENTRIES(oldkeys); |
| if (unicode) { // combined unicode -> combined unicode |
| PyDictUnicodeEntry *newentries = DK_UNICODE_ENTRIES(newkeys); |
| if (oldkeys->dk_nentries == numentries && mp->ma_keys->dk_kind == DICT_KEYS_UNICODE) { |
| memcpy(newentries, oldentries, numentries * sizeof(PyDictUnicodeEntry)); |
| } |
| else { |
| PyDictUnicodeEntry *ep = oldentries; |
| for (Py_ssize_t i = 0; i < numentries; i++) { |
| while (ep->me_value == NULL) |
| ep++; |
| newentries[i] = *ep++; |
| } |
| } |
| build_indices_unicode(newkeys, newentries, numentries); |
| } |
| else { // combined unicode -> generic |
| PyDictKeyEntry *newentries = DK_ENTRIES(newkeys); |
| PyDictUnicodeEntry *ep = oldentries; |
| for (Py_ssize_t i = 0; i < numentries; i++) { |
| while (ep->me_value == NULL) |
| ep++; |
| newentries[i].me_key = ep->me_key; |
| newentries[i].me_hash = unicode_get_hash(ep->me_key); |
| newentries[i].me_value = ep->me_value; |
| ep++; |
| } |
| build_indices_generic(newkeys, newentries, numentries); |
| } |
| } |
| |
| set_keys(mp, newkeys); |
| |
| if (oldkeys != Py_EMPTY_KEYS) { |
| #ifdef Py_REF_DEBUG |
| _Py_DecRefTotal(_PyThreadState_GET()); |
| #endif |
| assert(oldkeys->dk_kind != DICT_KEYS_SPLIT); |
| assert(oldkeys->dk_refcnt == 1); |
| free_keys_object(oldkeys, IS_DICT_SHARED(mp)); |
| } |
| } |
| |
| STORE_KEYS_USABLE(mp->ma_keys, mp->ma_keys->dk_usable - numentries); |
| STORE_KEYS_NENTRIES(mp->ma_keys, numentries); |
| ASSERT_CONSISTENT(mp); |
| return 0; |
| } |
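| |
| /* Compaction sketch (illustrative): a resize also squeezes out deleted |
|  * entries. If the old dk_entries hold {A, <deleted>, B} with ma_used == 2, |
|  * the copy loops above skip the NULL-valued hole, the new table holds |
|  * {A, B} at entry indices 0 and 1, and build_indices_*() rebuilds the |
|  * hash index from scratch, so no DKIX_DUMMY slots survive a resize. |
|  */ |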
| |
| static PyObject * |
| dict_new_presized(PyInterpreterState *interp, Py_ssize_t minused, bool unicode) |
| { |
| const uint8_t log2_max_presize = 17; |
| const Py_ssize_t max_presize = ((Py_ssize_t)1) << log2_max_presize; |
| uint8_t log2_newsize; |
| PyDictKeysObject *new_keys; |
| |
| if (minused <= USABLE_FRACTION(PyDict_MINSIZE)) { |
| return PyDict_New(); |
| } |
| /* There is no strict guarantee that the returned dict can hold minused |
|  * items without a resize. So we create a medium-size dict instead of a |
|  * very large dict (or a MemoryError). |
|  */ |
| if (minused > USABLE_FRACTION(max_presize)) { |
| log2_newsize = log2_max_presize; |
| } |
| else { |
| log2_newsize = estimate_log2_keysize(minused); |
| } |
| |
| new_keys = new_keys_object(interp, log2_newsize, unicode); |
| if (new_keys == NULL) |
| return NULL; |
| return new_dict(interp, new_keys, NULL, 0, 0); |
| } |
| |
| PyObject * |
| _PyDict_NewPresized(Py_ssize_t minused) |
| { |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| return dict_new_presized(interp, minused, false); |
| } |
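| |
| /* Usage sketch: presizing avoids intermediate resizes when the final item |
|  * count is known up front. |
|  * |
|  *     PyObject *d = _PyDict_NewPresized(100); |
|  *     if (d == NULL) { |
|  *         return NULL; |
|  *     } |
|  *     // Typically, 100 insertions now proceed without a resize, though |
|  *     // dict_new_presized() above gives no strict guarantee. |
|  */ |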
| |
| PyObject * |
| _PyDict_FromItems(PyObject *const *keys, Py_ssize_t keys_offset, |
| PyObject *const *values, Py_ssize_t values_offset, |
| Py_ssize_t length) |
| { |
| bool unicode = true; |
| PyObject *const *ks = keys; |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| |
| for (Py_ssize_t i = 0; i < length; i++) { |
| if (!PyUnicode_CheckExact(*ks)) { |
| unicode = false; |
| break; |
| } |
| ks += keys_offset; |
| } |
| |
| PyObject *dict = dict_new_presized(interp, length, unicode); |
| if (dict == NULL) { |
| return NULL; |
| } |
| |
| ks = keys; |
| PyObject *const *vs = values; |
| |
| for (Py_ssize_t i = 0; i < length; i++) { |
| PyObject *key = *ks; |
| PyObject *value = *vs; |
| if (setitem_lock_held((PyDictObject *)dict, key, value) < 0) { |
| Py_DECREF(dict); |
| return NULL; |
| } |
| ks += keys_offset; |
| vs += values_offset; |
| } |
| |
| return dict; |
| } |
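| |
| /* Stride sketch: keys_offset and values_offset let callers pass strided |
|  * arrays. For example (illustrative), an interleaved key/value layout |
|  * [k0, v0, k1, v1, ...] can be consumed as |
|  * |
|  *     _PyDict_FromItems(items, 2, items + 1, 2, n); |
|  * |
|  * while two parallel arrays would use offsets of 1. |
|  */ |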
| |
| /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors |
| * that may occur (originally dicts supported only string keys, and exceptions |
| * weren't possible). So, while the original intent was that a NULL return |
| * meant the key wasn't present, in reality it can mean that, or that an error |
| * (suppressed) occurred while computing the key's hash, or that some error |
| * (suppressed) occurred when comparing keys in the dict's internal probe |
| * sequence. A nasty example of the latter is when a Python-coded comparison |
| * function hits a stack-depth error, which can cause this to return NULL |
| * even if the key is present. |
| */ |
| static PyObject * |
| dict_getitem(PyObject *op, PyObject *key, const char *warnmsg) |
| { |
| if (!PyDict_Check(op)) { |
| return NULL; |
| } |
| PyDictObject *mp = (PyDictObject *)op; |
| |
| Py_hash_t hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| PyErr_FormatUnraisable(warnmsg); |
| return NULL; |
| } |
| |
| PyThreadState *tstate = _PyThreadState_GET(); |
| #ifdef Py_DEBUG |
| // bpo-40839: Before Python 3.10, it was possible to call PyDict_GetItem() |
| // with the GIL released. |
| _Py_EnsureTstateNotNULL(tstate); |
| #endif |
| |
| /* Preserve the existing exception */ |
| PyObject *value; |
| Py_ssize_t ix; (void)ix; |
| |
| PyObject *exc = _PyErr_GetRaisedException(tstate); |
| #ifdef Py_GIL_DISABLED |
| ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value); |
| Py_XDECREF(value); |
| #else |
| ix = _Py_dict_lookup(mp, key, hash, &value); |
| #endif |
| |
| /* Ignore any exception raised by the lookup */ |
| PyObject *exc2 = _PyErr_Occurred(tstate); |
| if (exc2 && !PyErr_GivenExceptionMatches(exc2, PyExc_KeyError)) { |
| PyErr_FormatUnraisable(warnmsg); |
| } |
| _PyErr_SetRaisedException(tstate, exc); |
| |
| assert(ix >= 0 || value == NULL); |
| return value; // borrowed reference |
| } |
| |
| PyObject * |
| PyDict_GetItem(PyObject *op, PyObject *key) |
| { |
| return dict_getitem(op, key, |
| "Exception ignored in PyDict_GetItem(); consider using " |
| "PyDict_GetItemRef() or PyDict_GetItemWithError()"); |
| } |
| |
| Py_ssize_t |
| _PyDict_LookupIndex(PyDictObject *mp, PyObject *key) |
| { |
| // TODO: Thread safety |
| PyObject *value; |
| assert(PyDict_CheckExact((PyObject*)mp)); |
| assert(PyUnicode_CheckExact(key)); |
| |
| Py_hash_t hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| return -1; |
| } |
| |
| return _Py_dict_lookup(mp, key, hash, &value); |
| } |
| |
| /* Same as PyDict_GetItemWithError() but with hash supplied by caller. |
| This returns NULL *with* an exception set if an exception occurred. |
| It returns NULL *without* an exception set if the key wasn't present. |
| */ |
| PyObject * |
| _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) |
| { |
| Py_ssize_t ix; (void)ix; |
| PyDictObject *mp = (PyDictObject *)op; |
| PyObject *value; |
| |
| if (!PyDict_Check(op)) { |
| PyErr_BadInternalCall(); |
| return NULL; |
| } |
| |
| #ifdef Py_GIL_DISABLED |
| ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value); |
| Py_XDECREF(value); |
| #else |
| ix = _Py_dict_lookup(mp, key, hash, &value); |
| #endif |
| assert(ix >= 0 || value == NULL); |
| return value; // borrowed reference |
| } |
| |
| /* Gets an item and provides a new reference if the value is present. |
| * Returns 1 if the key is present, 0 if the key is missing, and -1 if an |
| * exception occurred. |
| */ |
| int |
| _PyDict_GetItemRef_KnownHash_LockHeld(PyDictObject *op, PyObject *key, |
| Py_hash_t hash, PyObject **result) |
| { |
| PyObject *value; |
| Py_ssize_t ix = _Py_dict_lookup(op, key, hash, &value); |
| assert(ix >= 0 || value == NULL); |
| if (ix == DKIX_ERROR) { |
| *result = NULL; |
| return -1; |
| } |
| if (value == NULL) { |
| *result = NULL; |
| return 0; // missing key |
| } |
| *result = Py_NewRef(value); |
| return 1; // key is present |
| } |
| |
| /* Gets an item and provides a new reference if the value is present. |
| * Returns 1 if the key is present, 0 if the key is missing, and -1 if an |
| * exception occurred. |
| */ |
| int |
| _PyDict_GetItemRef_KnownHash(PyDictObject *op, PyObject *key, Py_hash_t hash, PyObject **result) |
| { |
| PyObject *value; |
| #ifdef Py_GIL_DISABLED |
| Py_ssize_t ix = _Py_dict_lookup_threadsafe(op, key, hash, &value); |
| #else |
| Py_ssize_t ix = _Py_dict_lookup(op, key, hash, &value); |
| #endif |
| assert(ix >= 0 || value == NULL); |
| if (ix == DKIX_ERROR) { |
| *result = NULL; |
| return -1; |
| } |
| if (value == NULL) { |
| *result = NULL; |
| return 0; // missing key |
| } |
| #ifdef Py_GIL_DISABLED |
| *result = value; |
| #else |
| *result = Py_NewRef(value); |
| #endif |
| return 1; // key is present |
| } |
| |
| int |
| PyDict_GetItemRef(PyObject *op, PyObject *key, PyObject **result) |
| { |
| if (!PyDict_Check(op)) { |
| PyErr_BadInternalCall(); |
| *result = NULL; |
| return -1; |
| } |
| |
| Py_hash_t hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| *result = NULL; |
| return -1; |
| } |
| |
| return _PyDict_GetItemRef_KnownHash((PyDictObject *)op, key, hash, result); |
| } |
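| |
| /* Usage sketch: unlike PyDict_GetItem(), PyDict_GetItemRef() distinguishes |
|  * "missing" from "error" and hands back a strong reference: |
|  * |
|  *     PyObject *value; |
|  *     int rc = PyDict_GetItemRef(d, key, &value); |
|  *     if (rc < 0) { |
|  *         return NULL;           // error, exception set |
|  *     } |
|  *     if (rc == 0) { |
|  *         ...                    // key missing, no exception set |
|  *     } |
|  *     ...                        // use value, then Py_DECREF(value) |
|  */ |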
| |
| int |
| _PyDict_GetItemRef_Unicode_LockHeld(PyDictObject *op, PyObject *key, PyObject **result) |
| { |
| ASSERT_DICT_LOCKED(op); |
| assert(PyUnicode_CheckExact(key)); |
| |
| Py_hash_t hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| *result = NULL; |
| return -1; |
| } |
| |
| PyObject *value; |
| Py_ssize_t ix = _Py_dict_lookup(op, key, hash, &value); |
| assert(ix >= 0 || value == NULL); |
| if (ix == DKIX_ERROR) { |
| *result = NULL; |
| return -1; |
| } |
| if (value == NULL) { |
| *result = NULL; |
| return 0; // missing key |
| } |
| *result = Py_NewRef(value); |
| return 1; // key is present |
| } |
| |
| /* Variant of PyDict_GetItem() that doesn't suppress exceptions. |
| This returns NULL *with* an exception set if an exception occurred. |
| It returns NULL *without* an exception set if the key wasn't present. |
| */ |
| PyObject * |
| PyDict_GetItemWithError(PyObject *op, PyObject *key) |
| { |
| Py_ssize_t ix; (void)ix; |
| Py_hash_t hash; |
| PyDictObject *mp = (PyDictObject *)op; |
| PyObject *value; |
| |
| if (!PyDict_Check(op)) { |
| PyErr_BadInternalCall(); |
| return NULL; |
| } |
| hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| return NULL; |
| } |
| |
| #ifdef Py_GIL_DISABLED |
| ix = _Py_dict_lookup_threadsafe(mp, key, hash, &value); |
| Py_XDECREF(value); |
| #else |
| ix = _Py_dict_lookup(mp, key, hash, &value); |
| #endif |
| assert(ix >= 0 || value == NULL); |
| return value; // borrowed reference |
| } |
| |
| PyObject * |
| _PyDict_GetItemWithError(PyObject *dp, PyObject *kv) |
| { |
| assert(PyUnicode_CheckExact(kv)); |
| Py_hash_t hash = Py_TYPE(kv)->tp_hash(kv); |
| if (hash == -1) { |
| return NULL; |
| } |
| return _PyDict_GetItem_KnownHash(dp, kv, hash); // borrowed reference |
| } |
| |
| PyObject * |
| _PyDict_GetItemIdWithError(PyObject *dp, _Py_Identifier *key) |
| { |
| PyObject *kv; |
| kv = _PyUnicode_FromId(key); /* borrowed */ |
| if (kv == NULL) |
| return NULL; |
| Py_hash_t hash = unicode_get_hash(kv); |
| assert (hash != -1); /* interned strings have their hash value initialised */ |
| return _PyDict_GetItem_KnownHash(dp, kv, hash); // borrowed reference |
| } |
| |
| PyObject * |
| _PyDict_GetItemStringWithError(PyObject *v, const char *key) |
| { |
| PyObject *kv, *rv; |
| kv = PyUnicode_FromString(key); |
| if (kv == NULL) { |
| return NULL; |
| } |
| rv = PyDict_GetItemWithError(v, kv); |
| Py_DECREF(kv); |
| return rv; |
| } |
| |
| /* Fast version of global value lookup (LOAD_GLOBAL). |
| * Lookup in globals, then builtins. |
|  * |
|  * Raise an exception and return NULL if an error occurred (e.g. computing |
|  * the key hash failed, or a key comparison failed). Return NULL if the |
|  * key doesn't exist. Return the value if the key exists. |
| * |
| * Returns a new reference. |
| */ |
| PyObject * |
| _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key) |
| { |
| Py_ssize_t ix; |
| Py_hash_t hash; |
| PyObject *value; |
| |
| hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| return NULL; |
| } |
| |
| /* namespace 1: globals */ |
| ix = _Py_dict_lookup_threadsafe(globals, key, hash, &value); |
| if (ix == DKIX_ERROR) |
| return NULL; |
| if (ix != DKIX_EMPTY && value != NULL) |
| return value; |
| |
| /* namespace 2: builtins */ |
| ix = _Py_dict_lookup_threadsafe(builtins, key, hash, &value); |
| assert(ix >= 0 || value == NULL); |
| return value; |
| } |
| |
| void |
| _PyDict_LoadGlobalStackRef(PyDictObject *globals, PyDictObject *builtins, PyObject *key, _PyStackRef *res) |
| { |
| Py_ssize_t ix; |
| Py_hash_t hash; |
| |
| hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| *res = PyStackRef_NULL; |
| return; |
| } |
| |
| /* namespace 1: globals */ |
| ix = _Py_dict_lookup_threadsafe_stackref(globals, key, hash, res); |
| if (ix == DKIX_ERROR) { |
| return; |
| } |
| if (ix != DKIX_EMPTY && !PyStackRef_IsNull(*res)) { |
| return; |
| } |
| |
| /* namespace 2: builtins */ |
| ix = _Py_dict_lookup_threadsafe_stackref(builtins, key, hash, res); |
| assert(ix >= 0 || PyStackRef_IsNull(*res)); |
| } |
| |
| PyObject * |
| _PyDict_LoadBuiltinsFromGlobals(PyObject *globals) |
| { |
| if (!PyDict_Check(globals)) { |
| PyErr_BadInternalCall(); |
| return NULL; |
| } |
| |
| PyDictObject *mp = (PyDictObject *)globals; |
| PyObject *key = &_Py_ID(__builtins__); |
| Py_hash_t hash = unicode_get_hash(key); |
| |
| // Use the stackref variant to avoid reference count contention on the |
| // builtins module in the free threading build. It's important not to |
| // make any escaping calls between the lookup and the `PyStackRef_CLOSE()` |
| // because the `ref` is not visible to the GC. |
| _PyStackRef ref; |
| Py_ssize_t ix = _Py_dict_lookup_threadsafe_stackref(mp, key, hash, &ref); |
| if (ix == DKIX_ERROR) { |
| return NULL; |
| } |
| if (PyStackRef_IsNull(ref)) { |
| return Py_NewRef(PyEval_GetBuiltins()); |
| } |
| PyObject *builtins = PyStackRef_AsPyObjectBorrow(ref); |
| if (PyModule_Check(builtins)) { |
| builtins = _PyModule_GetDict(builtins); |
| assert(builtins != NULL); |
| } |
| _Py_INCREF_BUILTINS(builtins); |
| PyStackRef_CLOSE(ref); |
| return builtins; |
| } |
| |
| /* Consumes references to key and value */ |
| static int |
| setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) |
| { |
| ASSERT_DICT_LOCKED(mp); |
| |
| assert(key); |
| assert(value); |
| assert(PyDict_Check(mp)); |
| Py_hash_t hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| Py_DECREF(key); |
| Py_DECREF(value); |
| return -1; |
| } |
| |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| |
| if (mp->ma_keys == Py_EMPTY_KEYS) { |
| return insert_to_emptydict(interp, mp, key, hash, value); |
| } |
| /* insertdict() handles any resizing that might be necessary */ |
| return insertdict(interp, mp, key, hash, value); |
| } |
| |
| int |
| _PyDict_SetItem_Take2(PyDictObject *mp, PyObject *key, PyObject *value) |
| { |
| int res; |
| Py_BEGIN_CRITICAL_SECTION(mp); |
| res = setitem_take2_lock_held(mp, key, value); |
| Py_END_CRITICAL_SECTION(); |
| return res; |
| } |
| |
| /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the |
| * dictionary if it's merely replacing the value for an existing key. |
| * This means that it's safe to loop over a dictionary with PyDict_Next() |
| * and occasionally replace a value -- but you can't insert new keys or |
| * remove them. |
| */ |
| int |
| PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value) |
| { |
| if (!PyDict_Check(op)) { |
| PyErr_BadInternalCall(); |
| return -1; |
| } |
| assert(key); |
| assert(value); |
| return _PyDict_SetItem_Take2((PyDictObject *)op, |
| Py_NewRef(key), Py_NewRef(value)); |
| } |
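| |
| /* Safe pattern per the CAUTION above (illustrative; needs_update() and |
|  * replacement are hypothetical): |
|  * |
|  *     Py_ssize_t pos = 0; |
|  *     PyObject *key, *value; |
|  *     while (PyDict_Next(d, &pos, &key, &value)) { |
|  *         if (needs_update(value)) { |
|  *             PyDict_SetItem(d, key, replacement);  // replace only; no |
|  *         }                                         // insert or delete |
|  *     } |
|  */ |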
| |
| static int |
| setitem_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) |
| { |
| assert(key); |
| assert(value); |
| return setitem_take2_lock_held(mp, |
| Py_NewRef(key), Py_NewRef(value)); |
| } |
| |
| int |
| _PyDict_SetItem_KnownHash_LockHeld(PyDictObject *mp, PyObject *key, PyObject *value, |
| Py_hash_t hash) |
| { |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| if (mp->ma_keys == Py_EMPTY_KEYS) { |
| return insert_to_emptydict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value)); |
| } |
| /* insertdict() handles any resizing that might be necessary */ |
| return insertdict(interp, mp, Py_NewRef(key), hash, Py_NewRef(value)); |
| } |
| |
| int |
| _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value, |
| Py_hash_t hash) |
| { |
| if (!PyDict_Check(op)) { |
| PyErr_BadInternalCall(); |
| return -1; |
| } |
| assert(key); |
| assert(value); |
| assert(hash != -1); |
| |
| int res; |
| Py_BEGIN_CRITICAL_SECTION(op); |
| res = _PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)op, key, value, hash); |
| Py_END_CRITICAL_SECTION(); |
| return res; |
| } |
| |
| static void |
| delete_index_from_values(PyDictValues *values, Py_ssize_t ix) |
| { |
| uint8_t *array = get_insertion_order_array(values); |
| int size = values->size; |
| assert(size <= values->capacity); |
| int i; |
| for (i = 0; array[i] != ix; i++) { |
| assert(i < size); |
| } |
| assert(i < size); |
| size--; |
| for (; i < size; i++) { |
| array[i] = array[i+1]; |
| } |
| values->size = size; |
| } |
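| |
| /* Example (illustrative): if the insertion-order array is {2, 0, 3} and the |
|  * entry at values index 0 is deleted, the array becomes {2, 3} and |
|  * values->size drops from 3 to 2. The values array itself keeps its slot |
|  * at index 0 (now NULL), so the indices of the other entries stay stable. |
|  */ |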
| |
| static void |
| delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, |
| PyObject *old_value) |
| { |
| PyObject *old_key; |
| |
| ASSERT_DICT_LOCKED(mp); |
| |
| Py_ssize_t hashpos = lookdict_index(mp->ma_keys, hash, ix); |
| assert(hashpos >= 0); |
| |
| STORE_USED(mp, mp->ma_used - 1); |
| if (_PyDict_HasSplitTable(mp)) { |
| assert(old_value == mp->ma_values->values[ix]); |
| STORE_SPLIT_VALUE(mp, ix, NULL); |
| assert(ix < SHARED_KEYS_MAX_SIZE); |
| /* Update order */ |
| delete_index_from_values(mp->ma_values, ix); |
| ASSERT_CONSISTENT(mp); |
| } |
| else { |
| mp->ma_keys->dk_version = 0; |
| dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); |
| if (DK_IS_UNICODE(mp->ma_keys)) { |
| PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; |
| old_key = ep->me_key; |
| STORE_KEY(ep, NULL); |
| STORE_VALUE(ep, NULL); |
| } |
| else { |
| PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix]; |
| old_key = ep->me_key; |
| STORE_KEY(ep, NULL); |
| STORE_VALUE(ep, NULL); |
| STORE_HASH(ep, 0); |
| } |
| Py_DECREF(old_key); |
| } |
| Py_DECREF(old_value); |
| |
| ASSERT_CONSISTENT(mp); |
| } |
| |
| int |
| PyDict_DelItem(PyObject *op, PyObject *key) |
| { |
| assert(key); |
| Py_hash_t hash = _PyObject_HashFast(key); |
| if (hash == -1) { |
| return -1; |
| } |
| |
| return _PyDict_DelItem_KnownHash(op, key, hash); |
| } |
| |
| static int |
| delitem_knownhash_lock_held(PyObject *op, PyObject *key, Py_hash_t hash) |
| { |
| Py_ssize_t ix; |
| PyDictObject *mp; |
| PyObject *old_value; |
| |
| if (!PyDict_Check(op)) { |
| PyErr_BadInternalCall(); |
| return -1; |
| } |
| |
| ASSERT_DICT_LOCKED(op); |
| |
| assert(key); |
| assert(hash != -1); |
| mp = (PyDictObject *)op; |
| ix = _Py_dict_lookup(mp, key, hash, &old_value); |
| if (ix == DKIX_ERROR) |
| return -1; |
| if (ix == DKIX_EMPTY || old_value == NULL) { |
| _PyErr_SetKeyError(key); |
| return -1; |
| } |
| |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_DELETED, mp, key, NULL); |
| delitem_common(mp, hash, ix, old_value); |
| return 0; |
| } |
| |
| int |
| _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) |
| { |
| int res; |
| Py_BEGIN_CRITICAL_SECTION(op); |
| res = delitem_knownhash_lock_held(op, key, hash); |
| Py_END_CRITICAL_SECTION(); |
| return res; |
| } |
| |
| static int |
| delitemif_lock_held(PyObject *op, PyObject *key, |
| int (*predicate)(PyObject *value, void *arg), |
| void *arg) |
| { |
| Py_ssize_t ix; |
| PyDictObject *mp; |
| Py_hash_t hash; |
| PyObject *old_value; |
| int res; |
| |
| ASSERT_DICT_LOCKED(op); |
| |
| assert(key); |
| hash = PyObject_Hash(key); |
| if (hash == -1) |
| return -1; |
| mp = (PyDictObject *)op; |
| ix = _Py_dict_lookup(mp, key, hash, &old_value); |
| if (ix == DKIX_ERROR) { |
| return -1; |
| } |
| if (ix == DKIX_EMPTY || old_value == NULL) { |
| return 0; |
| } |
| |
| res = predicate(old_value, arg); |
| if (res == -1) |
| return -1; |
| |
| if (res > 0) { |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_DELETED, mp, key, NULL); |
| delitem_common(mp, hash, ix, old_value); |
| return 1; |
| } else { |
| return 0; |
| } |
| } |
| |
| /* This function promises that the predicate -> deletion sequence is atomic |
| * (i.e. protected by the GIL or the per-dict mutex in free threaded builds), |
| * assuming the predicate itself doesn't release the GIL (or cause re-entrancy |
| * which would release the per-dict mutex) |
| */ |
| int |
| _PyDict_DelItemIf(PyObject *op, PyObject *key, |
| int (*predicate)(PyObject *value, void *arg), |
| void *arg) |
| { |
| assert(PyDict_Check(op)); |
| int res; |
| Py_BEGIN_CRITICAL_SECTION(op); |
| res = delitemif_lock_held(op, key, predicate, arg); |
| Py_END_CRITICAL_SECTION(); |
| return res; |
| } |
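| |
| /* Illustrative caller (is_expected() is a hypothetical predicate): delete |
|  * the key only if its current value is an expected object, atomically with |
|  * respect to other mutations of the dict: |
|  * |
|  *     static int is_expected(PyObject *value, void *arg) |
|  *     { |
|  *         return value == (PyObject *)arg;  // 1: delete, 0: keep |
|  *     } |
|  * |
|  *     int rc = _PyDict_DelItemIf(d, key, is_expected, expected); |
|  *     // rc: 1 deleted, 0 kept or missing, -1 error (exception set) |
|  */ |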
| |
| static void |
| clear_lock_held(PyObject *op) |
| { |
| PyDictObject *mp; |
| PyDictKeysObject *oldkeys; |
| PyDictValues *oldvalues; |
| Py_ssize_t i, n; |
| |
| ASSERT_DICT_LOCKED(op); |
| |
| if (!PyDict_Check(op)) |
| return; |
| mp = ((PyDictObject *)op); |
| oldkeys = mp->ma_keys; |
| oldvalues = mp->ma_values; |
| if (oldkeys == Py_EMPTY_KEYS) { |
| return; |
| } |
| /* Empty the dict... */ |
| PyInterpreterState *interp = _PyInterpreterState_GET(); |
| _PyDict_NotifyEvent(interp, PyDict_EVENT_CLEARED, mp, NULL, NULL); |
| // We don't inc ref empty keys because they're immortal |
| ensure_shared_on_resize(mp); |
| STORE_USED(mp, 0); |
| if (oldvalues == NULL) { |
| set_keys(mp, Py_EMPTY_KEYS); |
| assert(oldkeys->dk_refcnt == 1); |
| dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); |
| } |
| else { |
| n = oldkeys->dk_nentries; |
| for (i = 0; i < n; i++) { |
| Py_CLEAR(oldvalues->values[i]); |
| } |
| if (oldvalues->embedded) { |
| oldvalues->size = 0; |
| } |
| else { |
| set_values(mp, NULL); |
| set_keys(mp, Py_EMPTY_KEYS); |
| free_values(oldvalues, IS_DICT_SHARED(mp)); |
| dictkeys_decref(interp, oldkeys, false); |
| } |
| } |
| ASSERT_CONSISTENT(mp); |
| } |
| |
| void |
| PyDict_Clear(PyObject *op) |
| { |
| Py_BEGIN_CRITICAL_SECTION(op); |
| clear_lock_held(op); |
| Py_END_CRITICAL_SECTION(); |
| } |
| |
| /* Internal version of PyDict_Next that returns a hash value in addition |
| * to the key and value. |
|  * Return 1 on success; return 0 when the end of the dictionary has been |
|  * reached (or if op is not a dictionary). |
| */ |
| int |
| _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, |
| PyObject **pvalue, Py_hash_t *phash) |
| { |
| Py_ssize_t i; |
| PyDictObject *mp; |
| PyObject *key, *value; |
| Py_hash_t hash; |
| |
| if (!PyDict_Check(op)) |
| return 0; |
| |
| mp = (PyDictObject *)op; |
| i = *ppos; |
| if (_PyDict_HasSplitTable(mp)) { |
| assert(mp->ma_used <= SHARED_KEYS_MAX_SIZE); |
| if (i < 0 || i >= mp->ma_used) |
| return 0; |
| int index = get_index_from_order(mp, i); |
| value = mp->ma_values->values[index]; |
| key = LOAD_SHARED_KEY(DK_UNICODE_ENTRIES(mp->ma_keys)[index].me_key); |
| hash = unicode_get_hash(key); |
| assert(value != NULL); |
| } |
| else { |
| Py_ssize_t n = mp->ma_keys->dk_nentries; |
| if (i < 0 || i >= n) |
| return 0; |
| if (DK_IS_UNICODE(mp->ma_keys)) { |
| PyDictUnicodeEntry *entry_ptr = &DK_UNICODE_ENTRIES(mp->ma_keys)[i]; |
| while (i < n && entry_ptr->me_value == NULL) { |
| entry_ptr++; |
| i++; |
| } |
| if (i >= n) |
| return 0; |
| key = entry_ptr->me_key; |
| hash = unicode_get_hash(entry_ptr->me_key); |
| value = entry_ptr->me_value; |
| } |
| else { |
| PyDictKeyEntry *entry_ptr = &DK_ENTRIES(mp->ma_keys)[i]; |
| while (i < n && entry_ptr->me_value == NULL) { |
| entry_ptr++; |
| i++; |
| } |
| if (i >= n) |
| return 0; |
| key = entry_ptr->me_key; |
| hash = entry_ptr->me_hash; |
| value = entry_ptr->me_value; |
| } |
| } |
| *ppos = i+1; |
| if (pkey) |
| *pkey = key; |
| if (pvalue) |
| *pvalue = value; |
| if (phash) |
| *phash = hash; |
| return 1; |
| } |
| |
| /* |
| * Iterate over a dict. Use like so: |
| * |
| * Py_ssize_t i; |
| * PyObject *key, *value; |
|  * i = 0;  // important! i should not otherwise be changed by you |
| * while (PyDict_Next(yourdict, &i, &key, &value)) { |
| * Refer to borrowed references in key and value. |
| * } |
| * |
|  * Return 1 on success; return 0 when the end of the dictionary has been |
|  * reached (or if op is not a dictionary). |
| * |
| * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that |
| * mutates the dict. One exception: it is safe if the loop merely changes |
| * the values associated with the keys (but doesn't insert new keys or |
| * delete keys), via PyDict_SetItem(). |
| */ |
| int |
| PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) |
| { |
| return _PyDict_Next(op, ppos, pkey, pvalue, NULL); |
| } |
| |
| |
| /* Internal version of dict.pop(). */ |
| int |
| _PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, |
| PyObject **result) |
| { |
| assert(PyDict_Check(mp)); |
| |
| ASSERT_DICT_LOCKED(mp); |
| |
| if (mp->ma_used == 0) { |
| if (result) { |
|