Merge pull request #28393 from linux-on-ibm-z/main-vec-fp-operators-clang-s390x
BUG: Fix building on s390x with clang
diff --git a/.github/workflows/compiler_sanitizers.yml b/.github/workflows/compiler_sanitizers.yml
index 9477e0b..9452289 100644
--- a/.github/workflows/compiler_sanitizers.yml
+++ b/.github/workflows/compiler_sanitizers.yml
@@ -53,10 +53,12 @@
echo CPPFLAGS="-I$LLVM_PREFIX/include" >> $GITHUB_ENV
- name: Build Python with address sanitizer
run: |
- CONFIGURE_OPTS="--with-address-sanitizer" pyenv install 3.13
- pyenv global 3.13
+ CONFIGURE_OPTS="--with-address-sanitizer" pyenv install 3.13t
+ pyenv global 3.13t
- name: Install dependencies
run: |
+ # TODO: remove when a released cython supports free-threaded python
+ pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
pip install -r requirements/build_requirements.txt
pip install -r requirements/ci_requirements.txt
pip install -r requirements/test_requirements.txt
@@ -68,7 +70,7 @@
- name: Test
run: |
# pass -s to pytest to see ASAN errors and warnings, otherwise pytest captures them
- ASAN_OPTIONS=detect_leaks=0:symbolize=1:strict_init_order=true:allocator_may_return_null=1:halt_on_error=1 \
+ ASAN_OPTIONS=detect_leaks=0:symbolize=1:strict_init_order=true:allocator_may_return_null=1 \
python -m spin test -- -v -s --timeout=600 --durations=10
clang_TSAN:
@@ -121,7 +123,7 @@
- name: Test
run: |
# These tests are slow, so only run tests in files that do "import threading" to make them count
- TSAN_OPTIONS=allocator_may_return_null=1:halt_on_error=1 \
+ TSAN_OPTIONS="allocator_may_return_null=1:suppressions=$GITHUB_WORKSPACE/tools/ci/tsan_suppressions.txt" \
python -m spin test \
`find numpy -name "test*.py" | xargs grep -l "import threading" | tr '\n' ' '` \
-- -v -s --timeout=600 --durations=10
diff --git a/doc/release/upcoming_changes/26018.change.rst b/doc/release/upcoming_changes/26018.change.rst
new file mode 100644
index 0000000..9d7c139
--- /dev/null
+++ b/doc/release/upcoming_changes/26018.change.rst
@@ -0,0 +1,7 @@
+``unique_values`` may return unsorted data
+------------------------------------------
+The relatively new function ``unique_values`` (added in NumPy 2.0) may now
+return unsorted results. As with ``unique_counts`` and ``unique_all``, a
+sorted result was never guaranteed, although until now the result happened
+to be sorted. In cases where these functions still return a sorted result,
+this may change in future releases to improve performance.
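A minimal sketch of the new contract (``unique_values`` is the real function; the sample data is illustrative)::

    import numpy as np

    vals = np.unique_values([3, 1, 3, 2])  # order is no longer guaranteed
    vals.sort()                            # sort explicitly if order matters
    print(vals)                            # [1 2 3]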
diff --git a/doc/release/upcoming_changes/26018.performance.rst b/doc/release/upcoming_changes/26018.performance.rst
new file mode 100644
index 0000000..ffeab51
--- /dev/null
+++ b/doc/release/upcoming_changes/26018.performance.rst
@@ -0,0 +1,7 @@
+Performance improvements to ``np.unique``
+-----------------------------------------
+``np.unique`` now tries to use a hash table to find unique values rather than
+sorting the array first. This is limited to certain dtypes for now, and the
+function is faster for those dtypes. The function also exposes a new ``sorted``
+parameter to allow returning the unique values in the order they were found,
+instead of sorting them afterwards.
\ No newline at end of file
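A short sketch of the new parameter described above (assuming a NumPy release containing this change)::

    import numpy as np

    a = np.array([3, 1, 2, 1, 3])
    np.unique(a)                # array([1, 2, 3]) -- sorted, as before
    np.unique(a, sorted=False)  # unique values in an unspecified order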
diff --git a/doc/source/building/cross_compilation.rst b/doc/source/building/cross_compilation.rst
index 82b896a..0a2e3a5 100644
--- a/doc/source/building/cross_compilation.rst
+++ b/doc/source/building/cross_compilation.rst
@@ -15,7 +15,7 @@
distros:
- `Void Linux <https://github.com/void-linux/void-packages/blob/master/srcpkgs/python3-numpy/template>`_
-- `Nix <https://github.com/nixos/nixpkgs/blob/master/pkgs/development/python-modules/numpy/default.nix>`_
+- `Nix <https://github.com/NixOS/nixpkgs/tree/master/pkgs/development/python-modules/numpy>`_
- `Conda-forge <https://github.com/conda-forge/numpy-feedstock/blob/main/recipe/build.sh>`_
See also `Meson's documentation on cross compilation
diff --git a/doc/source/reference/c-api/array.rst b/doc/source/reference/c-api/array.rst
index aface4e..14ff61a 100644
--- a/doc/source/reference/c-api/array.rst
+++ b/doc/source/reference/c-api/array.rst
@@ -121,7 +121,7 @@
Returns the total size (in number of elements) of the array.
-.. c:function:: npy_intp PyArray_Size(PyArrayObject* obj)
+.. c:function:: npy_intp PyArray_Size(PyObject* obj)
Returns 0 if *obj* is not a sub-class of ndarray. Otherwise,
returns the total number of elements in the array. Safer version
diff --git a/doc/source/reference/random/extending.rst b/doc/source/reference/random/extending.rst
index 7aead60..20c8375 100644
--- a/doc/source/reference/random/extending.rst
+++ b/doc/source/reference/random/extending.rst
@@ -11,10 +11,13 @@
Numba
-----
-Numba can be used with either CTypes or CFFI. The current iteration of the
+Numba can be used with either
+`CTypes <https://docs.python.org/3/library/ctypes.html>`_
+or `CFFI <https://cffi.readthedocs.io/en/stable/overview.html>`_.
+The current iteration of the
`BitGenerator`\ s all export a small set of functions through both interfaces.
-This example shows how numba can be used to produce gaussian samples using
+This example shows how Numba can be used to produce Gaussian samples using
a pure Python implementation which is then compiled. The random numbers are
provided by ``ctypes.next_double``.
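For reference, a condensed sketch of the ctypes route this section describes, adapted from the NumPy documentation; it assumes Numba is installed and uses plain uniform draws rather than the full Gaussian example::

    import numba
    from numpy.random import PCG64

    bit_gen = PCG64()
    next_double = bit_gen.ctypes.next_double   # C function pointer
    state_addr = bit_gen.ctypes.state_address  # address of the state struct

    @numba.njit
    def mean_uniforms(n, state):
        # Draw doubles straight from the BitGenerator's C interface.
        total = 0.0
        for _ in range(n):
            total += next_double(state)
        return total / n

    print(mean_uniforms(10_000, state_addr))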
diff --git a/numpy/_core/_add_newdocs.py b/numpy/_core/_add_newdocs.py
index ece371d..c1ab650 100644
--- a/numpy/_core/_add_newdocs.py
+++ b/numpy/_core/_add_newdocs.py
@@ -6945,9 +6945,10 @@ def refer_to_array_attribute(attr, method=True):
array([False, True, False])
>>> np.array([1.2, object(), "hello world"],
- ... dtype=StringDType(coerce=True))
- ValueError: StringDType only allows string data when string coercion
- is disabled.
+ ... dtype=StringDType(coerce=False))
+ Traceback (most recent call last):
+ ...
+ ValueError: StringDType only allows string data when string coercion is disabled.
>>> np.array(["hello", "world"], dtype=StringDType(coerce=True))
array(["hello", "world"], dtype=StringDType(coerce=True))
diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build
index bc21752..c3b5451 100644
--- a/numpy/_core/meson.build
+++ b/numpy/_core/meson.build
@@ -685,6 +685,16 @@
cflags_large_file_support,
]
+# C++ exceptions are handled in the unique_hash code, which therefore needs the
+# `-fexceptions` flag.
+unique_hash_cpp_args = c_args_common
+if cc.get_argument_syntax() != 'msvc'
+ unique_hash_cpp_args += [
+ '-fexceptions',
+ '-fno-rtti', # no runtime type information
+ ]
+endif
+
# Same as NPY_CXX_FLAGS (TODO: extend for what ccompiler_opt adds)
cpp_args_common = c_args_common + [
]
@@ -1063,7 +1073,6 @@
'src/common/npy_hashtable.cpp',
'src/common/npy_import.c',
'src/common/npy_longdouble.c',
- 'src/common/ucsnarrow.c',
'src/common/ufunc_override.c',
'src/common/numpyos.c',
'src/common/npy_cpu_features.c',
@@ -1221,6 +1230,21 @@
endforeach
endif
+unique_hash_so = static_library(
+ 'unique_hash',
+ ['src/multiarray/unique.cpp'],
+ c_args: c_args_common,
+ cpp_args: unique_hash_cpp_args,
+ include_directories: [
+ 'include',
+ 'src/common',
+ ],
+ dependencies: [
+ py_dep,
+ np_core_dep,
+ ],
+)
+
py.extension_module('_multiarray_umath',
[
config_h,
@@ -1245,7 +1269,11 @@
'src/highway'
],
dependencies: [blas_dep],
- link_with: [npymath_lib, multiarray_umath_mtargets.static_lib('_multiarray_umath_mtargets')] + highway_lib,
+ link_with: [
+ npymath_lib,
+ unique_hash_so,
+ multiarray_umath_mtargets.static_lib('_multiarray_umath_mtargets')
+ ] + highway_lib,
install: true,
subdir: 'numpy/_core',
)
diff --git a/numpy/_core/src/common/ucsnarrow.c b/numpy/_core/src/common/ucsnarrow.c
deleted file mode 100644
index 203e02f..0000000
--- a/numpy/_core/src/common/ucsnarrow.c
+++ /dev/null
@@ -1,71 +0,0 @@
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-#define _MULTIARRAYMODULE
-
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-
-#include "numpy/arrayobject.h"
-#include "numpy/npy_math.h"
-
-#include "npy_config.h"
-
-
-#include "ctors.h"
-
-/*
- * This file originally contained functions only needed on narrow builds of
- * Python for converting back and forth between the NumPy Unicode data-type
- * (always 4-bytes) and the Python Unicode scalar (2-bytes on a narrow build).
- *
- * This "narrow" interface is now deprecated in python and unused in NumPy.
- */
-
-/*
- * Returns a PyUnicodeObject initialized from a buffer containing
- * UCS4 unicode.
- *
- * Parameters
- * ----------
- * src: char *
- * Pointer to buffer containing UCS4 unicode.
- * size: Py_ssize_t
- * Size of buffer in bytes.
- * swap: int
- * If true, the data will be swapped.
- * align: int
- * If true, the data will be aligned.
- *
- * Returns
- * -------
- * new_reference: PyUnicodeObject
- */
-NPY_NO_EXPORT PyUnicodeObject *
-PyUnicode_FromUCS4(char const *src_char, Py_ssize_t size, int swap, int align)
-{
- Py_ssize_t ucs4len = size / sizeof(npy_ucs4);
- npy_ucs4 const *src = (npy_ucs4 const *)src_char;
- npy_ucs4 *buf = NULL;
-
- /* swap and align if needed */
- if (swap || align) {
- buf = (npy_ucs4 *)malloc(size);
- if (buf == NULL) {
- PyErr_NoMemory();
- return NULL;
- }
- memcpy(buf, src, size);
- if (swap) {
- byte_swap_vector(buf, ucs4len, sizeof(npy_ucs4));
- }
- src = buf;
- }
-
- /* trim trailing zeros */
- while (ucs4len > 0 && src[ucs4len - 1] == 0) {
- ucs4len--;
- }
- PyUnicodeObject *ret = (PyUnicodeObject *)PyUnicode_FromKindAndData(
- PyUnicode_4BYTE_KIND, src, ucs4len);
- free(buf);
- return ret;
-}
diff --git a/numpy/_core/src/common/ucsnarrow.h b/numpy/_core/src/common/ucsnarrow.h
deleted file mode 100644
index 4b17a28..0000000
--- a/numpy/_core/src/common/ucsnarrow.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef NUMPY_CORE_SRC_COMMON_NPY_UCSNARROW_H_
-#define NUMPY_CORE_SRC_COMMON_NPY_UCSNARROW_H_
-
-NPY_NO_EXPORT PyUnicodeObject *
-PyUnicode_FromUCS4(char const *src, Py_ssize_t size, int swap, int align);
-
-#endif /* NUMPY_CORE_SRC_COMMON_NPY_UCSNARROW_H_ */
diff --git a/numpy/_core/src/multiarray/arraytypes.c.src b/numpy/_core/src/multiarray/arraytypes.c.src
index 931ced5..8de16af 100644
--- a/numpy/_core/src/multiarray/arraytypes.c.src
+++ b/numpy/_core/src/multiarray/arraytypes.c.src
@@ -632,10 +632,33 @@
{
PyArrayObject *ap = vap;
Py_ssize_t size = PyArray_ITEMSIZE(ap);
+ Py_ssize_t ucs4len = size / sizeof(npy_ucs4);
int swap = PyArray_ISBYTESWAPPED(ap);
int align = !PyArray_ISALIGNED(ap);
+ npy_ucs4 const *src = (npy_ucs4 const*)ip;
+ npy_ucs4 *buf = NULL;
- return (PyObject *)PyUnicode_FromUCS4(ip, size, swap, align);
+ /* swap and align if needed */
+ if (swap || align) {
+ buf = (npy_ucs4 *)malloc(size);
+ if (buf == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ memcpy(buf, src, size);
+ if (swap) {
+ byte_swap_vector(buf, ucs4len, sizeof(npy_ucs4));
+ }
+ src = buf;
+ }
+
+ /* trim trailing zeros */
+ while (ucs4len > 0 && src[ucs4len - 1] == 0) {
+ ucs4len--;
+ }
+ PyObject *ret = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, src, ucs4len);
+ free(buf);
+ return ret;
}
static int
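The swap branch of the inlined getitem above can be exercised from Python with a non-native byte order (a sketch; on a little-endian machine ``>U`` data is byte-swapped)::

    import numpy as np

    a = np.array(["abc"], dtype=">U4")  # byteswapped relative to native order
    assert a.dtype.byteorder == ">"
    print(a[0])                         # abc -- trailing NULs are trimmed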
diff --git a/numpy/_core/src/multiarray/common.h b/numpy/_core/src/multiarray/common.h
index 46fe2a6..e356b82 100644
--- a/numpy/_core/src/multiarray/common.h
+++ b/numpy/_core/src/multiarray/common.h
@@ -318,8 +318,6 @@
check_is_convertible_to_scalar(PyArrayObject *v);
-#include "ucsnarrow.h"
-
/*
* Make a new empty array, of the passed size, of a type that takes the
* priority of ap1 and ap2 into account.
diff --git a/numpy/_core/src/multiarray/item_selection.c b/numpy/_core/src/multiarray/item_selection.c
index 254f1ea..d02e420 100644
--- a/numpy/_core/src/multiarray/item_selection.c
+++ b/numpy/_core/src/multiarray/item_selection.c
@@ -1028,6 +1028,7 @@
}
dtype = PyArray_DESCR(mps[0]);
+ int copy_existing_out = 0;
/* Set-up return array */
if (out == NULL) {
Py_INCREF(dtype);
@@ -1039,10 +1040,6 @@
(PyObject *)ap);
}
else {
- int flags = NPY_ARRAY_CARRAY |
- NPY_ARRAY_WRITEBACKIFCOPY |
- NPY_ARRAY_FORCECAST;
-
if ((PyArray_NDIM(out) != multi->nd)
|| !PyArray_CompareLists(PyArray_DIMS(out),
multi->dimensions,
@@ -1052,9 +1049,13 @@
goto fail;
}
+ if (PyArray_FailUnlessWriteable(out, "output array") < 0) {
+ goto fail;
+ }
+
for (i = 0; i < n; i++) {
if (arrays_overlap(out, mps[i])) {
- flags |= NPY_ARRAY_ENSURECOPY;
+ copy_existing_out = 1;
}
}
@@ -1064,10 +1065,25 @@
* so the input array is not changed
* before the error is called
*/
- flags |= NPY_ARRAY_ENSURECOPY;
+ copy_existing_out = 1;
}
- Py_INCREF(dtype);
- obj = (PyArrayObject *)PyArray_FromArray(out, dtype, flags);
+
+ if (!PyArray_EquivTypes(dtype, PyArray_DESCR(out))) {
+ copy_existing_out = 1;
+ }
+
+ if (copy_existing_out) {
+ Py_INCREF(dtype);
+ obj = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
+ dtype,
+ multi->nd,
+ multi->dimensions,
+ NULL, NULL, 0,
+ (PyObject *)out);
+ }
+ else {
+ obj = (PyArrayObject *)Py_NewRef(out);
+ }
}
if (obj == NULL) {
@@ -1080,12 +1096,13 @@
NPY_ARRAYMETHOD_FLAGS transfer_flags = 0;
if (PyDataType_REFCHK(dtype)) {
int is_aligned = IsUintAligned(obj);
+ PyArray_Descr *obj_dtype = PyArray_DESCR(obj);
PyArray_GetDTypeTransferFunction(
is_aligned,
dtype->elsize,
- dtype->elsize,
+ obj_dtype->elsize,
dtype,
- dtype, 0, &cast_info,
+ obj_dtype, 0, &cast_info,
&transfer_flags);
}
@@ -1142,11 +1159,13 @@
}
Py_DECREF(ap);
PyDataMem_FREE(mps);
- if (out != NULL && out != obj) {
- Py_INCREF(out);
- PyArray_ResolveWritebackIfCopy(obj);
+ if (copy_existing_out) {
+ int res = PyArray_CopyInto(out, obj);
Py_DECREF(obj);
- obj = out;
+ if (res < 0) {
+ return NULL;
+ }
+ return Py_NewRef(out);
}
return (PyObject *)obj;
@@ -2893,10 +2912,11 @@
* the fast bool count is followed by this sparse path is faster
* than combining the two loops, even for larger arrays
*/
+ npy_intp * multi_index_end = multi_index + nonzero_count;
if (((double)nonzero_count / count) <= 0.1) {
npy_intp subsize;
npy_intp j = 0;
- while (1) {
+ while (multi_index < multi_index_end) {
npy_memchr(data + j * stride, 0, stride, count - j,
&subsize, 1);
j += subsize;
@@ -2911,11 +2931,10 @@
* stalls that are very expensive on most modern processors.
*/
else {
- npy_intp *multi_index_end = multi_index + nonzero_count;
npy_intp j = 0;
/* Manually unroll for GCC and maybe other compilers */
- while (multi_index + 4 < multi_index_end) {
+ while (multi_index + 4 < multi_index_end && (j < count - 4) ) {
*multi_index = j;
multi_index += data[0] != 0;
*multi_index = j + 1;
@@ -2928,7 +2947,7 @@
j += 4;
}
- while (multi_index < multi_index_end) {
+ while (multi_index < multi_index_end && (j < count) ) {
*multi_index = j;
multi_index += *data != 0;
data += stride;
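The behavioral effect of the ``np.choose`` changes above, mirroring the tests added later in this patch (a sketch)::

    import numpy as np

    # A dtype-mismatched `out` is now filled via an explicit cast-and-copy
    # instead of the WRITEBACKIFCOPY machinery.
    out = np.array([np.nan, np.nan, np.nan])
    np.choose(np.arange(3), np.ones((3, 3), dtype=np.int64), out=out)

    # A read-only `out` now fails up front.
    ro = np.zeros(3)
    ro.setflags(write=False)
    try:
        np.choose([0, 1, 2], [np.arange(3)] * 3, out=ro)
    except ValueError as exc:
        print(exc)  # output array is read-only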
diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c
index 54165222..d24af97 100644
--- a/numpy/_core/src/multiarray/multiarraymodule.c
+++ b/numpy/_core/src/multiarray/multiarraymodule.c
@@ -83,6 +83,8 @@
#include "umathmodule.h"
+#include "unique.h"
+
/*
*****************************************************************************
** INCLUDE GENERATED CODE **
@@ -4562,6 +4564,8 @@
"Give a warning on reload and big warning in sub-interpreters."},
{"from_dlpack", (PyCFunction)from_dlpack,
METH_FASTCALL | METH_KEYWORDS, NULL},
+ {"_unique_hash", (PyCFunction)array__unique_hash,
+ METH_O, "Collect unique values via a hash map."},
{NULL, NULL, 0, NULL} /* sentinel */
};
diff --git a/numpy/_core/src/multiarray/stringdtype/casts.cpp b/numpy/_core/src/multiarray/stringdtype/casts.cpp
index f74f642..f667275 100644
--- a/numpy/_core/src/multiarray/stringdtype/casts.cpp
+++ b/numpy/_core/src/multiarray/stringdtype/casts.cpp
@@ -1,13 +1,13 @@
-#include <cmath>
-#include <type_traits>
-
-#include "numpy/npy_common.h"
#define PY_SSIZE_T_CLEAN
#include <Python.h>
+#include "numpy/npy_common.h"
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#define _UMATHMODULE
+#include <cmath>
+#include <type_traits>
+
#include "numpy/ndarraytypes.h"
#include "numpy/arrayobject.h"
#include "numpy/halffloat.h"
diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp
new file mode 100644
index 0000000..f36acfd
--- /dev/null
+++ b/numpy/_core/src/multiarray/unique.cpp
@@ -0,0 +1,183 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
+#include <Python.h>
+
+#include <unordered_set>
+#include <functional>
+
+#include <numpy/npy_common.h>
+#include "numpy/arrayobject.h"
+
+// This uses the RAII pattern to handle C++ exceptions while avoiding memory leaks.
+// Adapted from https://stackoverflow.com/a/25510879/2536294
+template <typename F>
+struct FinalAction {
+ FinalAction(F f) : clean_{f} {}
+ ~FinalAction() { clean_(); }
+ private:
+ F clean_;
+};
+
+template <typename F>
+FinalAction<F> finally(F f) {
+ return FinalAction<F>(f);
+}
+
+template<typename T>
+static PyObject*
+unique(PyArrayObject *self)
+{
+ /* This function takes a numpy array and returns a numpy array containing
+ the unique values.
+
+ It assumes the numpy array includes data that can be viewed as unsigned integers
+ of a certain size (sizeof(T)).
+
+ It doesn't need to know the actual type, since it needs to find unique values
+ among binary representations of the input data. This means it won't apply to
+ custom or complicated dtypes or string values.
+ */
+ NPY_ALLOW_C_API_DEF;
+ std::unordered_set<T> hashset;
+
+ NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
+ NPY_ITER_EXTERNAL_LOOP |
+ NPY_ITER_REFS_OK |
+ NPY_ITER_ZEROSIZE_OK |
+ NPY_ITER_GROWINNER,
+ NPY_KEEPORDER, NPY_NO_CASTING,
+ NULL);
+    // Make sure the iterator is deallocated when the function returns, with
+    // or without an exception.
+ auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
+ if (iter == NULL) {
+ return NULL;
+ }
+
+ NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
+ if (iternext == NULL) {
+ return NULL;
+ }
+ char **dataptr = NpyIter_GetDataPtrArray(iter);
+ npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
+ npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
+
+ // release the GIL
+ PyThreadState *_save;
+ _save = PyEval_SaveThread();
+    // Make sure the GIL is re-acquired when the function returns, with
+    // or without an exception.
+ auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); });
+    // first we put the data in a hash set
+
+ if (NpyIter_GetIterSize(iter) > 0) {
+ do {
+ char* data = *dataptr;
+ npy_intp stride = *strideptr;
+ npy_intp count = *innersizeptr;
+
+ while (count--) {
+ hashset.insert(*((T *) data));
+ data += stride;
+ }
+ } while (iternext(iter));
+ }
+
+ npy_intp length = hashset.size();
+
+ NPY_ALLOW_C_API;
+ PyArray_Descr *descr = PyArray_DESCR(self);
+ Py_INCREF(descr);
+ PyObject *res_obj = PyArray_NewFromDescr(
+ &PyArray_Type,
+ descr,
+ 1, // ndim
+ &length, // shape
+ NULL, // strides
+ NULL, // data
+ // This flag is needed to be able to call .sort on it.
+ NPY_ARRAY_WRITEABLE, // flags
+ NULL // obj
+ );
+ NPY_DISABLE_C_API;
+
+ if (res_obj == NULL) {
+ return NULL;
+ }
+
+    // then we iterate through the hash set to get the unique values
+ T* data = (T *)PyArray_DATA((PyArrayObject *)res_obj);
+ auto it = hashset.begin();
+ size_t i = 0;
+ for (; it != hashset.end(); it++, i++) {
+ data[i] = *it;
+ }
+
+ return res_obj;
+}
+
+
+// this map contains the unique function to use for each supported dtype.
+typedef std::function<PyObject *(PyArrayObject *)> function_type;
+std::unordered_map<int, function_type> unique_funcs = {
+ {NPY_BYTE, unique<npy_byte>},
+ {NPY_UBYTE, unique<npy_ubyte>},
+ {NPY_SHORT, unique<npy_short>},
+ {NPY_USHORT, unique<npy_ushort>},
+ {NPY_INT, unique<npy_int>},
+ {NPY_UINT, unique<npy_uint>},
+ {NPY_LONG, unique<npy_long>},
+ {NPY_ULONG, unique<npy_ulong>},
+ {NPY_LONGLONG, unique<npy_longlong>},
+ {NPY_ULONGLONG, unique<npy_ulonglong>},
+ {NPY_INT8, unique<npy_int8>},
+ {NPY_INT16, unique<npy_int16>},
+ {NPY_INT32, unique<npy_int32>},
+ {NPY_INT64, unique<npy_int64>},
+ {NPY_UINT8, unique<npy_uint8>},
+ {NPY_UINT16, unique<npy_uint16>},
+ {NPY_UINT32, unique<npy_uint32>},
+ {NPY_UINT64, unique<npy_uint64>},
+ {NPY_DATETIME, unique<npy_uint64>},
+};
+
+
+/**
+ * Python exposed implementation of `_unique_hash`.
+ *
+ * This is a C-callable function that wraps code which may throw C++
+ * exceptions in a try/catch block.
+ *
+ * @param arr NumPy array to find the unique values of.
+ * @return Base-class NumPy array with unique values, `NotImplemented` if the
+ * type is unsupported or `NULL` with an error set.
+ */
+extern "C" NPY_NO_EXPORT PyObject *
+array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj)
+{
+ if (!PyArray_Check(arr_obj)) {
+ PyErr_SetString(PyExc_TypeError,
+ "_unique_hash() requires a NumPy array input.");
+ return NULL;
+ }
+ PyArrayObject *arr = (PyArrayObject *)arr_obj;
+
+ try {
+ auto type = PyArray_TYPE(arr);
+ // we only support data types present in our unique_funcs map
+ if (unique_funcs.find(type) == unique_funcs.end()) {
+ Py_RETURN_NOTIMPLEMENTED;
+ }
+
+ return unique_funcs[type](arr);
+ }
+ catch (const std::bad_alloc &e) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ catch (const std::exception &e) {
+ PyErr_SetString(PyExc_RuntimeError, e.what());
+ return NULL;
+ }
+}
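From Python, the helper can be called directly (a sketch; ``_unique_hash`` is the private function registered on ``_multiarray_umath`` above and may change without notice)::

    import numpy as np
    from numpy._core._multiarray_umath import _unique_hash

    a = np.array([3, 1, 2, 1, 3], dtype=np.int64)
    u = _unique_hash(a)  # unique values, in an unspecified order
    print(np.sort(u))    # [1 2 3]

    # dtypes absent from the unique_funcs map return NotImplemented.
    print(_unique_hash(np.array(["a", "b"])) is NotImplemented)  # True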
diff --git a/numpy/_core/src/multiarray/unique.h b/numpy/_core/src/multiarray/unique.h
new file mode 100644
index 0000000..3e25840
--- /dev/null
+++ b/numpy/_core/src/multiarray/unique.h
@@ -0,0 +1,14 @@
+#ifndef NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_UNIQUE_H_
+#define NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_UNIQUE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), PyObject *args);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_UNIQUE_H_
diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py
index fba5f47..3de42ba 100644
--- a/numpy/_core/tests/test_multiarray.py
+++ b/numpy/_core/tests/test_multiarray.py
@@ -1980,6 +1980,12 @@ def test_choose(self):
y = np.choose([0, 0, 0], [x[:3], x[:3], x[:3]], out=x[1:4], mode='wrap')
assert_equal(y, np.array([0, 1, 2]))
+    # gh-28206: check that a non-writeable `out` raises
+ x = np.arange(3)
+ out = np.zeros(3)
+ out.setflags(write=False)
+ assert_raises(ValueError, np.choose, [0, 1, 2], [x, x, x], out=out)
+
def test_prod(self):
ba = [1, 2, 10, 11, 6, 5, 4]
ba2 = [[1, 2, 3, 4], [5, 6, 7, 9], [10, 3, 4, 5]]
@@ -10287,6 +10293,16 @@ def test_gh_24459():
np.choose(a, [3, -1])
+def test_gh_28206():
+ a = np.arange(3)
+ b = np.ones((3, 3), dtype=np.int64)
+ out = np.array([np.nan, np.nan, np.nan])
+
+ with warnings.catch_warnings():
+ warnings.simplefilter("error", RuntimeWarning)
+ np.choose(a, b, out=out)
+
+
@pytest.mark.parametrize("N", np.arange(2, 512))
@pytest.mark.parametrize("dtype", [np.int16, np.uint16,
np.int32, np.uint32, np.int64, np.uint64])
diff --git a/numpy/_core/tests/test_multithreading.py b/numpy/_core/tests/test_multithreading.py
index 133268d..03f4b40 100644
--- a/numpy/_core/tests/test_multithreading.py
+++ b/numpy/_core/tests/test_multithreading.py
@@ -271,3 +271,26 @@ def closure(b):
# Reducing the number of threads means the test doesn't trigger the
# bug. Better to skip on some platforms than add a useless test.
pytest.skip("Couldn't spawn enough threads to run the test")
+
+@pytest.mark.parametrize("dtype", [bool, int, float])
+def test_nonzero(dtype):
+ # See: gh-28361
+ #
+    # np.nonzero uses np.count_nonzero to determine the size of the output array.
+    # In a second pass the indices of the non-zero elements are determined, but they may have changed in the meantime.
+    #
+    # This test triggers a data race, which is suppressed in the TSAN CI. The test ensures
+    # that np.nonzero does not generate a segmentation fault.
+ x = np.random.randint(4, size=100).astype(dtype)
+
+ def func(index):
+ for _ in range(10):
+ if index == 0:
+ x[::2] = np.random.randint(2)
+ else:
+ try:
+ _ = np.nonzero(x)
+ except RuntimeError as ex:
+ assert 'number of non-zero array elements changed during function execution' in str(ex)
+
+ run_threaded(func, max_workers=10, pass_count=True, outer_iterations=5)
diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py
index 97dae64..5217704 100644
--- a/numpy/lib/_arraysetops_impl.py
+++ b/numpy/lib/_arraysetops_impl.py
@@ -21,6 +21,7 @@
import numpy as np
from numpy._core import overrides
from numpy._core._multiarray_umath import _array_converter
+from numpy._core._multiarray_umath import _unique_hash
array_function_dispatch = functools.partial(
@@ -138,13 +139,15 @@ def _unpack_tuple(x):
def _unique_dispatcher(ar, return_index=None, return_inverse=None,
- return_counts=None, axis=None, *, equal_nan=None):
+ return_counts=None, axis=None, *, equal_nan=None,
+ sorted=True):
return (ar,)
@array_function_dispatch(_unique_dispatcher)
def unique(ar, return_index=False, return_inverse=False,
- return_counts=False, axis=None, *, equal_nan=True):
+ return_counts=False, axis=None, *, equal_nan=True,
+ sorted=True):
"""
Find the unique elements of an array.
@@ -182,6 +185,11 @@ def unique(ar, return_index=False, return_inverse=False,
.. versionadded:: 1.24
+ sorted : bool, optional
+        If True, the unique elements are sorted; otherwise the order is unspecified.
+
+ .. versionadded:: 2.3
+
Returns
-------
unique : ndarray
@@ -284,7 +292,8 @@ def unique(ar, return_index=False, return_inverse=False,
ar = np.asanyarray(ar)
if axis is None:
ret = _unique1d(ar, return_index, return_inverse, return_counts,
- equal_nan=equal_nan, inverse_shape=ar.shape, axis=None)
+ equal_nan=equal_nan, inverse_shape=ar.shape, axis=None,
+ sorted=sorted)
return _unpack_tuple(ret)
# axis was specified and not None
@@ -331,16 +340,18 @@ def reshape_uniq(uniq):
output = _unique1d(consolidated, return_index,
return_inverse, return_counts,
equal_nan=equal_nan, inverse_shape=inverse_shape,
- axis=axis)
+ axis=axis, sorted=sorted)
output = (reshape_uniq(output[0]),) + output[1:]
return _unpack_tuple(output)
def _unique1d(ar, return_index=False, return_inverse=False,
return_counts=False, *, equal_nan=True, inverse_shape=None,
- axis=None):
+ axis=None, sorted=True):
"""
Find the unique elements of an array, ignoring shape.
+
+ Uses a hash table to find the unique elements if possible.
"""
ar = np.asanyarray(ar).flatten()
if len(ar.shape) != 1:
@@ -350,6 +361,26 @@ def _unique1d(ar, return_index=False, return_inverse=False,
optional_indices = return_index or return_inverse
+ if (optional_indices or return_counts) and not sorted:
+ raise ValueError(
+ "Currently, `sorted` can only be False if `return_index`, "
+ "`return_inverse`, and `return_counts` are all False."
+ )
+
+ # masked arrays are not supported yet.
+ if not optional_indices and not return_counts and not np.ma.is_masked(ar):
+ # First we convert the array to a numpy array, later we wrap it back
+ # in case it was a subclass of numpy.ndarray.
+ conv = _array_converter(ar)
+ ar_, = conv
+
+ if (hash_unique := _unique_hash(ar_)) is not NotImplemented:
+ if sorted:
+ hash_unique.sort()
+ # We wrap the result back in case it was a subclass of numpy.ndarray.
+ return (conv.wrap(hash_unique),)
+
+ # If we don't use the hash map, we use the slower sorting method.
if optional_indices:
perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
aux = ar[perm]
@@ -460,7 +491,7 @@ def unique_all(x):
return_index=True,
return_inverse=True,
return_counts=True,
- equal_nan=False
+ equal_nan=False,
)
return UniqueAllResult(*result)
@@ -512,7 +543,7 @@ def unique_counts(x):
return_index=False,
return_inverse=False,
return_counts=True,
- equal_nan=False
+ equal_nan=False,
)
return UniqueCountsResult(*result)
@@ -565,7 +596,7 @@ def unique_inverse(x):
return_index=False,
return_inverse=True,
return_counts=False,
- equal_nan=False
+ equal_nan=False,
)
return UniqueInverseResult(*result)
@@ -601,7 +632,7 @@ def unique_values(x):
--------
>>> import numpy as np
>>> np.unique_values([1, 1, 2])
- array([1, 2])
+ array([1, 2]) # may vary
"""
return unique(
@@ -609,7 +640,8 @@ def unique_values(x):
return_index=False,
return_inverse=False,
return_counts=False,
- equal_nan=False
+ equal_nan=False,
+ sorted=False,
)
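The constraint enforced in ``_unique1d`` above is easy to demonstrate (a sketch)::

    import numpy as np

    np.unique([1, 1, 2], sorted=False)  # OK: the hash path may be used

    # The return_* options still require the sorting path, so combining
    # any of them with sorted=False raises.
    try:
        np.unique([1, 1, 2], sorted=False, return_counts=True)
    except ValueError as exc:
        print(exc)  # Currently, `sorted` can only be False if ...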
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index e89adb8..3de5e68 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -628,56 +628,72 @@ def test_manyways(self):
class TestUnique:
+ def check_all(self, a, b, i1, i2, c, dt):
+ base_msg = 'check {0} failed for type {1}'
+
+ msg = base_msg.format('values', dt)
+ v = unique(a)
+ assert_array_equal(v, b, msg)
+ assert type(v) == type(b)
+
+ msg = base_msg.format('return_index', dt)
+ v, j = unique(a, True, False, False)
+ assert_array_equal(v, b, msg)
+ assert_array_equal(j, i1, msg)
+ assert type(v) == type(b)
+
+ msg = base_msg.format('return_inverse', dt)
+ v, j = unique(a, False, True, False)
+ assert_array_equal(v, b, msg)
+ assert_array_equal(j, i2, msg)
+ assert type(v) == type(b)
+
+ msg = base_msg.format('return_counts', dt)
+ v, j = unique(a, False, False, True)
+ assert_array_equal(v, b, msg)
+ assert_array_equal(j, c, msg)
+ assert type(v) == type(b)
+
+ msg = base_msg.format('return_index and return_inverse', dt)
+ v, j1, j2 = unique(a, True, True, False)
+ assert_array_equal(v, b, msg)
+ assert_array_equal(j1, i1, msg)
+ assert_array_equal(j2, i2, msg)
+ assert type(v) == type(b)
+
+ msg = base_msg.format('return_index and return_counts', dt)
+ v, j1, j2 = unique(a, True, False, True)
+ assert_array_equal(v, b, msg)
+ assert_array_equal(j1, i1, msg)
+ assert_array_equal(j2, c, msg)
+ assert type(v) == type(b)
+
+ msg = base_msg.format('return_inverse and return_counts', dt)
+ v, j1, j2 = unique(a, False, True, True)
+ assert_array_equal(v, b, msg)
+ assert_array_equal(j1, i2, msg)
+ assert_array_equal(j2, c, msg)
+ assert type(v) == type(b)
+
+ msg = base_msg.format(('return_index, return_inverse '
+ 'and return_counts'), dt)
+ v, j1, j2, j3 = unique(a, True, True, True)
+ assert_array_equal(v, b, msg)
+ assert_array_equal(j1, i1, msg)
+ assert_array_equal(j2, i2, msg)
+ assert_array_equal(j3, c, msg)
+ assert type(v) == type(b)
+
+ def get_types(self):
+ types = []
+ types.extend(np.typecodes['AllInteger'])
+ types.extend(np.typecodes['AllFloat'])
+ types.append('datetime64[D]')
+ types.append('timedelta64[D]')
+ return types
+
def test_unique_1d(self):
- def check_all(a, b, i1, i2, c, dt):
- base_msg = 'check {0} failed for type {1}'
-
- msg = base_msg.format('values', dt)
- v = unique(a)
- assert_array_equal(v, b, msg)
-
- msg = base_msg.format('return_index', dt)
- v, j = unique(a, True, False, False)
- assert_array_equal(v, b, msg)
- assert_array_equal(j, i1, msg)
-
- msg = base_msg.format('return_inverse', dt)
- v, j = unique(a, False, True, False)
- assert_array_equal(v, b, msg)
- assert_array_equal(j, i2, msg)
-
- msg = base_msg.format('return_counts', dt)
- v, j = unique(a, False, False, True)
- assert_array_equal(v, b, msg)
- assert_array_equal(j, c, msg)
-
- msg = base_msg.format('return_index and return_inverse', dt)
- v, j1, j2 = unique(a, True, True, False)
- assert_array_equal(v, b, msg)
- assert_array_equal(j1, i1, msg)
- assert_array_equal(j2, i2, msg)
-
- msg = base_msg.format('return_index and return_counts', dt)
- v, j1, j2 = unique(a, True, False, True)
- assert_array_equal(v, b, msg)
- assert_array_equal(j1, i1, msg)
- assert_array_equal(j2, c, msg)
-
- msg = base_msg.format('return_inverse and return_counts', dt)
- v, j1, j2 = unique(a, False, True, True)
- assert_array_equal(v, b, msg)
- assert_array_equal(j1, i2, msg)
- assert_array_equal(j2, c, msg)
-
- msg = base_msg.format(('return_index, return_inverse '
- 'and return_counts'), dt)
- v, j1, j2, j3 = unique(a, True, True, True)
- assert_array_equal(v, b, msg)
- assert_array_equal(j1, i1, msg)
- assert_array_equal(j2, i2, msg)
- assert_array_equal(j3, c, msg)
-
a = [5, 7, 1, 2, 1, 5, 7] * 10
b = [1, 2, 5, 7]
i1 = [2, 3, 0, 1]
@@ -685,15 +701,11 @@ def check_all(a, b, i1, i2, c, dt):
c = np.multiply([2, 1, 2, 2], 10)
# test for numeric arrays
- types = []
- types.extend(np.typecodes['AllInteger'])
- types.extend(np.typecodes['AllFloat'])
- types.append('datetime64[D]')
- types.append('timedelta64[D]')
+ types = self.get_types()
for dt in types:
aa = np.array(a, dt)
bb = np.array(b, dt)
- check_all(aa, bb, i1, i2, c, dt)
+ self.check_all(aa, bb, i1, i2, c, dt)
# test for object arrays
dt = 'O'
@@ -701,13 +713,13 @@ def check_all(a, b, i1, i2, c, dt):
aa[:] = a
bb = np.empty(len(b), dt)
bb[:] = b
- check_all(aa, bb, i1, i2, c, dt)
+ self.check_all(aa, bb, i1, i2, c, dt)
# test for structured arrays
dt = [('', 'i'), ('', 'i')]
aa = np.array(list(zip(a, a)), dt)
bb = np.array(list(zip(b, b)), dt)
- check_all(aa, bb, i1, i2, c, dt)
+ self.check_all(aa, bb, i1, i2, c, dt)
# test for ticket #2799
aa = [1. + 0.j, 1 - 1.j, 1]
@@ -797,6 +809,44 @@ def check_all(a, b, i1, i2, c, dt):
assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))
+ def test_unique_zero_sized(self):
+ # test for zero-sized arrays
+ for dt in self.get_types():
+ a = np.array([], dt)
+ b = np.array([], dt)
+ i1 = np.array([], np.int64)
+ i2 = np.array([], np.int64)
+ c = np.array([], np.int64)
+ self.check_all(a, b, i1, i2, c, dt)
+
+ def test_unique_subclass(self):
+ class Subclass(np.ndarray):
+ pass
+
+ i1 = [2, 3, 0, 1]
+ i2 = [2, 3, 0, 1, 0, 2, 3] * 10
+ c = np.multiply([2, 1, 2, 2], 10)
+
+ # test for numeric arrays
+ types = self.get_types()
+ for dt in types:
+ a = np.array([5, 7, 1, 2, 1, 5, 7] * 10, dtype=dt)
+ b = np.array([1, 2, 5, 7], dtype=dt)
+ aa = Subclass(a.shape, dtype=dt, buffer=a)
+ bb = Subclass(b.shape, dtype=dt, buffer=b)
+ self.check_all(aa, bb, i1, i2, c, dt)
+
+ @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"])
+ def test_unsupported_hash_based(self, arg):
+        """Test that hash-based unique is not supported when any of
+        return_index, return_inverse, or return_counts is True.
+
+        This is a work in progress; these options will gradually be supported.
+ """
+ msg = "Currently, `sorted` can only be False"
+ with pytest.raises(ValueError, match=msg):
+ np.unique([1, 1], sorted=False, **{arg: True})
+
def test_unique_axis_errors(self):
assert_raises(TypeError, self._run_axis_tests, object)
assert_raises(TypeError, self._run_axis_tests,
diff --git a/ruff.toml b/ruff.toml
index 39c32e9..9f8bf26 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -1,4 +1,4 @@
-exclude = [
+extend-exclude = [
"numpy/__config__.py",
"numpy/distutils",
"numpy/typing/_char_codes.py",
diff --git a/tools/ci/tsan_suppressions.txt b/tools/ci/tsan_suppressions.txt
new file mode 100644
index 0000000..0745deb
--- /dev/null
+++ b/tools/ci/tsan_suppressions.txt
@@ -0,0 +1,11 @@
+# This file contains suppressions for the TSAN tool
+#
+# Reference: https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions
+
+# For np.nonzero, see gh-28361
+race:PyArray_Nonzero
+race:count_nonzero_int
+race:count_nonzero_bool
+race:count_nonzero_float
+race:DOUBLE_nonzero
+