diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 628beca8939..00d37a4267f 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -42,21 +42,25 @@ import dpctl.utils as dpu import numpy -from dpctl.tensor._type_utils import _can_cast import dpnp # pylint: disable=no-name-in-module import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext +from dpnp.dpnp_utils.dpnp_utils_common import ( + result_type_for_device, + to_supported_dtypes, +) # pylint: disable=no-name-in-module -from .dpnp_utils import get_usm_allocations, map_dtype_to_device +from .dpnp_utils import get_usm_allocations __all__ = [ "bincount", "digitize", "histogram", "histogram_bin_edges", + "histogram2d", "histogramdd", ] @@ -65,33 +69,15 @@ _range = range -def _result_type_for_device(dtypes, device): - rt = dpnp.result_type(*dtypes) - return map_dtype_to_device(rt, device) - - def _align_dtypes(a_dtype, bins_dtype, ntype, supported_types, device): - has_fp64 = device.has_aspect_fp64 - has_fp16 = device.has_aspect_fp16 - - a_bin_dtype = _result_type_for_device([a_dtype, bins_dtype], device) + a_bin_dtype = result_type_for_device([a_dtype, bins_dtype], device) # histogram implementation doesn't support uint64 as histogram type # we can use int64 instead. 
Result would be correct even in case of overflow if ntype == numpy.uint64: ntype = dpnp.int64 - if (a_bin_dtype, ntype) in supported_types: - return a_bin_dtype, ntype - - for sample_type, hist_type in supported_types: - if _can_cast( - a_bin_dtype, sample_type, has_fp16, has_fp64 - ) and _can_cast(ntype, hist_type, has_fp16, has_fp64): - return sample_type, hist_type - - # should not happen - return None, None # pragma: no cover + return to_supported_dtypes([a_bin_dtype, ntype], supported_types, device) def _ravel_check_a_and_weights(a, weights): @@ -138,6 +124,9 @@ def _is_finite(a): return numpy.isfinite(a) if range is not None: + if len(range) != 2: + raise ValueError("range argument must consist of 2 elements.") + first_edge, last_edge = range if first_edge > last_edge: raise ValueError("max must be larger than min in range parameter.") @@ -520,6 +509,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): If `bins` is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths. + Default: ``10``. range : {None, 2-tuple of float}, optional The lower and upper range of the bins. If not provided, range is simply @@ -528,6 +518,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): affects the automatic bin computation as well. While bin width is computed to be optimal based on the actual data within `range`, the bin count will fill the entire range including portions containing no data. + Default: ``None``. density : {None, bool}, optional If ``False`` or ``None``, the result will contain the number of samples @@ -536,6 +527,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): the range is ``1``. Note that the sum of the histogram values will not be equal to ``1`` unless bins of unity width are chosen; it is not a probability *mass* function. + Default: ``None``. 
weights : {None, dpnp.ndarray, usm_ndarray}, optional An array of weights, of the same shape as `a`. Each value in `a` only @@ -545,6 +537,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): Please note that the ``dtype`` of `weights` will also become the ``dtype`` of the returned accumulator (`hist`), so it must be large enough to hold accumulated values as well. + Default: ``None``. Returns @@ -751,6 +744,167 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): return bin_edges +def histogram2d(x, y, bins=10, range=None, density=None, weights=None): + """ + Compute the bi-dimensional histogram of two data samples. + + Parameters + ---------- + x : {dpnp.ndarray, usm_ndarray} of shape (N,) + An array containing the `x` coordinates of the points to be + histogrammed. + y : {dpnp.ndarray, usm_ndarray} of shape (N,) + An array containing the `y` coordinates of the points to be + histogrammed. + bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \ + [int, array], [array, int]}, optional + + The bins specification: + + * If int, the number of bins for the two dimensions (nx=ny=bins). + * If array, the bin edges for the two dimensions + (x_edges=y_edges=bins). + * If [int, int], the number of bins in each dimension + (nx, ny = bins). + * If [array, array], the bin edges in each dimension + (x_edges, y_edges = bins). + * A combination [int, array] or [array, int], where int + is the number of bins and array is the bin edges. + + Default: ``10``. + range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional + The leftmost and rightmost edges of the bins along each dimension. + If ``None``, the ranges are + ``[[x.min(), x.max()], [y.min(), y.max()]]``. All values outside + of this range will be considered outliers and not tallied in the + histogram. + + Default: ``None``. + density : {None, bool}, optional + If ``False`` or ``None``, the default, returns the number of + samples in each bin. 
+ If ``True``, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_volume``. + + Default: ``None``. + weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional + An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. + Weights are normalized to ``1`` if `density` is ``True``. + If `density` is ``False``, the values of the returned histogram + are equal to the sum of the weights belonging to the samples + falling into each bin. + If ``None`` all samples are assigned a weight of ``1``. + + Default: ``None``. + Returns + ------- + H : dpnp.ndarray of shape (nx, ny) + The bi-dimensional histogram of samples `x` and `y`. Values in `x` + are histogrammed along the first dimension and values in `y` are + histogrammed along the second dimension. + xedges : dpnp.ndarray of shape (nx+1,) + The bin edges along the first dimension. + yedges : dpnp.ndarray of shape (ny+1,) + The bin edges along the second dimension. + + See Also + -------- + :obj:`dpnp.histogram` : 1D histogram + :obj:`dpnp.histogramdd` : Multidimensional histogram + + Notes + ----- + When `density` is ``True``, then the returned histogram is the sample + density, defined such that the sum over bins of the product + ``bin_value * bin_area`` is 1. + + Please note that the histogram does not follow the Cartesian convention + where `x` values are on the abscissa and `y` values on the ordinate + axis. Rather, `x` is histogrammed along the first dimension of the + array (vertical), and `y` along the second dimension of the array + (horizontal). This ensures compatibility with `histogramdd`. 
+ + Examples + -------- + >>> import dpnp as np + >>> x = np.random.randn(20).astype("float32") + >>> y = np.random.randn(20).astype("float32") + >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3)) + >>> hist.shape + (4, 3) + >>> hist + array([[1., 2., 0.], + [0., 3., 1.], + [1., 4., 1.], + [1., 3., 3.]], dtype=float32) + >>> edges_x.shape + (5,) + >>> edges_x + array([-1.7516936 , -0.96109843, -0.17050326, 0.62009203, 1.4106871 ], + dtype=float32) + >>> edges_y.shape + (4,) + >>> edges_y + array([-2.6604428 , -0.94615364, 0.76813555, 2.4824247 ], dtype=float32) + + Please note, that resulting values of histogram and edges may vary. + + """ + + dpnp.check_supported_arrays_type(x, y) + if weights is not None: + dpnp.check_supported_arrays_type(weights) + + if x.ndim != 1 or y.ndim != 1: + raise ValueError( + f"x and y must be 1-dimensional arrays." + f"Got {x.ndim} and {y.ndim} respectively" + ) + + if len(x) != len(y): + raise ValueError( + f"x and y must have the same length." 
+ f"Got {len(x)} and {len(y)} respectively" + ) + + usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights]) + device = exec_q.sycl_device + + sample_dtype = result_type_for_device([x.dtype, y.dtype], device) + + # Unlike histogramdd, histogram2d accepts 1d bins and + # applies them to both dimensions. + # At the same time, two-element bins should be interpreted as the + # number of bins in each dimension, and array-like bins with one element + # are not allowed + if isinstance(bins, Iterable) and len(bins) > 2: + bins = [bins] * 2 + + bins = _histdd_normalize_bins(bins, 2) + bins_dtypes = [sample_dtype] + bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")] + + bins_dtype = result_type_for_device(bins_dtypes, device) + hist_dtype = _histdd_hist_dtype(exec_q, weights) + + supported_types = statistics_ext.histogramdd_dtypes() + + sample_dtype, _ = _align_dtypes( + sample_dtype, bins_dtype, hist_dtype, supported_types, device + ) + + sample = dpnp.empty_like( + x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type + ) + sample[:, 0] = x + sample[:, 1] = y + + hist, edges = histogramdd( + sample, bins=bins, range=range, density=density, weights=weights + ) + return hist, edges[0], edges[1] + + def _histdd_validate_bins(bins): for i, b in enumerate(bins): if numpy.ndim(b) == 0: @@ -873,9 +1027,7 @@ def _histdd_hist_dtype(queue, weights): # hist_dtype is either float or complex, so it is ok # to calculate it as result type between default_float and # weights.dtype - hist_dtype = _result_type_for_device( - [hist_dtype, weights.dtype], device - ) + hist_dtype = result_type_for_device([hist_dtype, weights.dtype], device) return hist_dtype @@ -886,7 +1038,7 @@ def _histdd_sample_dtype(queue, sample, bin_edges_list): dtypes_ = [bin_edges.dtype for bin_edges in bin_edges_list] dtypes_.append(sample.dtype) - return _result_type_for_device(dtypes_, device) + return result_type_for_device(dtypes_, device) def _histdd_supported_dtypes(sample, bin_edges_list, 
weights): @@ -918,7 +1070,7 @@ def _histdd_extract_arrays(sample, weights, bins): return all_arrays -def histogramdd(sample, bins=10, range=None, weights=None, density=False): +def histogramdd(sample, bins=10, range=None, density=None, weights=None): """ Compute the multidimensional histogram of some data. @@ -936,30 +1088,33 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False): * The number of bins for each dimension (nx, ny, ... =bins) * The number of bins for all dimensions (nx=ny=...=bins). - Default: ``10`` + Default: ``10``. range : {None, sequence}, optional A sequence of length D, each an optional (lower, upper) tuple giving the outer bin edges to be used if the edges are not given explicitly in `bins`. - An entry of None in the sequence results in the minimum and maximum + An entry of ``None`` in the sequence results in the minimum and maximum values being used for the corresponding dimension. - None is equivalent to passing a tuple of D None values. - - Default: ``None`` - weights : {dpnp.ndarray, usm_ndarray}, optional - An (N,)-shaped array of values `w_i` weighing each sample - `(x_i, y_i, z_i, ...)`. - Weights are normalized to 1 if density is True. If density is False, - the values of the returned histogram are equal to the sum of the - weights belonging to the samples falling into each bin. + ``None`` is equivalent to passing a tuple of D ``None`` values. - Default: ``None`` - density : bool, optional - If ``False``, the default, returns the number of samples in each bin. + Default: ``None``. + density : {None, bool}, optional + If ``False`` or ``None``, the default, returns the number of + samples in each bin. If ``True``, returns the probability *density* function at the bin, ``bin_count / sample_count / bin_volume``. - Default: ``False`` + Default: ``None``. + weights : {None, dpnp.ndarray, usm_ndarray}, optional + An (N,)-shaped array of values `w_i` weighing each sample + `(x_i, y_i, z_i, ...)`. 
+ Weights are normalized to ``1`` if density is ``True``. + If density is ``False``, the values of the returned histogram + are equal to the sum of the weights belonging to the samples + falling into each bin. + If ``None`` all samples are assigned a weight of ``1``. + + Default: ``None``. Returns ------- @@ -993,7 +1148,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False): elif sample.ndim > 2: raise ValueError("sample must have no more than 2 dimensions") - ndim = sample.shape[1] if sample.size > 0 else 1 + ndim = sample.shape[1] _arrays = _histdd_extract_arrays(sample, weights, bins) usm_type, queue = get_usm_allocations(_arrays) diff --git a/dpnp/dpnp_utils/dpnp_utils_common.py b/dpnp/dpnp_utils/dpnp_utils_common.py index 087d916d0e3..ea8de9793f0 100644 --- a/dpnp/dpnp_utils/dpnp_utils_common.py +++ b/dpnp/dpnp_utils/dpnp_utils_common.py @@ -54,12 +54,25 @@ def to_supported_dtypes(dtypes, supported_types, device): def is_castable(dtype, stype): return _can_cast(dtype, stype, has_fp16, has_fp64) + if not isinstance(supported_types, Iterable): + supported_types = (supported_types,) # pragma: no cover + + if isinstance(dtypes, Iterable): + sdtypes_elem = supported_types[0] + if not isinstance(sdtypes_elem, Iterable): + raise ValueError( # pragma: no cover + "Input and supported types must have the same length" + ) + + typ = type(sdtypes_elem) + dtypes = typ(dtypes) + if dtypes in supported_types: return dtypes for stypes in supported_types: if not isinstance(dtypes, Iterable): - if isinstance(stypes, Iterable): + if isinstance(stypes, Iterable): # pragma: no cover raise ValueError( "Input and supported types must have the same length" ) @@ -67,7 +80,9 @@ def is_castable(dtype, stype): if is_castable(dtypes, stypes): return stypes else: - if not isinstance(stypes, Iterable) or len(dtypes) != len(stypes): + if not isinstance(stypes, Iterable) or len(dtypes) != len( + stypes + ): # pragma: no cover raise ValueError( "Input and supported 
types must have the same length" ) @@ -78,4 +93,7 @@ def is_castable(dtype, stype): ): return stypes - return None # pragma: no cover + if not isinstance(dtypes, Iterable): # pragma: no cover + return None # pragma: no cover + + return (None,) * len(dtypes) # pragma: no cover diff --git a/dpnp/tests/test_histogram.py b/dpnp/tests/test_histogram.py index 6a4e8abceb6..6d9f53ed920 100644 --- a/dpnp/tests/test_histogram.py +++ b/dpnp/tests/test_histogram.py @@ -371,6 +371,12 @@ def test_invalid_range(self, xp): with assert_raises_regex(ValueError, "max must be larger than"): xp.histogram(vals, range=[0.1, 0.01]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_size(self, xp): + # range shape must be [2] + vals = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogram, vals, range=[[0, 1, 2]]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) @pytest.mark.parametrize("inf_val", [-numpy.inf, numpy.inf]) def test_infinite_edge(self, xp, inf_val): @@ -719,6 +725,18 @@ def test_invalid_range(self, xp): with assert_raises_regex(ValueError, "max must be larger than"): xp.histogramdd(vals, range=[[0.1, 0.01]]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_dims(self, xp): + # range must have one entry per sample dimension + vals = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogramdd, vals, range=[[0, 1]] * 2) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_size(self, xp): + # range shape must be [2, 2] + x = y = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogramdd, x, y, range=[[0, 1, 2]]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) @pytest.mark.parametrize("inf_val", [-numpy.inf, numpy.inf]) def test_infinite_edge(self, xp, inf_val): @@ -798,3 +816,245 @@ def test_different_bins_amount(self, bins_count): result_hist, result_edges = dpnp.histogramdd(iv, bins=[bins_count]) assert_array_equal(result_hist, expected_hist) assert_allclose(result_edges, 
expected_edges) + + +class TestHistogram2d: + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_rand_data(self, dtype): + n = 100 + x, y = numpy.random.rand(2, n).astype(dtype=dtype) + ix = dpnp.array(x, dtype=dtype) + iy = dpnp.array(y, dtype=dtype) + + expected_hist, _, _ = numpy.histogram2d(x, y) + result_hist, _, _ = dpnp.histogram2d(ix, iy) + assert_array_equal(result_hist, expected_hist) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_linspace_data(self, dtype): + n = 100 + x, y = numpy.linspace(0, 10, 2 * n, dtype=dtype).reshape(2, n) + ix = dpnp.array(x) + iy = dpnp.array(y) + + expected_hist, _, _ = numpy.histogram2d(x, y) + result_hist, _, _ = dpnp.histogram2d(ix, iy) + assert_array_equal(result_hist, expected_hist) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_bin_float(self, xp): + x = y = xp.array([[1, 2]]) + assert_raises(ValueError, xp.histogram2d, x, y, bins=0.1) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_bin_2d_array(self, xp): + x = y = xp.array([[1, 2]]) + assert_raises(ValueError, xp.histogram2d, x, y, bins=[10, 10, 10]) + + @pytest.mark.parametrize( + "bins", + [ + 11, + [11] * 2, + [[0, 20, 40, 60, 80, 100]] * 2, + [[0, 20, 40, 60, 80, 300]] * 2, + ], + ) + def test_bins(self, bins): + n = 100 + dims = 2 + x, y = numpy.arange(n * dims).reshape(dims, n) + ix = dpnp.array(x) + iy = dpnp.array(y) + + bins_dpnp = bins + if isinstance(bins, list): + if isinstance(bins[0], list): + bins = [numpy.array(b) for b in bins] + bins_dpnp = [dpnp.array(b) for b in bins] + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y, bins + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ix, iy, bins_dpnp + ) + assert_allclose(result_hist, expected_hist) + 
assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + def test_no_side_effects(self): + x = dpnp.array([1.3, 2.5, 2.3]) + y = dpnp.array([2.3, 3.5, 4.3]) + copy_x = x.copy() + copy_y = y.copy() + + # check that ensures that values passed to ``histogram2d`` are unchanged + _, _, _ = dpnp.histogram2d(x, y) + assert (x == copy_x).all() + assert (y == copy_y).all() + + def test_empty(self): + x = y = numpy.array([]) + ix = dpnp.array(x) + iy = dpnp.array(y) + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d(ix, iy) + + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + def test_0d(self): + x = dpnp.array(1) + y = dpnp.array(2) + + assert_raises(ValueError, dpnp.histogram2d, x, y) + + def test_2d(self): + x = dpnp.ones((10, 10)) + y = dpnp.ones((10, 10)) + + assert_raises(ValueError, dpnp.histogram2d, x, y) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_finite_range(self, xp): + x = y = xp.linspace(0.0, 1.0, num=100) + + # normal ranges should be finite + _, _, _ = xp.histogram2d(x, y, range=[[0.25, 0.75]] * 2) + assert_raises( + ValueError, xp.histogram2d, x, y, range=[[xp.nan, 0.75]] * 2 + ) + assert_raises( + ValueError, xp.histogram2d, x, y, range=[[0.25, xp.inf]] * 2 + ) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range(self, xp): + # start of range must be < end of range + x = y = xp.linspace(0.0, 1.0, num=100) + with assert_raises_regex(ValueError, "max must be larger than"): + xp.histogram2d(x, y, range=[[0.1, 0.01]] * 2) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_dims(self, xp): + # range shape must be [2, 2] + x = y = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogram2d, x, y, range=[[0, 1]]) + + 
@pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_size(self, xp): + # range shape must be [2, 2] + x = y = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogram2d, x, y, range=[[0, 1, 2]] * 2) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("inf_val", [-numpy.inf, numpy.inf]) + def test_infinite_edge(self, xp, inf_val): + x = y = xp.array([0.5, 1.5, inf_val]) + min, max = x.min(), x.max() + + # both first and last ranges must be finite + with assert_raises_regex( + ValueError, + f"autodetected range of \\[{min}, {max}\\] is not finite", + ): + xp.histogram2d(x, y) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unsigned_monotonicity_check(self, xp): + # bins must increase monotonically when bins contain unsigned values + x = y = xp.array([2]) + bins = [xp.array([1, 3, 1], dtype="uint64")] * 2 + with assert_raises(ValueError): + xp.histogram2d(x, y, bins=bins) + + def test_nan_values(self): + one_nan = numpy.array([0, 1, numpy.nan]) + all_nan = numpy.array([numpy.nan, numpy.nan]) + + ione_nan = dpnp.array(one_nan) + iall_nan = dpnp.array(all_nan) + + # NaN is not counted + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + one_nan, one_nan, bins=[[0, 1]] * 2 + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ione_nan, ione_nan, bins=[[0, 1]] * 2 + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + # NaN is not counted + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + all_nan, all_nan, bins=[[0, 1]] * 2 + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + iall_nan, iall_nan, bins=[[0, 1]] * 2 + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + def test_bins_another_sycl_queue(self): + x = y = 
dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + bins = dpnp.arange(4, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram2d(x, y, bins=[bins] * 2) + + def test_sample_array_like(self): + x = y = [0, 1, 2, 3, 4] + with assert_raises(TypeError): + dpnp.histogram2d(x, y) + + def test_weights_array_like(self): + x = y = dpnp.arange(5) + w = [1, 2, 3, 4, 5] + with assert_raises(TypeError): + dpnp.histogram2d(x, y, weights=w) + + def test_weights_another_sycl_queue(self): + x = y = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) + w = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram2d(x, y, weights=w) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_size_mismatch(self, xp): + # x and y must have same shape + x = xp.linspace(0.0, 1.0, num=10) + y = xp.linspace(0.0, 1.0, num=20) + assert_raises(ValueError, xp.histogram2d, x, y) + + @pytest.mark.parametrize( + "bins_count", + [10, 10**2, 10**3], + ) + def test_different_bins_amount(self, bins_count): + x, y = numpy.linspace( + 0, bins_count, 2 * bins_count, dtype=numpy.float32 + ).reshape(2, bins_count) + ix = dpnp.array(x) + iy = dpnp.array(y) + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y, bins=bins_count + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ix, iy, bins=bins_count + ) + assert_array_equal(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x, rtol=1e-6) + assert_allclose(result_edges_y, expected_edges_y, rtol=1e-6) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 7e3218b2b41..6484699b26d 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -2699,6 +2699,39 @@ def test_histogram(weights, device): assert_sycl_queue_equal(edges_queue, iv.sycl_queue) +@pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)]) +@pytest.mark.parametrize( + "device", + valid_devices, + 
ids=[device.filter_string for device in valid_devices], +) +def test_histogram2d(weights, device): + x = numpy.arange(5) + y = numpy.arange(5) + w = weights + + ix = dpnp.array(x, device=device) + iy = dpnp.array(y, device=device) + iw = None if weights is None else dpnp.array(w, sycl_queue=ix.sycl_queue) + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y, weights=w + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ix, iy, weights=iw + ) + assert_array_equal(result_hist, expected_hist) + assert_dtype_allclose(result_edges_x, expected_edges_x) + assert_dtype_allclose(result_edges_y, expected_edges_y) + + hist_queue = result_hist.sycl_queue + edges_x_queue = result_edges_x.sycl_queue + edges_y_queue = result_edges_y.sycl_queue + assert_sycl_queue_equal(hist_queue, ix.sycl_queue) + assert_sycl_queue_equal(edges_x_queue, ix.sycl_queue) + assert_sycl_queue_equal(edges_y_queue, ix.sycl_queue) + + @pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)]) @pytest.mark.parametrize( "device", diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index f80b4bbedfd..1b5c8970d6d 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1623,6 +1623,29 @@ def test_histogram(usm_type_v, usm_type_w): assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w]) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_w", list_of_usm_types, ids=list_of_usm_types) +def test_histogram2d(usm_type_x, usm_type_y, usm_type_w): + x = dp.arange(5, usm_type=usm_type_x) + y = dp.arange(5, usm_type=usm_type_y) + w = dp.arange(7, 12, usm_type=usm_type_w) + + hist, edges_x, edges_y = dp.histogram2d(x, y, weights=w) + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert w.usm_type == usm_type_w + assert hist.usm_type == 
du.get_coerced_usm_type( + [usm_type_x, usm_type_y, usm_type_w] + ) + assert edges_x.usm_type == du.get_coerced_usm_type( + [usm_type_x, usm_type_y, usm_type_w] + ) + assert edges_y.usm_type == du.get_coerced_usm_type( + [usm_type_x, usm_type_y, usm_type_w] + ) + + @pytest.mark.parametrize("usm_type_v", list_of_usm_types, ids=list_of_usm_types) @pytest.mark.parametrize("usm_type_w", list_of_usm_types, ids=list_of_usm_types) def test_bincount(usm_type_v, usm_type_w): diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py b/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py index ff9480cf8fb..960d7cb07b7 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py @@ -550,13 +550,11 @@ def test_histogramdd_invalid_range(self): y, bin_edges = xp.histogramdd(x, range=r) -@pytest.mark.skip("histogram2d() is not implemented yet") -# @pytest.mark.skip(reason="XXX: NP2.0: histogram2d dtype") @testing.parameterize( *testing.product( { "weights": [None, 1, 2], - "weights_dtype": [numpy.int32, numpy.float64], + "weights_dtype": [numpy.int32, numpy.float32], "density": [True, False], "bins": [10, (8, 16), (16, 8), "array_list", "array"], "range": [None, ((20, 50), (10, 100))], @@ -566,7 +564,11 @@ def test_histogramdd_invalid_range(self): class TestHistogram2d: @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_allclose(atol=1e-2, rtol=1e-7) + @testing.numpy_cupy_allclose( + atol=1e-2, + rtol=1e-7, + type_check=has_support_aspect64() and numpy_version() < "2.0.0", + ) def test_histogram2d(self, xp, dtype): x = testing.shaped_random((100,), xp, dtype, scale=100) y = testing.shaped_random((100,), xp, dtype, scale=100) @@ -592,7 +594,6 @@ def test_histogram2d(self, xp, dtype): return y, edges0, edges1 -@pytest.mark.skip("histogram2d() is not implemented yet") class TestHistogram2dErrors(unittest.TestCase): def 
test_histogram2d_disallow_arraylike_bins(self):