From 3b3a233b35ae1a9d7fd7e29c7c1eb6a397c2759a Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Fri, 3 Jan 2025 19:01:30 +0100 Subject: [PATCH 1/8] initial --- dpnp/dpnp_iface_histograms.py | 162 ++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index adf44a5d535..6fb49cdd8b3 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -57,6 +57,7 @@ "digitize", "histogram", "histogram_bin_edges", + "histogram2d" "histogramdd", ] @@ -751,6 +752,167 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): return bin_edges +def histogram2d(x, y, bins=10, range=None, density=None, weights=None): + """ + Compute the bi-dimensional histogram of two data samples. + + Parameters + ---------- + x : {dpnp.ndarray, usm_ndarray} of shape (N,) + An array containing the x coordinates of the points to be + histogrammed. + y : {dpnp.ndarray, usm_ndarray} of shape (N,) + An array containing the y coordinates of the points to be + histogrammed. + bins : {int, list of dpnp.ndarray, list of usm_ndarray, sequence of scalars}, optional + The bin specification: + + * If int, the number of bins for the two dimensions (nx=ny=bins). + * If array, the bin edges for the two dimensions + (x_edges=y_edges=bins). + * If [int, int], the number of bins in each dimension + (nx, ny = bins). + * If [array, array], the bin edges in each dimension + (x_edges, y_edges = bins). + * A combination [int, array] or [array, int], where int + is the number of bins and array is the bin edges. + + range : {dpnp.ndarray, usm_ndarray} of shape (2,2), optional + The leftmost and rightmost edges of the bins along each dimension + (if not specified explicitly in the `bins` parameters): + ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range + will be considered outliers and not tallied in the histogram. 
+ density : bool, optional + If ``False``, the default, returns the number of samples in each bin. + If ``True``, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_area``. + weights : {dpnp.ndarray, usm_ndarray} of shape(N,), optional + An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. + Weights are normalized to 1 if `density` is True. If `density` is + False, the values of the returned histogram are equal to the sum of + the weights belonging to the samples falling into each bin. + + Returns + ------- + H : ndarray, shape(nx, ny) + The bi-dimensional histogram of samples `x` and `y`. Values in `x` + are histogrammed along the first dimension and values in `y` are + histogrammed along the second dimension. + xedges : dpnp.ndarray, shape(nx+1,) + The bin edges along the first dimension. + yedges : dpnp.ndarray, shape(ny+1,) + The bin edges along the second dimension. + + See Also + -------- + :obj:`dpnp.histogram` : 1D histogram + :obj:`dpnp.histogramdd` : Multidimensional histogram + + Notes + ----- + When `density` is True, then the returned histogram is the sample + density, defined such that the sum over bins of the product + ``bin_value * bin_area`` is 1. + + Please note that the histogram does not follow the Cartesian convention + where `x` values are on the abscissa and `y` values on the ordinate + axis. Rather, `x` is histogrammed along the first dimension of the + array (vertical), and `y` along the second dimension of the array + (horizontal). This ensures compatibility with `histogramdd`. + + Examples + -------- + >>> import numpy as np + >>> from matplotlib.image import NonUniformImage + >>> import matplotlib.pyplot as plt + + Construct a 2-D histogram with variable bin width. 
First define the bin + edges: + + >>> xedges = [0, 1, 3, 5] + >>> yedges = [0, 2, 3, 4, 6] + + Next we create a histogram H with random bin content: + + >>> x = np.random.normal(2, 1, 100) + >>> y = np.random.normal(1, 1, 100) + >>> H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges)) + >>> # Histogram does not follow Cartesian convention (see Notes), + >>> # therefore transpose H for visualization purposes. + >>> H = H.T + + :func:`imshow ` can only display square bins: + + >>> fig = plt.figure(figsize=(7, 3)) + >>> ax = fig.add_subplot(131, title='imshow: square bins') + >>> plt.imshow(H, interpolation='nearest', origin='lower', + ... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]]) + + + :func:`pcolormesh ` can display actual edges: + + >>> ax = fig.add_subplot(132, title='pcolormesh: actual edges', + ... aspect='equal') + >>> X, Y = np.meshgrid(xedges, yedges) + >>> ax.pcolormesh(X, Y, H) + + + :class:`NonUniformImage ` can be used to + display actual bin edges with interpolation: + + >>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated', + ... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]]) + >>> im = NonUniformImage(ax, interpolation='bilinear') + >>> xcenters = (xedges[:-1] + xedges[1:]) / 2 + >>> ycenters = (yedges[:-1] + yedges[1:]) / 2 + >>> im.set_data(xcenters, ycenters, H) + >>> ax.add_image(im) + >>> plt.show() + + It is also possible to construct a 2-D histogram without specifying bin + edges: + + >>> # Generate non-symmetric test data + >>> n = 10000 + >>> x = np.linspace(1, 100, n) + >>> y = 2*np.log(x) + np.random.rand(n) - 0.5 + >>> # Compute 2d histogram. Note the order of x/y and xedges/yedges + >>> H, yedges, xedges = np.histogram2d(y, x, bins=20) + + Now we can plot the histogram using + :func:`pcolormesh `, and a + :func:`hexbin ` for comparison. 
+ + >>> # Plot histogram using pcolormesh + >>> fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True) + >>> ax1.pcolormesh(xedges, yedges, H, cmap='rainbow') + >>> ax1.plot(x, 2*np.log(x), 'k-') + >>> ax1.set_xlim(x.min(), x.max()) + >>> ax1.set_ylim(y.min(), y.max()) + >>> ax1.set_xlabel('x') + >>> ax1.set_ylabel('y') + >>> ax1.set_title('histogram2d') + >>> ax1.grid() + + >>> # Create hexbin plot for comparison + >>> ax2.hexbin(x, y, gridsize=20, cmap='rainbow') + >>> ax2.plot(x, 2*np.log(x), 'k-') + >>> ax2.set_title('hexbin') + >>> ax2.set_xlim(x.min(), x.max()) + >>> ax2.set_xlabel('x') + >>> ax2.grid() + + >>> plt.show() + """ + + if len(x) != len(y): + raise ValueError(f'x and y must have the same length. Got {len(x)} and {len(y)} respectively') + + + hist, edges = histogramdd([x, y], bins, range, density, weights) + return hist, edges[0], edges[1] + + def _histdd_validate_bins(bins): for i, b in enumerate(bins): if numpy.ndim(b) == 0: From a692ff559fd884ff7741197ec249c217d60a91e5 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Tue, 14 Jan 2025 17:07:49 +0100 Subject: [PATCH 2/8] Implementation of histogram2d --- dpnp/dpnp_iface_histograms.py | 161 +++++------ dpnp/tests/test_histogram.py | 260 ++++++++++++++++++ dpnp/tests/test_sycl_queue.py | 33 +++ dpnp/tests/test_usm_type.py | 23 ++ .../cupy/statistics_tests/test_histogram.py | 11 +- 5 files changed, 395 insertions(+), 93 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 6fb49cdd8b3..f96a91fb0e9 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -57,7 +57,7 @@ "digitize", "histogram", "histogram_bin_edges", - "histogram2d" + "histogram2d", "histogramdd", ] @@ -139,6 +139,9 @@ def _is_finite(a): return numpy.isfinite(a) if range is not None: + if len(range) != 2: + raise ValueError("range argument must consist of 2 elements.") + first_edge, last_edge = range if first_edge > last_edge: raise ValueError("max must be larger 
than min in range parameter.") @@ -753,6 +756,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): def histogram2d(x, y, bins=10, range=None, density=None, weights=None): + # pylint: disable=line-too-long """ Compute the bi-dimensional histogram of two data samples. @@ -764,8 +768,10 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): y : {dpnp.ndarray, usm_ndarray} of shape (N,) An array containing the y coordinates of the points to be histogrammed. - bins : {int, list of dpnp.ndarray, list of usm_ndarray, sequence of scalars}, optional - The bin specification: + bins : {int, list of dpnp.ndarray or usm_ndarray, sequence of scalars}, optional + Histogram bins. + + The bins specification: * If int, the number of bins for the two dimensions (nx=ny=bins). * If array, the bin edges for the two dimensions @@ -822,94 +828,73 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): Examples -------- - >>> import numpy as np - >>> from matplotlib.image import NonUniformImage - >>> import matplotlib.pyplot as plt - - Construct a 2-D histogram with variable bin width. First define the bin - edges: - - >>> xedges = [0, 1, 3, 5] - >>> yedges = [0, 2, 3, 4, 6] - - Next we create a histogram H with random bin content: - - >>> x = np.random.normal(2, 1, 100) - >>> y = np.random.normal(1, 1, 100) - >>> H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges)) - >>> # Histogram does not follow Cartesian convention (see Notes), - >>> # therefore transpose H for visualization purposes. - >>> H = H.T - - :func:`imshow ` can only display square bins: - - >>> fig = plt.figure(figsize=(7, 3)) - >>> ax = fig.add_subplot(131, title='imshow: square bins') - >>> plt.imshow(H, interpolation='nearest', origin='lower', - ... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]]) - - - :func:`pcolormesh ` can display actual edges: - - >>> ax = fig.add_subplot(132, title='pcolormesh: actual edges', - ... 
aspect='equal') - >>> X, Y = np.meshgrid(xedges, yedges) - >>> ax.pcolormesh(X, Y, H) - - - :class:`NonUniformImage ` can be used to - display actual bin edges with interpolation: - - >>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated', - ... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]]) - >>> im = NonUniformImage(ax, interpolation='bilinear') - >>> xcenters = (xedges[:-1] + xedges[1:]) / 2 - >>> ycenters = (yedges[:-1] + yedges[1:]) / 2 - >>> im.set_data(xcenters, ycenters, H) - >>> ax.add_image(im) - >>> plt.show() - - It is also possible to construct a 2-D histogram without specifying bin - edges: - - >>> # Generate non-symmetric test data - >>> n = 10000 - >>> x = np.linspace(1, 100, n) - >>> y = 2*np.log(x) + np.random.rand(n) - 0.5 - >>> # Compute 2d histogram. Note the order of x/y and xedges/yedges - >>> H, yedges, xedges = np.histogram2d(y, x, bins=20) - - Now we can plot the histogram using - :func:`pcolormesh `, and a - :func:`hexbin ` for comparison. - - >>> # Plot histogram using pcolormesh - >>> fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True) - >>> ax1.pcolormesh(xedges, yedges, H, cmap='rainbow') - >>> ax1.plot(x, 2*np.log(x), 'k-') - >>> ax1.set_xlim(x.min(), x.max()) - >>> ax1.set_ylim(y.min(), y.max()) - >>> ax1.set_xlabel('x') - >>> ax1.set_ylabel('y') - >>> ax1.set_title('histogram2d') - >>> ax1.grid() - - >>> # Create hexbin plot for comparison - >>> ax2.hexbin(x, y, gridsize=20, cmap='rainbow') - >>> ax2.plot(x, 2*np.log(x), 'k-') - >>> ax2.set_title('hexbin') - >>> ax2.set_xlim(x.min(), x.max()) - >>> ax2.set_xlabel('x') - >>> ax2.grid() - - >>> plt.show() + >>> import dpnp as np + >>> x = np.random.randn(20) + >>> y = np.random.randn(20) + >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3)) + >>> hist + [[1. 0. 0.] + [0. 0. 0.] + [5. 6. 4.] + [1. 2. 
1.]] + >>> edges_x + [-5.6575713 -3.5574734 -1.4573755 0.6427226 2.74282 ] + >>> edges_y + [-1.1889046 -0.07263839 1.0436279 2.159894 ] """ + # pylint: enable=line-too-long + + dpnp.check_supported_arrays_type(x, y) + if weights is not None: + dpnp.check_supported_arrays_type(weights) + + if x.ndim != 1 or y.ndim != 1: + raise ValueError( + f"x and y must be 1-dimensional arrays." + f"Got {x.ndim} and {y.ndim} respectively" + ) if len(x) != len(y): - raise ValueError(f'x and y must have the same length. Got {len(x)} and {len(y)} respectively') + raise ValueError( + f"x and y must have the same length." + f"Got {len(x)} and {len(y)} respectively" + ) + + usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights]) + device = exec_q.sycl_device + + sample_dtype = _result_type_for_device([x.dtype, y.dtype], device) + + # Unlike histogramdd histogram2d accepts 1d bins and + # apply it to both dimensions + # at the same moment two elements bins should be interpreted as + # number of bins in each dimension and array-like bins with one element + # is not allowed + if isinstance(bins, Iterable) and len(bins) > 2: + bins = [bins] * 2 + + bins = _histdd_normalize_bins(bins, 2) + bins_dtypes = [sample_dtype] + bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")] + + bins_dtype = _result_type_for_device(bins_dtypes, device) + hist_dtype = _histdd_hist_dtype(exec_q, weights) + supported_types = statistics_ext.histogramdd_dtypes() + + sample_dtype, _ = _align_dtypes( + sample_dtype, bins_dtype, hist_dtype, supported_types, device + ) - hist, edges = histogramdd([x, y], bins, range, density, weights) + sample = dpnp.empty_like( + x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type + ) + sample[:, 0] = x + sample[:, 1] = y + + hist, edges = histogramdd( + sample, bins=bins, range=range, density=density, weights=weights + ) return hist, edges[0], edges[1] @@ -1080,7 +1065,7 @@ def _histdd_extract_arrays(sample, weights, bins): return all_arrays -def 
histogramdd(sample, bins=10, range=None, weights=None, density=False): +def histogramdd(sample, bins=10, range=None, density=False, weights=None): """ Compute the multidimensional histogram of some data. @@ -1155,7 +1140,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False): elif sample.ndim > 2: raise ValueError("sample must have no more than 2 dimensions") - ndim = sample.shape[1] if sample.size > 0 else 1 + ndim = sample.shape[1] _arrays = _histdd_extract_arrays(sample, weights, bins) usm_type, queue = get_usm_allocations(_arrays) diff --git a/dpnp/tests/test_histogram.py b/dpnp/tests/test_histogram.py index 6a4e8abceb6..0c698dff129 100644 --- a/dpnp/tests/test_histogram.py +++ b/dpnp/tests/test_histogram.py @@ -371,6 +371,12 @@ def test_invalid_range(self, xp): with assert_raises_regex(ValueError, "max must be larger than"): xp.histogram(vals, range=[0.1, 0.01]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_size(self, xp): + # range shape must be [2] + vals = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogram, vals, range=[[0, 1, 2]]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) @pytest.mark.parametrize("inf_val", [-numpy.inf, numpy.inf]) def test_infinite_edge(self, xp, inf_val): @@ -719,6 +725,18 @@ def test_invalid_range(self, xp): with assert_raises_regex(ValueError, "max must be larger than"): xp.histogramdd(vals, range=[[0.1, 0.01]]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_dims(self, xp): + # start of range must be < end of range + vals = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogramdd, vals, range=[[0, 1]] * 2) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_size(self, xp): + # range shape must be [2, 2] + x = y = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogramdd, x, y, range=[[0, 1, 2]]) + @pytest.mark.parametrize("xp", [numpy, dpnp]) @pytest.mark.parametrize("inf_val", 
[-numpy.inf, numpy.inf]) def test_infinite_edge(self, xp, inf_val): @@ -798,3 +816,245 @@ def test_different_bins_amount(self, bins_count): result_hist, result_edges = dpnp.histogramdd(iv, bins=[bins_count]) assert_array_equal(result_hist, expected_hist) assert_allclose(result_edges, expected_edges) + + +class TestHistogram2d: + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_rand_data(self, dtype): + n = 100 + x, y = numpy.random.rand(2, n).astype(dtype=dtype) + ix = dpnp.array(x, dtype=dtype) + iy = dpnp.array(y, dtype=dtype) + + expected_hist, _, _ = numpy.histogram2d(x, y) + result_hist, _, _ = dpnp.histogram2d(ix, iy) + assert_array_equal(result_hist, expected_hist) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_linspace_data(self, dtype): + n = 100 + x, y = numpy.linspace(0, 10, 2 * n, dtype=dtype).reshape(2, n) + ix = dpnp.array(x) + iy = dpnp.array(y) + + expected_hist, _, _ = numpy.histogram2d(x, y) + result_hist, _, _ = dpnp.histogram2d(ix, iy) + assert_array_equal(result_hist, expected_hist) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_bin_float(self, xp): + x = y = xp.array([[1, 2]]) + assert_raises(ValueError, xp.histogram2d, x, y, bins=0.1) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_bin_2d_array(self, xp): + x = y = xp.array([[1, 2]]) + assert_raises(ValueError, xp.histogram2d, x, y, bins=[10, 10, 10]) + + @pytest.mark.parametrize( + "bins", + [ + 11, + [11] * 2, + [[0, 20, 40, 60, 80, 100]] * 2, + [[0, 20, 40, 60, 80, 300]] * 2, + ], + ) + def test_bins(self, bins): + n = 100 + dims = 2 + x, y = numpy.arange(n * dims).reshape(dims, n) + ix = dpnp.array(x) + iy = dpnp.array(y) + + bins_dpnp = bins + if isinstance(bins, list): + if isinstance(bins[0], list): + bins = [numpy.array(b) for b in bins] 
+ bins_dpnp = [dpnp.array(b) for b in bins] + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y, bins + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ix, iy, bins_dpnp + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + def test_no_side_effects(self): + x = dpnp.array([1.3, 2.5, 2.3]) + y = dpnp.array([2.3, 3.5, 4.3]) + copy_x = x.copy() + copy_y = y.copy() + + # check that ensures that values passed to ``histogram2d`` are unchanged + _, _, _ = dpnp.histogram2d(x, y) + assert (x == copy_x).all() + assert (y == copy_y).all() + + def test_empty(self): + x = y = numpy.array([]) + ix = dpnp.array(x) + iy = dpnp.array(y) + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d(ix, iy) + + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + def test_0d(self): + x = dpnp.array(1) + y = dpnp.array(2) + + assert_raises(ValueError, dpnp.histogram2d, x, y) + + def test_2d(self): + x = dpnp.ones((10, 10)) + y = dpnp.ones((10, 10)) + + assert_raises(ValueError, dpnp.histogram2d, x, y) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_finite_range(self, xp): + x = y = xp.linspace(0.0, 1.0, num=100) + + # normal ranges should be fine + _, _, _ = xp.histogram2d(x, y, range=[[0.25, 0.75]] * 2) + assert_raises( + ValueError, xp.histogram2d, x, y, range=[[xp.nan, 0.75]] * 2 + ) + assert_raises( + ValueError, xp.histogram2d, x, y, range=[[0.25, xp.inf]] * 2 + ) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range(self, xp): + # start of range must be < end of range + x = y = xp.linspace(0.0, 1.0, num=100) + with assert_raises_regex(ValueError, "max must be larger than"): + xp.histogram2d(x, y, 
range=[[0.1, 0.01]] * 2) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_dims(self, xp): + # range shape must be [2, 2] + x = y = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogram2d, x, y, range=[[0, 1]]) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_invalid_range_size(self, xp): + # range shape must be [2, 2] + x = y = xp.linspace(0.0, 1.0, num=100) + assert_raises(ValueError, xp.histogram2d, x, y, range=[[0, 1, 2]] * 2) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("inf_val", [-numpy.inf, numpy.inf]) + def test_infinite_edge(self, xp, inf_val): + x = y = xp.array([0.5, 1.5, inf_val]) + min, max = x.min(), x.max() + + # both first and last ranges must be finite + with assert_raises_regex( + ValueError, + f"autodetected range of \\[{min}, {max}\\] is not finite", + ): + xp.histogram2d(x, y) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unsigned_monotonicity_check(self, xp): + # bins must increase monotonically when bins contain unsigned values + x = y = xp.array([2]) + bins = [xp.array([1, 3, 1], dtype="uint64")] * 2 + with assert_raises(ValueError): + xp.histogram2d(x, y, bins=bins) + + def test_nan_values(self): + one_nan = numpy.array([0, 1, numpy.nan]) + all_nan = numpy.array([numpy.nan, numpy.nan]) + + ione_nan = dpnp.array(one_nan) + iall_nan = dpnp.array(all_nan) + + # NaN is not counted + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + one_nan, one_nan, bins=[[0, 1]] * 2 + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ione_nan, ione_nan, bins=[[0, 1]] * 2 + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + # NaN is not counted + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + all_nan, all_nan, bins=[[0, 1]] * 2 + ) + result_hist, result_edges_x, result_edges_y = 
dpnp.histogram2d( + iall_nan, iall_nan, bins=[[0, 1]] * 2 + ) + assert_allclose(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x) + assert_allclose(result_edges_y, expected_edges_y) + + def test_bins_another_sycl_queue(self): + x = y = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + bins = dpnp.arange(4, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram2d(x, y, bins=[bins] * 2) + + def test_sample_array_like(self): + x = y = [0, 1, 2, 3, 4] + with assert_raises(TypeError): + dpnp.histogram2d(x, y) + + def test_weights_array_like(self): + x = y = dpnp.arange(5) + w = [1, 2, 3, 4, 5] + with assert_raises(TypeError): + dpnp.histogram2d(x, y, weights=w) + + def test_weights_another_sycl_queue(self): + x = y = dpnp.arange(5, sycl_queue=dpctl.SyclQueue()) + w = dpnp.arange(7, 12, sycl_queue=dpctl.SyclQueue()) + with assert_raises(ValueError): + dpnp.histogram2d(x, y, weights=w) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_size_mismatch(self, xp): + # x and y must have same shape + x = xp.linspace(0.0, 1.0, num=10) + y = xp.linspace(0.0, 1.0, num=20) + assert_raises(ValueError, xp.histogram2d, x, y) + + @pytest.mark.parametrize( + "bins_count", + [10, 10**2, 10**3], + ) + def test_different_bins_amount(self, bins_count): + x, y = numpy.linspace( + 0, bins_count, 2 * bins_count, dtype=numpy.float32 + ).reshape(2, bins_count) + ix = dpnp.array(x) + iy = dpnp.array(y) + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y, bins=bins_count + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ix, iy, bins=bins_count + ) + assert_array_equal(result_hist, expected_hist) + assert_allclose(result_edges_x, expected_edges_x, rtol=1e-6) + assert_allclose(result_edges_y, expected_edges_y, rtol=1e-6) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 9da87c3db86..053cbc333e3 100644 --- a/dpnp/tests/test_sycl_queue.py +++ 
b/dpnp/tests/test_sycl_queue.py @@ -2655,6 +2655,39 @@ def test_histogram(weights, device): assert_sycl_queue_equal(edges_queue, iv.sycl_queue) +@pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)]) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_histogram2d(weights, device): + x = numpy.arange(5) + y = numpy.arange(5) + w = weights + + ix = dpnp.array(x, device=device) + iy = dpnp.array(y, device=device) + iw = None if weights is None else dpnp.array(w, sycl_queue=ix.sycl_queue) + + expected_hist, expected_edges_x, expected_edges_y = numpy.histogram2d( + x, y, weights=w + ) + result_hist, result_edges_x, result_edges_y = dpnp.histogram2d( + ix, iy, weights=iw + ) + assert_array_equal(result_hist, expected_hist) + assert_dtype_allclose(result_edges_x, expected_edges_x) + assert_dtype_allclose(result_edges_y, expected_edges_y) + + hist_queue = result_hist.sycl_queue + edges_x_queue = result_edges_x.sycl_queue + edges_y_queue = result_edges_y.sycl_queue + assert_sycl_queue_equal(hist_queue, ix.sycl_queue) + assert_sycl_queue_equal(edges_x_queue, ix.sycl_queue) + assert_sycl_queue_equal(edges_y_queue, ix.sycl_queue) + + @pytest.mark.parametrize("weights", [None, numpy.arange(7, 12)]) @pytest.mark.parametrize( "device", diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index e8e97d2ea38..def2bd39471 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1619,6 +1619,29 @@ def test_histogram(usm_type_v, usm_type_w): assert edges.usm_type == du.get_coerced_usm_type([usm_type_v, usm_type_w]) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_w", list_of_usm_types, ids=list_of_usm_types) +def test_histogram2d(usm_type_x, usm_type_y, usm_type_w): + x = dp.arange(5, usm_type=usm_type_x) + y = 
dp.arange(5, usm_type=usm_type_y) + w = dp.arange(7, 12, usm_type=usm_type_w) + + hist, edges_x, edges_y = dp.histogram2d(x, y, weights=w) + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert w.usm_type == usm_type_w + assert hist.usm_type == du.get_coerced_usm_type( + [usm_type_x, usm_type_y, usm_type_w] + ) + assert edges_x.usm_type == du.get_coerced_usm_type( + [usm_type_x, usm_type_y, usm_type_w] + ) + assert edges_y.usm_type == du.get_coerced_usm_type( + [usm_type_x, usm_type_y, usm_type_w] + ) + + @pytest.mark.parametrize("usm_type_v", list_of_usm_types, ids=list_of_usm_types) @pytest.mark.parametrize("usm_type_w", list_of_usm_types, ids=list_of_usm_types) def test_bincount(usm_type_v, usm_type_w): diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py b/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py index 46294681b8b..b2bdbdafd14 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_histogram.py @@ -548,13 +548,11 @@ def test_histogramdd_invalid_range(self): y, bin_edges = xp.histogramdd(x, range=r) -@pytest.mark.skip("histogram2d() is not implemented yet") -# @pytest.mark.skip(reason="XXX: NP2.0: histogram2d dtype") @testing.parameterize( *testing.product( { "weights": [None, 1, 2], - "weights_dtype": [numpy.int32, numpy.float64], + "weights_dtype": [numpy.int32, numpy.float32], "density": [True, False], "bins": [10, (8, 16), (16, 8), "array_list", "array"], "range": [None, ((20, 50), (10, 100))], @@ -564,7 +562,11 @@ def test_histogramdd_invalid_range(self): class TestHistogram2d: @testing.for_all_dtypes(no_bool=True, no_complex=True) - @testing.numpy_cupy_allclose(atol=1e-2, rtol=1e-7) + @testing.numpy_cupy_allclose( + atol=1e-2, + rtol=1e-7, + type_check=has_support_aspect64() and numpy_version() < "2.0.0", + ) def test_histogram2d(self, xp, dtype): x = testing.shaped_random((100,), xp, dtype, scale=100) y = 
testing.shaped_random((100,), xp, dtype, scale=100) @@ -590,7 +592,6 @@ def test_histogram2d(self, xp, dtype): return y, edges0, edges1 -@pytest.mark.skip("histogram2d() is not implemented yet") class TestHistogram2dErrors(unittest.TestCase): def test_histogram2d_disallow_arraylike_bins(self): From cbb3acf0d109cbab4597f93b215fee810b81a794 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 20 Jan 2025 18:12:20 +0100 Subject: [PATCH 3/8] Apply suggestions from code review Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/dpnp_iface_histograms.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index f96a91fb0e9..3b68ad2fc09 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -763,12 +763,13 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): Parameters ---------- x : {dpnp.ndarray, usm_ndarray} of shape (N,) - An array containing the x coordinates of the points to be + An array containing the `x` coordinates of the points to be histogrammed. y : {dpnp.ndarray, usm_ndarray} of shape (N,) - An array containing the y coordinates of the points to be + An array containing the `y` coordinates of the points to be histogrammed. - bins : {int, list of dpnp.ndarray or usm_ndarray, sequence of scalars}, optional + bins : {int, list of dpnp.ndarray or usm_ndarray, sequence of scalars}, \ + optional Histogram bins. The bins specification: @@ -788,19 +789,19 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): (if not specified explicitly in the `bins` parameters): ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range will be considered outliers and not tallied in the histogram. - density : bool, optional + density : {None, bool}, optional If ``False``, the default, returns the number of samples in each bin. 
If ``True``, returns the probability *density* function at the bin, ``bin_count / sample_count / bin_area``. - weights : {dpnp.ndarray, usm_ndarray} of shape(N,), optional + weights : {dpnp.ndarray, usm_ndarray} of shape (N,), optional An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. - Weights are normalized to 1 if `density` is True. If `density` is - False, the values of the returned histogram are equal to the sum of + Weights are normalized to ``1`` if `density` is ``True``. If `density` is + ``False``, the values of the returned histogram are equal to the sum of the weights belonging to the samples falling into each bin. Returns ------- - H : ndarray, shape(nx, ny) + H : dpnp.ndarray of shape (nx, ny) The bi-dimensional histogram of samples `x` and `y`. Values in `x` are histogrammed along the first dimension and values in `y` are histogrammed along the second dimension. @@ -816,15 +817,15 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): Notes ----- - When `density` is True, then the returned histogram is the sample + When `density` is ``True``, then the returned histogram is the sample density, defined such that the sum over bins of the product ``bin_value * bin_area`` is 1. Please note that the histogram does not follow the Cartesian convention where `x` values are on the abscissa and `y` values on the ordinate - axis. Rather, `x` is histogrammed along the first dimension of the + axis. Rather, `x` is histogrammed along the first dimension of the array (vertical), and `y` along the second dimension of the array - (horizontal). This ensures compatibility with `histogramdd`. + (horizontal). This ensures compatibility with `histogramdd`. 
Examples -------- From 51da9b91f9f5f5d18531f121ccffd9dab439fabc Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 20 Jan 2025 21:28:38 +0100 Subject: [PATCH 4/8] Fix docstring --- dpnp/dpnp_iface_histograms.py | 48 ++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 3b68ad2fc09..4c8a253093c 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -756,7 +756,6 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): def histogram2d(x, y, bins=10, range=None, density=None, weights=None): - # pylint: disable=line-too-long """ Compute the bi-dimensional histogram of two data samples. @@ -768,9 +767,8 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): y : {dpnp.ndarray, usm_ndarray} of shape (N,) An array containing the `y` coordinates of the points to be histogrammed. - bins : {int, list of dpnp.ndarray or usm_ndarray, sequence of scalars}, \ - optional - Histogram bins. + bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \ + [int, array], [array, int]}, optional The bins specification: @@ -784,30 +782,38 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): * A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges. - range : {dpnp.ndarray, usm_ndarray} of shape (2,2), optional + Default: ``None`` + range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional The leftmost and rightmost edges of the bins along each dimension (if not specified explicitly in the `bins` parameters): ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range will be considered outliers and not tallied in the histogram. + + Default: ``None`` density : {None, bool}, optional - If ``False``, the default, returns the number of samples in each bin. 
+ If ``False`` or ``None``, the default, returns the number of + samples in each bin. If ``True``, returns the probability *density* function at the bin, - ``bin_count / sample_count / bin_area``. - weights : {dpnp.ndarray, usm_ndarray} of shape (N,), optional + ``bin_count / sample_count / bin_volume``. + + Default: ``None`` + weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. - Weights are normalized to ``1`` if `density` is ``True``. If `density` is - ``False``, the values of the returned histogram are equal to the sum of - the weights belonging to the samples falling into each bin. + Weights are normalized to ``1`` if `density` is ``True``. + If `density` is ``False``, the values of the returned histogram + are equal to the sum of the weights belonging to the samples + falling into each bin. + Default: ``None`` Returns ------- H : dpnp.ndarray of shape (nx, ny) The bi-dimensional histogram of samples `x` and `y`. Values in `x` are histogrammed along the first dimension and values in `y` are histogrammed along the second dimension. - xedges : dpnp.ndarray, shape(nx+1,) + xedges : dpnp.ndarray of shape (nx+1,) The bin edges along the first dimension. - yedges : dpnp.ndarray, shape(ny+1,) + yedges : dpnp.ndarray of shape (ny+1,) The bin edges along the second dimension. See Also @@ -843,7 +849,6 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): >>> edges_y [-1.1889046 -0.07263839 1.0436279 2.159894 ] """ - # pylint: enable=line-too-long dpnp.check_supported_arrays_type(x, y) if weights is not None: @@ -1066,7 +1071,7 @@ def _histdd_extract_arrays(sample, weights, bins): return all_arrays -def histogramdd(sample, bins=10, range=None, density=False, weights=None): +def histogramdd(sample, bins=10, range=None, density=None, weights=None): """ Compute the multidimensional histogram of some data. 
@@ -1093,6 +1098,13 @@ def histogramdd(sample, bins=10, range=None, density=False, weights=None): values being used for the corresponding dimension. None is equivalent to passing a tuple of D None values. + Default: ``None`` + density : {None, bool}, optional + If ``False`` or ``None``, the default, returns the number of + samples in each bin. + If ``True``, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_volume``. + Default: ``None`` weights : {dpnp.ndarray, usm_ndarray}, optional An (N,)-shaped array of values `w_i` weighing each sample @@ -1102,12 +1114,6 @@ def histogramdd(sample, bins=10, range=None, density=False, weights=None): weights belonging to the samples falling into each bin. Default: ``None`` - density : bool, optional - If ``False``, the default, returns the number of samples in each bin. - If ``True``, returns the probability *density* function at the bin, - ``bin_count / sample_count / bin_volume``. - - Default: ``False`` Returns ------- From dbc2579f47e072894486e2a61b0a15bbef76057a Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Mon, 20 Jan 2025 23:32:38 +0100 Subject: [PATCH 5/8] Fix bins default value in docs --- dpnp/dpnp_iface_histograms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 4c8a253093c..e7fc6c16414 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -782,7 +782,7 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): * A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges. 
- Default: ``None`` + Default: ``10`` range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional The leftmost and rightmost edges of the bins along each dimension (if not specified explicitly in the `bins` parameters): From 9fb1e59f4a9dffd32b6e6d2371340d9f04b35a25 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Wed, 22 Jan 2025 15:37:08 +0100 Subject: [PATCH 6/8] Apply review comments --- dpnp/dpnp_iface_histograms.py | 106 +++++++++++++-------------- dpnp/dpnp_utils/dpnp_utils_common.py | 18 ++++- dpnp/tests/test_histogram.py | 2 +- 3 files changed, 71 insertions(+), 55 deletions(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index e7fc6c16414..a22cb52d446 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -42,15 +42,18 @@ import dpctl.utils as dpu import numpy -from dpctl.tensor._type_utils import _can_cast import dpnp # pylint: disable=no-name-in-module import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext +from dpnp.dpnp_utils.dpnp_utils_common import ( + result_type_for_device, + to_supported_dtypes, +) # pylint: disable=no-name-in-module -from .dpnp_utils import get_usm_allocations, map_dtype_to_device +from .dpnp_utils import get_usm_allocations __all__ = [ "bincount", @@ -66,33 +69,15 @@ _range = range -def _result_type_for_device(dtypes, device): - rt = dpnp.result_type(*dtypes) - return map_dtype_to_device(rt, device) - - def _align_dtypes(a_dtype, bins_dtype, ntype, supported_types, device): - has_fp64 = device.has_aspect_fp64 - has_fp16 = device.has_aspect_fp16 - - a_bin_dtype = _result_type_for_device([a_dtype, bins_dtype], device) + a_bin_dtype = result_type_for_device([a_dtype, bins_dtype], device) # histogram implementation doesn't support uint64 as histogram type # we can use int64 instead. 
Result would be correct even in case of overflow if ntype == numpy.uint64: ntype = dpnp.int64 - if (a_bin_dtype, ntype) in supported_types: - return a_bin_dtype, ntype - - for sample_type, hist_type in supported_types: - if _can_cast( - a_bin_dtype, sample_type, has_fp16, has_fp64 - ) and _can_cast(ntype, hist_type, has_fp16, has_fp64): - return sample_type, hist_type - - # should not happen - return None, None + return to_supported_dtypes([a_bin_dtype, ntype], supported_types, device) def _ravel_check_a_and_weights(a, weights): @@ -524,6 +509,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): If `bins` is a sequence, it defines a monotonically increasing array of bin edges, including the rightmost edge, allowing for non-uniform bin widths. + Default: ``10``. range : {None, 2-tuple of float}, optional The lower and upper range of the bins. If not provided, range is simply @@ -532,6 +518,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): affects the automatic bin computation as well. While bin width is computed to be optimal based on the actual data within `range`, the bin count will fill the entire range including portions containing no data. + Default: ``None``. density : {None, bool}, optional If ``False`` or ``None``, the result will contain the number of samples @@ -540,6 +527,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): the range is ``1``. Note that the sum of the histogram values will not be equal to ``1`` unless bins of unity width are chosen; it is not a probability *mass* function. + Default: ``None``. weights : {None, dpnp.ndarray, usm_ndarray}, optional An array of weights, of the same shape as `a`. Each value in `a` only @@ -549,6 +537,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): Please note that the ``dtype`` of `weights` will also become the ``dtype`` of the returned accumulator (`hist`), so it must be large enough to hold accumulated values as well. 
+ Default: ``None``. Returns @@ -782,29 +771,31 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): * A combination [int, array] or [array, int], where int is the number of bins and array is the bin edges. - Default: ``10`` + Default: ``10``. range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional The leftmost and rightmost edges of the bins along each dimension - (if not specified explicitly in the `bins` parameters): - ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range - will be considered outliers and not tallied in the histogram. + If ``None`` the ranges are + ``[[x.min(), x.max()], [y.min(), y.max()]]``. All values outside + of this range will be considered outliers and not tallied in the + histogram. - Default: ``None`` + Default: ``None``. density : {None, bool}, optional If ``False`` or ``None``, the default, returns the number of samples in each bin. If ``True``, returns the probability *density* function at the bin, ``bin_count / sample_count / bin_volume``. - Default: ``None`` + Default: ``None``. weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional An array of values ``w_i`` weighing each sample ``(x_i, y_i)``. Weights are normalized to ``1`` if `density` is ``True``. If `density` is ``False``, the values of the returned histogram are equal to the sum of the weights belonging to the samples falling into each bin. + If ``None`` all samples are assigned a weight of ``1``. - Default: ``None`` + Default: ``None``. Returns ------- H : dpnp.ndarray of shape (nx, ny) @@ -836,18 +827,27 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): Examples -------- >>> import dpnp as np - >>> x = np.random.randn(20) - >>> y = np.random.randn(20) + >>> x = np.random.randn(20).astype("float32") + >>> y = np.random.randn(20).astype("float32") >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3)) + >>> hist.shape + (4, 3) >>> hist - [[1. 0. 0.] - [0. 0. 0.] - [5. 6. 4.] - [1. 2. 
1.]] + array([[1., 2., 0.], + [0., 3., 1.], + [1., 4., 1.], + [1., 3., 3.]], dtype=float32) + >>> edges_x.shape + (5,) >>> edges_x - [-5.6575713 -3.5574734 -1.4573755 0.6427226 2.74282 ] + array([-1.7516936 , -0.96109843, -0.17050326, 0.62009203, 1.4106871 ], + dtype=float32) + >>> edges_y.shape + (4,) >>> edges_y - [-1.1889046 -0.07263839 1.0436279 2.159894 ] + array([-2.6604428 , -0.94615364, 0.76813555, 2.4824247 ], dtype=float32) + + Please note, that resulting values of histogram and edges would be different """ dpnp.check_supported_arrays_type(x, y) @@ -869,7 +869,7 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights]) device = exec_q.sycl_device - sample_dtype = _result_type_for_device([x.dtype, y.dtype], device) + sample_dtype = result_type_for_device([x.dtype, y.dtype], device) # Unlike histogramdd histogram2d accepts 1d bins and # apply it to both dimensions @@ -883,7 +883,7 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None): bins_dtypes = [sample_dtype] bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")] - bins_dtype = _result_type_for_device(bins_dtypes, device) + bins_dtype = result_type_for_device(bins_dtypes, device) hist_dtype = _histdd_hist_dtype(exec_q, weights) supported_types = statistics_ext.histogramdd_dtypes() @@ -1026,9 +1026,7 @@ def _histdd_hist_dtype(queue, weights): # hist_dtype is either float or complex, so it is ok # to calculate it as result type between default_float and # weights.dtype - hist_dtype = _result_type_for_device( - [hist_dtype, weights.dtype], device - ) + hist_dtype = result_type_for_device([hist_dtype, weights.dtype], device) return hist_dtype @@ -1039,7 +1037,7 @@ def _histdd_sample_dtype(queue, sample, bin_edges_list): dtypes_ = [bin_edges.dtype for bin_edges in bin_edges_list] dtypes_.append(sample.dtype) - return _result_type_for_device(dtypes_, device) + return result_type_for_device(dtypes_, 
device) def _histdd_supported_dtypes(sample, bin_edges_list, weights): @@ -1089,31 +1087,33 @@ def histogramdd(sample, bins=10, range=None, density=None, weights=None): * The number of bins for each dimension (nx, ny, ... =bins) * The number of bins for all dimensions (nx=ny=...=bins). - Default: ``10`` + Default: ``10``. range : {None, sequence}, optional A sequence of length D, each an optional (lower, upper) tuple giving the outer bin edges to be used if the edges are not given explicitly in `bins`. - An entry of None in the sequence results in the minimum and maximum + An entry of ``None`` in the sequence results in the minimum and maximum values being used for the corresponding dimension. - None is equivalent to passing a tuple of D None values. + ``None`` is equivalent to passing a tuple of D ``None`` values. - Default: ``None`` + Default: ``None``. density : {None, bool}, optional If ``False`` or ``None``, the default, returns the number of samples in each bin. If ``True``, returns the probability *density* function at the bin, ``bin_count / sample_count / bin_volume``. - Default: ``None`` - weights : {dpnp.ndarray, usm_ndarray}, optional + Default: ``None``. + weights : {None, dpnp.ndarray, usm_ndarray}, optional An (N,)-shaped array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. - Weights are normalized to 1 if density is True. If density is False, - the values of the returned histogram are equal to the sum of the - weights belonging to the samples falling into each bin. + Weights are normalized to ``1`` if density is ``True``. + If density is ``False``, the values of the returned histogram + are equal to the sum of the weights belonging to the samples + falling into each bin. + If ``None`` all samples are assigned a weight of ``1``. - Default: ``None`` + Default: ``None``. 
Returns ------- diff --git a/dpnp/dpnp_utils/dpnp_utils_common.py b/dpnp/dpnp_utils/dpnp_utils_common.py index 3cc2ebf4d31..97b5568f1d5 100644 --- a/dpnp/dpnp_utils/dpnp_utils_common.py +++ b/dpnp/dpnp_utils/dpnp_utils_common.py @@ -54,6 +54,19 @@ def to_supported_dtypes(dtypes, supported_types, device): def is_castable(dtype, stype): return _can_cast(dtype, stype, has_fp16, has_fp64) + if not isinstance(supported_types, Iterable): + supported_types = (supported_types,) + + if isinstance(dtypes, Iterable): + sdtypes_elem = supported_types[0] + if not isinstance(sdtypes_elem, Iterable): + raise ValueError( + "Input and supported types must have the same length" + ) + + typ = type(sdtypes_elem) + dtypes = typ(dtypes) + if dtypes in supported_types: return dtypes @@ -78,4 +91,7 @@ def is_castable(dtype, stype): ): return stypes - return None + if not isinstance(dtypes, Iterable): + return None + + return (None,) * len(dtypes) diff --git a/dpnp/tests/test_histogram.py b/dpnp/tests/test_histogram.py index 0c698dff129..6d9f53ed920 100644 --- a/dpnp/tests/test_histogram.py +++ b/dpnp/tests/test_histogram.py @@ -930,7 +930,7 @@ def test_2d(self): def test_finite_range(self, xp): x = y = xp.linspace(0.0, 1.0, num=100) - # normal ranges should be fine + # normal ranges should be finite _, _, _ = xp.histogram2d(x, y, range=[[0.25, 0.75]] * 2) assert_raises( ValueError, xp.histogram2d, x, y, range=[[xp.nan, 0.75]] * 2 From 2b97648c8a1cedb024bd14dc7602e11c92ba2fec Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Thu, 23 Jan 2025 05:22:03 +0100 Subject: [PATCH 7/8] Update dpnp/dpnp_iface_histograms.py Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/dpnp_iface_histograms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index a22cb52d446..d30434b3d8e 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -847,7 +847,8 @@ def 
histogram2d(x, y, bins=10, range=None, density=None, weights=None): >>> edges_y array([-2.6604428 , -0.94615364, 0.76813555, 2.4824247 ], dtype=float32) - Please note, that resulting values of histogram and edges would be different + Please note that the resulting values of the histogram and edges may vary. + """ dpnp.check_supported_arrays_type(x, y) From 1006a943fe51cd776c65555fc5f7fb41fd8c1b52 Mon Sep 17 00:00:00 2001 From: Alexander Kalistratov Date: Thu, 23 Jan 2025 17:47:02 +0100 Subject: [PATCH 8/8] add no cover --- dpnp/dpnp_utils/dpnp_utils_common.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dpnp/dpnp_utils/dpnp_utils_common.py b/dpnp/dpnp_utils/dpnp_utils_common.py index b6048258e4c..ea8de9793f0 100644 --- a/dpnp/dpnp_utils/dpnp_utils_common.py +++ b/dpnp/dpnp_utils/dpnp_utils_common.py @@ -55,12 +55,12 @@ def is_castable(dtype, stype): return _can_cast(dtype, stype, has_fp16, has_fp64) if not isinstance(supported_types, Iterable): - supported_types = (supported_types,) + supported_types = (supported_types,) # pragma: no cover if isinstance(dtypes, Iterable): sdtypes_elem = supported_types[0] if not isinstance(sdtypes_elem, Iterable): - raise ValueError( + raise ValueError( # pragma: no cover "Input and supported types must have the same length" ) @@ -72,7 +72,7 @@ def is_castable(dtype, stype): for stypes in supported_types: if not isinstance(dtypes, Iterable): - if isinstance(stypes, Iterable): + if isinstance(stypes, Iterable): # pragma: no cover raise ValueError( "Input and supported types must have the same length" ) @@ -80,7 +80,9 @@ def is_castable(dtype, stype): if is_castable(dtypes, stypes): return stypes else: - if not isinstance(stypes, Iterable) or len(dtypes) != len(stypes): + if not isinstance(stypes, Iterable) or len(dtypes) != len( + stypes + ): # pragma: no cover raise ValueError( "Input and supported types must have the same length" ) @@ -91,7 +93,7 @@ def is_castable(dtype, stype): ): return stypes -
if not isinstance(dtypes, Iterable): # pragma: no cover - return None # pragma: no cover + if not isinstance(dtypes, Iterable): # pragma: no cover + return None # pragma: no cover - return (None,) * len(dtypes) # pragma: no cover + return (None,) * len(dtypes) # pragma: no cover