Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Histogram2d #2262

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
170 changes: 162 additions & 8 deletions dpnp/dpnp_iface_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"digitize",
"histogram",
"histogram_bin_edges",
"histogram2d",
"histogramdd",
]

Expand Down Expand Up @@ -138,6 +139,9 @@ def _is_finite(a):
return numpy.isfinite(a)

if range is not None:
if len(range) != 2:
raise ValueError("range argument must consist of 2 elements.")

first_edge, last_edge = range
if first_edge > last_edge:
raise ValueError("max must be larger than min in range parameter.")
Expand Down Expand Up @@ -751,6 +755,155 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
return bin_edges


def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
    """
    Compute the bi-dimensional histogram of two data samples.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the `x` coordinates of the points to be
        histogrammed.
    y : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the `y` coordinates of the points to be
        histogrammed.
    bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \
            [int, array], [array, int]}, optional

        The bins specification:

        * If int, the number of bins for the two dimensions (nx=ny=bins).
        * If array, the bin edges for the two dimensions
          (x_edges=y_edges=bins).
        * If [int, int], the number of bins in each dimension
          (nx, ny = bins).
        * If [array, array], the bin edges in each dimension
          (x_edges, y_edges = bins).
        * A combination [int, array] or [array, int], where int
          is the number of bins and array is the bin edges.

        Default: ``10``
    range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional
        The leftmost and rightmost edges of the bins along each dimension
        (if not specified explicitly in the `bins` parameters):
        ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range
        will be considered outliers and not tallied in the histogram.

        Default: ``None``
    density : {None, bool}, optional
        If ``False`` or ``None``, the default, returns the number of
        samples in each bin.
        If ``True``, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_volume``.

        Default: ``None``
    weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional
        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
        Weights are normalized to ``1`` if `density` is ``True``.
        If `density` is ``False``, the values of the returned histogram
        are equal to the sum of the weights belonging to the samples
        falling into each bin.

        Default: ``None``

    Returns
    -------
    H : dpnp.ndarray of shape (nx, ny)
        The bi-dimensional histogram of samples `x` and `y`. Values in `x`
        are histogrammed along the first dimension and values in `y` are
        histogrammed along the second dimension.
    xedges : dpnp.ndarray of shape (nx+1,)
        The bin edges along the first dimension.
    yedges : dpnp.ndarray of shape (ny+1,)
        The bin edges along the second dimension.

    Raises
    ------
    ValueError
        If `x` or `y` is not 1-dimensional, or if `x` and `y` have
        different lengths.

    See Also
    --------
    :obj:`dpnp.histogram` : 1D histogram
    :obj:`dpnp.histogramdd` : Multidimensional histogram

    Notes
    -----
    When `density` is ``True``, then the returned histogram is the sample
    density, defined such that the sum over bins of the product
    ``bin_value * bin_area`` is 1.

    Please note that the histogram does not follow the Cartesian convention
    where `x` values are on the abscissa and `y` values on the ordinate
    axis. Rather, `x` is histogrammed along the first dimension of the
    array (vertical), and `y` along the second dimension of the array
    (horizontal). This ensures compatibility with `histogramdd`.

    Examples
    --------
    >>> import dpnp as np
    >>> x = np.random.randn(20)
    >>> y = np.random.randn(20)
    >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
    >>> hist
    [[1. 0. 0.]
     [0. 0. 0.]
     [5. 6. 4.]
     [1. 2. 1.]]
    >>> edges_x
    [-5.6575713 -3.5574734 -1.4573755 0.6427226 2.74282 ]
    >>> edges_y
    [-1.1889046 -0.07263839 1.0436279 2.159894 ]
    """

    dpnp.check_supported_arrays_type(x, y)
    if weights is not None:
        dpnp.check_supported_arrays_type(weights)

    if x.ndim != 1 or y.ndim != 1:
        # The trailing space keeps the two sentences from running together
        # in the rendered message.
        raise ValueError(
            "x and y must be 1-dimensional arrays. "
            f"Got {x.ndim} and {y.ndim} respectively"
        )

    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length. "
            f"Got {len(x)} and {len(y)} respectively"
        )

    # Every argument that may carry a USM allocation takes part in choosing
    # the allocation type and execution queue.
    usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
    device = exec_q.sycl_device

    sample_dtype = _result_type_for_device([x.dtype, y.dtype], device)

    # Unlike histogramdd, histogram2d accepts a single 1-D array of bin
    # edges and applies it to both dimensions. A two-element `bins` is
    # instead interpreted as a per-dimension specification, so only
    # iterables longer than two elements are duplicated here; an array-like
    # `bins` with a single element is not allowed.
    if isinstance(bins, Iterable) and len(bins) > 2:
        bins = [bins] * 2

    bins = _histdd_normalize_bins(bins, 2)

    # Only array-valued bin specifications have a dtype to contribute;
    # integer bin counts are skipped by the hasattr check.
    bins_dtypes = [sample_dtype]
    bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]

    bins_dtype = _result_type_for_device(bins_dtypes, device)
    hist_dtype = _histdd_hist_dtype(exec_q, weights)

    supported_types = statistics_ext.histogramdd_dtypes()

    # NOTE(review): presumably selects a sample dtype compatible with the
    # dtype combinations the histogramdd backend supports; confirm against
    # _align_dtypes. Only the first returned element is used here.
    sample_dtype, _ = _align_dtypes(
        sample_dtype, bins_dtype, hist_dtype, supported_types, device
    )

    # Pack x and y as the two columns of an (N, 2) sample so the work can
    # be delegated to histogramdd.
    sample = dpnp.empty_like(
        x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
    )
    sample[:, 0] = x
    sample[:, 1] = y

    hist, edges = histogramdd(
        sample, bins=bins, range=range, density=density, weights=weights
    )
    return hist, edges[0], edges[1]


def _histdd_validate_bins(bins):
for i, b in enumerate(bins):
if numpy.ndim(b) == 0:
Expand Down Expand Up @@ -918,7 +1071,7 @@ def _histdd_extract_arrays(sample, weights, bins):
return all_arrays


def histogramdd(sample, bins=10, range=None, weights=None, density=False):
def histogramdd(sample, bins=10, range=None, density=None, weights=None):
"""
Compute the multidimensional histogram of some data.

Expand All @@ -945,6 +1098,13 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
values being used for the corresponding dimension.
None is equivalent to passing a tuple of D None values.

Default: ``None``
density : {None, bool}, optional
If ``False`` or ``None``, the default, returns the number of
samples in each bin.
If ``True``, returns the probability *density* function at the bin,
``bin_count / sample_count / bin_volume``.

Default: ``None``
weights : {dpnp.ndarray, usm_ndarray}, optional
antonwolfy marked this conversation as resolved.
Show resolved Hide resolved
An (N,)-shaped array of values `w_i` weighing each sample
Expand All @@ -954,12 +1114,6 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
weights belonging to the samples falling into each bin.

Default: ``None``
density : bool, optional
If ``False``, the default, returns the number of samples in each bin.
If ``True``, returns the probability *density* function at the bin,
``bin_count / sample_count / bin_volume``.

Default: ``False``

Returns
-------
Expand Down Expand Up @@ -993,7 +1147,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
elif sample.ndim > 2:
raise ValueError("sample must have no more than 2 dimensions")

ndim = sample.shape[1] if sample.size > 0 else 1
ndim = sample.shape[1]

_arrays = _histdd_extract_arrays(sample, weights, bins)
usm_type, queue = get_usm_allocations(_arrays)
Expand Down
Loading
Loading