Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Histogram2d #2262

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
151 changes: 149 additions & 2 deletions dpnp/dpnp_iface_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"digitize",
"histogram",
"histogram_bin_edges",
"histogram2d",
"histogramdd",
]

Expand Down Expand Up @@ -138,6 +139,9 @@ def _is_finite(a):
return numpy.isfinite(a)

if range is not None:
if len(range) != 2:
raise ValueError("range argument must consist of 2 elements.")

first_edge, last_edge = range
if first_edge > last_edge:
raise ValueError("max must be larger than min in range parameter.")
Expand Down Expand Up @@ -751,6 +755,149 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
return bin_edges


def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
    # pylint: disable=line-too-long
    """
    Compute the bi-dimensional histogram of two data samples.

    Parameters
    ----------
    x : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the x coordinates of the points to be
        histogrammed.
    y : {dpnp.ndarray, usm_ndarray} of shape (N,)
        An array containing the y coordinates of the points to be
        histogrammed.
    bins : {int, list of dpnp.ndarray or usm_ndarray, sequence of scalars}, optional
        Histogram bins.

        The bins specification:

        * If int, the number of bins for the two dimensions (nx=ny=bins).
        * If array, the bin edges for the two dimensions
          (x_edges=y_edges=bins).
        * If [int, int], the number of bins in each dimension
          (nx, ny = bins).
        * If [array, array], the bin edges in each dimension
          (x_edges, y_edges = bins).
        * A combination [int, array] or [array, int], where int
          is the number of bins and array is the bin edges.

    range : {dpnp.ndarray, usm_ndarray} of shape (2, 2), optional
        The leftmost and rightmost edges of the bins along each dimension
        (if not specified explicitly in the `bins` parameters):
        ``[[xmin, xmax], [ymin, ymax]]``. All values outside of this range
        will be considered outliers and not tallied in the histogram.
    density : bool, optional
        If ``False``, the default, returns the number of samples in each bin.
        If ``True``, returns the probability *density* function at the bin,
        ``bin_count / sample_count / bin_area``.
    weights : {dpnp.ndarray, usm_ndarray} of shape (N,), optional
        An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
        Weights are normalized to 1 if `density` is True. If `density` is
        False, the values of the returned histogram are equal to the sum of
        the weights belonging to the samples falling into each bin.

    Returns
    -------
    H : dpnp.ndarray, shape (nx, ny)
        The bi-dimensional histogram of samples `x` and `y`. Values in `x`
        are histogrammed along the first dimension and values in `y` are
        histogrammed along the second dimension.
    xedges : dpnp.ndarray, shape (nx+1,)
        The bin edges along the first dimension.
    yedges : dpnp.ndarray, shape (ny+1,)
        The bin edges along the second dimension.

    Raises
    ------
    ValueError
        If `x` or `y` is not 1-dimensional, or if `x` and `y` have
        different lengths.

    See Also
    --------
    :obj:`dpnp.histogram` : 1D histogram
    :obj:`dpnp.histogramdd` : Multidimensional histogram

    Notes
    -----
    When `density` is True, then the returned histogram is the sample
    density, defined such that the sum over bins of the product
    ``bin_value * bin_area`` is 1.

    Please note that the histogram does not follow the Cartesian convention
    where `x` values are on the abscissa and `y` values on the ordinate
    axis. Rather, `x` is histogrammed along the first dimension of the
    array (vertical), and `y` along the second dimension of the array
    (horizontal). This ensures compatibility with `histogramdd`.

    Examples
    --------
    >>> import dpnp as np
    >>> x = np.random.randn(20)
    >>> y = np.random.randn(20)
    >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
    >>> hist
    [[1. 0. 0.]
     [0. 0. 0.]
     [5. 6. 4.]
     [1. 2. 1.]]
    >>> edges_x
    [-5.6575713 -3.5574734 -1.4573755 0.6427226 2.74282 ]
    >>> edges_y
    [-1.1889046 -0.07263839 1.0436279 2.159894 ]

    """
    # pylint: enable=line-too-long

    dpnp.check_supported_arrays_type(x, y)
    if weights is not None:
        dpnp.check_supported_arrays_type(weights)

    if x.ndim != 1 or y.ndim != 1:
        # Note the trailing space: the two literals are concatenated.
        raise ValueError(
            "x and y must be 1-dimensional arrays. "
            f"Got {x.ndim} and {y.ndim} respectively"
        )

    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length. "
            f"Got {len(x)} and {len(y)} respectively"
        )

    usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
    device = exec_q.sycl_device

    sample_dtype = _result_type_for_device([x.dtype, y.dtype], device)

    # Unlike histogramdd, histogram2d accepts a single 1-D sequence of bin
    # edges and applies it to both dimensions. A two-element `bins` is
    # instead interpreted as a per-dimension specification, and an
    # array-like `bins` with a single element is not allowed.
    if isinstance(bins, Iterable) and len(bins) > 2:
        bins = [bins] * 2

    bins = _histdd_normalize_bins(bins, 2)

    # Only array-like bin specifications carry a dtype; integer bin counts
    # do not contribute to the common bins dtype.
    bins_dtypes = [sample_dtype]
    bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]

    bins_dtype = _result_type_for_device(bins_dtypes, device)
    hist_dtype = _histdd_hist_dtype(exec_q, weights)

    supported_types = statistics_ext.histogramdd_dtypes()

    # Align the sample dtype with the bins/histogram dtypes against the
    # dtype combinations reported by the histogramdd backend.
    sample_dtype, _ = _align_dtypes(
        sample_dtype, bins_dtype, hist_dtype, supported_types, device
    )

    # Pack x and y as the two columns of an (N, 2) sample so the actual
    # computation can be delegated to histogramdd.
    sample = dpnp.empty_like(
        x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
    )
    sample[:, 0] = x
    sample[:, 1] = y

    hist, edges = histogramdd(
        sample, bins=bins, range=range, density=density, weights=weights
    )
    return hist, edges[0], edges[1]


def _histdd_validate_bins(bins):
for i, b in enumerate(bins):
if numpy.ndim(b) == 0:
Expand Down Expand Up @@ -918,7 +1065,7 @@ def _histdd_extract_arrays(sample, weights, bins):
return all_arrays


def histogramdd(sample, bins=10, range=None, weights=None, density=False):
def histogramdd(sample, bins=10, range=None, density=False, weights=None):
"""
Compute the multidimensional histogram of some data.
Expand Down Expand Up @@ -993,7 +1140,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
elif sample.ndim > 2:
raise ValueError("sample must have no more than 2 dimensions")

ndim = sample.shape[1] if sample.size > 0 else 1
ndim = sample.shape[1]

_arrays = _histdd_extract_arrays(sample, weights, bins)
usm_type, queue = get_usm_allocations(_arrays)
Expand Down
Loading
Loading