Skip to content

Commit

Permalink
Implementation of histogram2d
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderKalistratov committed Jan 15, 2025
1 parent 3b3a233 commit a692ff5
Show file tree
Hide file tree
Showing 5 changed files with 395 additions and 93 deletions.
161 changes: 73 additions & 88 deletions dpnp/dpnp_iface_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
"digitize",
"histogram",
"histogram_bin_edges",
"histogram2d"
"histogram2d",
"histogramdd",
]

Expand Down Expand Up @@ -139,6 +139,9 @@ def _is_finite(a):
return numpy.isfinite(a)

if range is not None:
if len(range) != 2:
raise ValueError("range argument must consist of 2 elements.")

first_edge, last_edge = range
if first_edge > last_edge:
raise ValueError("max must be larger than min in range parameter.")
Expand Down Expand Up @@ -753,6 +756,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):


def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
# pylint: disable=line-too-long
"""
Compute the bi-dimensional histogram of two data samples.
Expand All @@ -764,8 +768,10 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
y : {dpnp.ndarray, usm_ndarray} of shape (N,)
An array containing the y coordinates of the points to be
histogrammed.
bins : {int, list of dpnp.ndarray, list of usm_ndarray, sequence of scalars}, optional
The bin specification:
bins : {int, list of dpnp.ndarray or usm_ndarray, sequence of scalars}, optional
Histogram bins.
The bins specification:
* If int, the number of bins for the two dimensions (nx=ny=bins).
* If array, the bin edges for the two dimensions
Expand Down Expand Up @@ -822,94 +828,73 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
Examples
--------
>>> import numpy as np
>>> from matplotlib.image import NonUniformImage
>>> import matplotlib.pyplot as plt
Construct a 2-D histogram with variable bin width. First define the bin
edges:
>>> xedges = [0, 1, 3, 5]
>>> yedges = [0, 2, 3, 4, 6]
Next we create a histogram H with random bin content:
>>> x = np.random.normal(2, 1, 100)
>>> y = np.random.normal(1, 1, 100)
>>> H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges))
>>> # Histogram does not follow Cartesian convention (see Notes),
>>> # therefore transpose H for visualization purposes.
>>> H = H.T
:func:`imshow <matplotlib.pyplot.imshow>` can only display square bins:
>>> fig = plt.figure(figsize=(7, 3))
>>> ax = fig.add_subplot(131, title='imshow: square bins')
>>> plt.imshow(H, interpolation='nearest', origin='lower',
... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
<matplotlib.image.AxesImage object at 0x...>
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>` can display actual edges:
>>> ax = fig.add_subplot(132, title='pcolormesh: actual edges',
... aspect='equal')
>>> X, Y = np.meshgrid(xedges, yedges)
>>> ax.pcolormesh(X, Y, H)
<matplotlib.collections.QuadMesh object at 0x...>
:class:`NonUniformImage <matplotlib.image.NonUniformImage>` can be used to
display actual bin edges with interpolation:
>>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated',
... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]])
>>> im = NonUniformImage(ax, interpolation='bilinear')
>>> xcenters = (xedges[:-1] + xedges[1:]) / 2
>>> ycenters = (yedges[:-1] + yedges[1:]) / 2
>>> im.set_data(xcenters, ycenters, H)
>>> ax.add_image(im)
>>> plt.show()
It is also possible to construct a 2-D histogram without specifying bin
edges:
>>> # Generate non-symmetric test data
>>> n = 10000
>>> x = np.linspace(1, 100, n)
>>> y = 2*np.log(x) + np.random.rand(n) - 0.5
>>> # Compute 2d histogram. Note the order of x/y and xedges/yedges
>>> H, yedges, xedges = np.histogram2d(y, x, bins=20)
Now we can plot the histogram using
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`, and a
:func:`hexbin <matplotlib.pyplot.hexbin>` for comparison.
>>> # Plot histogram using pcolormesh
>>> fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True)
>>> ax1.pcolormesh(xedges, yedges, H, cmap='rainbow')
>>> ax1.plot(x, 2*np.log(x), 'k-')
>>> ax1.set_xlim(x.min(), x.max())
>>> ax1.set_ylim(y.min(), y.max())
>>> ax1.set_xlabel('x')
>>> ax1.set_ylabel('y')
>>> ax1.set_title('histogram2d')
>>> ax1.grid()
>>> # Create hexbin plot for comparison
>>> ax2.hexbin(x, y, gridsize=20, cmap='rainbow')
>>> ax2.plot(x, 2*np.log(x), 'k-')
>>> ax2.set_title('hexbin')
>>> ax2.set_xlim(x.min(), x.max())
>>> ax2.set_xlabel('x')
>>> ax2.grid()
>>> plt.show()
>>> import dpnp as np
>>> x = np.random.randn(20)
>>> y = np.random.randn(20)
>>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
>>> hist
[[1. 0. 0.]
[0. 0. 0.]
[5. 6. 4.]
[1. 2. 1.]]
>>> edges_x
[-5.6575713 -3.5574734 -1.4573755 0.6427226 2.74282 ]
>>> edges_y
[-1.1889046 -0.07263839 1.0436279 2.159894 ]
"""
# pylint: enable=line-too-long

dpnp.check_supported_arrays_type(x, y)
if weights is not None:
dpnp.check_supported_arrays_type(weights)

if x.ndim != 1 or y.ndim != 1:
raise ValueError(
f"x and y must be 1-dimensional arrays."
f"Got {x.ndim} and {y.ndim} respectively"
)

if len(x) != len(y):
raise ValueError(f'x and y must have the same length. Got {len(x)} and {len(y)} respectively')
raise ValueError(
f"x and y must have the same length."
f"Got {len(x)} and {len(y)} respectively"
)

usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
device = exec_q.sycl_device

sample_dtype = _result_type_for_device([x.dtype, y.dtype], device)

# Unlike histogramdd histogram2d accepts 1d bins and
# apply it to both dimensions
# at the same moment two elements bins should be interpreted as
# number of bins in each dimension and array-like bins with one element
# is not allowed
if isinstance(bins, Iterable) and len(bins) > 2:
bins = [bins] * 2

bins = _histdd_normalize_bins(bins, 2)
bins_dtypes = [sample_dtype]
bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]

bins_dtype = _result_type_for_device(bins_dtypes, device)
hist_dtype = _histdd_hist_dtype(exec_q, weights)

supported_types = statistics_ext.histogramdd_dtypes()

sample_dtype, _ = _align_dtypes(
sample_dtype, bins_dtype, hist_dtype, supported_types, device
)

hist, edges = histogramdd([x, y], bins, range, density, weights)
sample = dpnp.empty_like(
x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
)
sample[:, 0] = x
sample[:, 1] = y

hist, edges = histogramdd(
sample, bins=bins, range=range, density=density, weights=weights
)
return hist, edges[0], edges[1]


Expand Down Expand Up @@ -1080,7 +1065,7 @@ def _histdd_extract_arrays(sample, weights, bins):
return all_arrays


def histogramdd(sample, bins=10, range=None, weights=None, density=False):
def histogramdd(sample, bins=10, range=None, density=False, weights=None):
"""
Compute the multidimensional histogram of some data.
Expand Down Expand Up @@ -1155,7 +1140,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
elif sample.ndim > 2:
raise ValueError("sample must have no more than 2 dimensions")

ndim = sample.shape[1] if sample.size > 0 else 1
ndim = sample.shape[1]

_arrays = _histdd_extract_arrays(sample, weights, bins)
usm_type, queue = get_usm_allocations(_arrays)
Expand Down
Loading

0 comments on commit a692ff5

Please sign in to comment.