Skip to content

Commit

Permalink
Implementation of histogram2d
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderKalistratov committed Jan 14, 2025
1 parent 3b3a233 commit ced0c40
Show file tree
Hide file tree
Showing 5 changed files with 356 additions and 87 deletions.
154 changes: 71 additions & 83 deletions dpnp/dpnp_iface_histograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
"digitize",
"histogram",
"histogram_bin_edges",
"histogram2d"
"histogram2d",
"histogramdd",
]

Expand Down Expand Up @@ -764,7 +764,8 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
y : {dpnp.ndarray, usm_ndarray} of shape (N,)
An array containing the y coordinates of the points to be
histogrammed.
bins : {int, list of dpnp.ndarray, list of usm_ndarray, sequence of scalars}, optional
bins : {int, list of dpnp.ndarray or usm_ndarray,
sequence of scalars}, optional
The bin specification:
* If int, the number of bins for the two dimensions (nx=ny=bins).
Expand Down Expand Up @@ -826,90 +827,77 @@ def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
>>> from matplotlib.image import NonUniformImage
>>> import matplotlib.pyplot as plt
Construct a 2-D histogram with variable bin width. First define the bin
edges:
>>> xedges = [0, 1, 3, 5]
>>> yedges = [0, 2, 3, 4, 6]
Next we create a histogram H with random bin content:
>>> x = np.random.normal(2, 1, 100)
>>> y = np.random.normal(1, 1, 100)
>>> H, xedges, yedges = np.histogram2d(x, y, bins=(xedges, yedges))
>>> # Histogram does not follow Cartesian convention (see Notes),
>>> # therefore transpose H for visualization purposes.
>>> H = H.T
:func:`imshow <matplotlib.pyplot.imshow>` can only display square bins:
>>> fig = plt.figure(figsize=(7, 3))
>>> ax = fig.add_subplot(131, title='imshow: square bins')
>>> plt.imshow(H, interpolation='nearest', origin='lower',
... extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]])
<matplotlib.image.AxesImage object at 0x...>
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>` can display actual edges:
>>> ax = fig.add_subplot(132, title='pcolormesh: actual edges',
... aspect='equal')
>>> X, Y = np.meshgrid(xedges, yedges)
>>> ax.pcolormesh(X, Y, H)
<matplotlib.collections.QuadMesh object at 0x...>
:class:`NonUniformImage <matplotlib.image.NonUniformImage>` can be used to
display actual bin edges with interpolation:
>>> ax = fig.add_subplot(133, title='NonUniformImage: interpolated',
... aspect='equal', xlim=xedges[[0, -1]], ylim=yedges[[0, -1]])
>>> im = NonUniformImage(ax, interpolation='bilinear')
>>> xcenters = (xedges[:-1] + xedges[1:]) / 2
>>> ycenters = (yedges[:-1] + yedges[1:]) / 2
>>> im.set_data(xcenters, ycenters, H)
>>> ax.add_image(im)
>>> plt.show()
It is also possible to construct a 2-D histogram without specifying bin
edges:
>>> # Generate non-symmetric test data
>>> n = 10000
>>> x = np.linspace(1, 100, n)
>>> y = 2*np.log(x) + np.random.rand(n) - 0.5
>>> # Compute 2d histogram. Note the order of x/y and xedges/yedges
>>> H, yedges, xedges = np.histogram2d(y, x, bins=20)
Now we can plot the histogram using
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`, and a
:func:`hexbin <matplotlib.pyplot.hexbin>` for comparison.
>>> # Plot histogram using pcolormesh
>>> fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True)
>>> ax1.pcolormesh(xedges, yedges, H, cmap='rainbow')
>>> ax1.plot(x, 2*np.log(x), 'k-')
>>> ax1.set_xlim(x.min(), x.max())
>>> ax1.set_ylim(y.min(), y.max())
>>> ax1.set_xlabel('x')
>>> ax1.set_ylabel('y')
>>> ax1.set_title('histogram2d')
>>> ax1.grid()
>>> # Create hexbin plot for comparison
>>> ax2.hexbin(x, y, gridsize=20, cmap='rainbow')
>>> ax2.plot(x, 2*np.log(x), 'k-')
>>> ax2.set_title('hexbin')
>>> ax2.set_xlim(x.min(), x.max())
>>> ax2.set_xlabel('x')
>>> ax2.grid()
>>> plt.show()
>>> import dpnp as np
>>> x = np.random.randn(20)
>>> y = np.random.randn(20)
>>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
>>> hist
[[1. 0. 0.]
[0. 0. 0.]
[5. 6. 4.]
[1. 2. 1.]]
>>> edges_x
[-5.6575713 -3.5574734 -1.4573755 0.6427226 2.74282 ]
>>> edges_y
[-1.1889046 -0.07263839 1.0436279 2.159894 ]
"""

dpnp.check_supported_arrays_type(x, y)
if weights is not None:
dpnp.check_supported_arrays_type(weights)

if x.ndim != 1 or y.ndim != 1:
raise ValueError(
f"x and y must be 1-dimensional arrays."
f"Got {x.ndim} and {y.ndim} respectively"
)

if len(x) != len(y):
raise ValueError(f'x and y must have the same length. Got {len(x)} and {len(y)} respectively')
raise ValueError(
f"x and y must have the same length."
f"Got {len(x)} and {len(y)} respectively"
)

usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
device = exec_q.sycl_device

sample_dtype = _result_type_for_device([x.dtype, y.dtype], device)

# Unlike histogramdd histogram2d accepts 1d bins and
# apply it to both dimensions
# at the same moment two elements bins should be interpreted as
# number of bins in each dimension and array-like bins with one element
# is not allowed
if isinstance(bins, Iterable) and len(bins) > 2:
bins = [bins] * 2

hist, edges = histogramdd([x, y], bins, range, density, weights)
bins = _histdd_normalize_bins(bins, 2)
bins_dtypes = [sample_dtype]
bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]

bins_dtype = _result_type_for_device(bins_dtypes, device)
hist_dtype = _histdd_hist_dtype(exec_q, weights)

supported_types = statistics_ext.histogramdd_dtypes()

sample_dtype, _ = _align_dtypes(
sample_dtype, bins_dtype, hist_dtype, supported_types, device
)

sample = dpnp.empty_like(
x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
)
sample[:, 0] = x
sample[:, 1] = y

hist, edges = histogramdd(
sample, bins=bins, range=range, density=density, weights=weights
)
return hist, edges[0], edges[1]


Expand Down Expand Up @@ -1080,7 +1068,7 @@ def _histdd_extract_arrays(sample, weights, bins):
return all_arrays


def histogramdd(sample, bins=10, range=None, weights=None, density=False):
def histogramdd(sample, bins=10, range=None, density=False, weights=None):
"""
Compute the multidimensional histogram of some data.
Expand Down Expand Up @@ -1155,7 +1143,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
elif sample.ndim > 2:
raise ValueError("sample must have no more than 2 dimensions")

ndim = sample.shape[1] if sample.size > 0 else 1
ndim = sample.shape[1]

_arrays = _histdd_extract_arrays(sample, weights, bins)
usm_type, queue = get_usm_allocations(_arrays)
Expand Down
Loading

0 comments on commit ced0c40

Please sign in to comment.