From 0754e86068c09d4089cb1322ea51776eb5265b3b Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 21 Jan 2025 04:11:54 -0500 Subject: [PATCH] Use unsigned bytes to back Buffer This makes compressors consistent with v2, and seems more correct than signed bytes. Fixes #2735 --- src/zarr/core/buffer/core.py | 4 ++-- src/zarr/core/buffer/cpu.py | 8 ++++---- src/zarr/core/buffer/gpu.py | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 85a7351fc7..3c373a67f4 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -139,7 +139,7 @@ class Buffer(ABC): def __init__(self, array_like: ArrayLike) -> None: if array_like.ndim != 1: raise ValueError("array_like: only 1-dim allowed") - if array_like.dtype != np.dtype("b"): + if array_like.dtype != np.dtype("B"): raise ValueError("array_like: only byte dtype allowed") self._data = array_like @@ -302,7 +302,7 @@ class NDBuffer: Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer - is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate. diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 5019075496..dda2282bc1 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None: @classmethod def create_zero_length(cls) -> Self: - return cls(np.array([], dtype="b")) + return cls(np.array([], dtype="B")) @classmethod def from_buffer(cls, buffer: core.Buffer) -> Self: @@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: ------- New buffer representing `bytes_like` """ - return cls.from_array_like(np.frombuffer(bytes_like, dtype="b")) + return cls.from_array_like(np.frombuffer(bytes_like, dtype="B")) def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). @@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self: """Concatenate two buffers""" other_array = other.as_array_like() - assert other_array.dtype == np.dtype("b") + assert other_array.dtype == np.dtype("B") return self.__class__( np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array))) ) @@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer): Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer - is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate. diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py index 6941c8897e..fbfebedaf4 100644 --- a/src/zarr/core/buffer/gpu.py +++ b/src/zarr/core/buffer/gpu.py @@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None: if array_like.ndim != 1: raise ValueError("array_like: only 1-dim allowed") - if array_like.dtype != np.dtype("b"): + if array_like.dtype != np.dtype("B"): raise ValueError("array_like: only byte dtype allowed") if not hasattr(array_like, "__cuda_array_interface__"): @@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self: ------- New empty 0-length buffer """ - return cls(cp.array([], dtype="b")) + return cls(cp.array([], dtype="B")) @classmethod def from_buffer(cls, buffer: core.Buffer) -> Self: @@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self: @classmethod def from_bytes(cls, bytes_like: BytesLike) -> Self: - return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b")) + return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B")) def as_numpy_array(self) -> npt.NDArray[Any]: return cast(npt.NDArray[Any], cp.asnumpy(self._data)) def __add__(self, other: core.Buffer) -> Self: other_array = other.as_array_like() - assert other_array.dtype == np.dtype("b") + assert other_array.dtype == np.dtype("B") gpu_other = Buffer(other_array) gpu_other_array = gpu_other.as_array_like() return self.__class__( @@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer): Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer - is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate.