Skip to content

Commit

Permalink
Use unsigned bytes to back Buffer
Browse files Browse the repository at this point in the history
This makes compressors consistent with v2, and seems more correct than
signed bytes.

Fixes zarr-developers#2735
  • Loading branch information
QuLogic committed Jan 21, 2025
1 parent e9772ac commit 01c6e35
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/zarr/codecs/bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ async def _encode_single(

nd_array = chunk_array.as_ndarray_like()
# Flatten the nd-array (only copy if needed) and reinterpret as bytes
- nd_array = nd_array.ravel().view(dtype="b")
+ nd_array = nd_array.ravel().view(dtype="B")
return chunk_spec.prototype.buffer.from_array_like(nd_array)

def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
Expand Down
4 changes: 2 additions & 2 deletions src/zarr/core/buffer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ class Buffer(ABC):
def __init__(self, array_like: ArrayLike) -> None:
if array_like.ndim != 1:
raise ValueError("array_like: only 1-dim allowed")
- if array_like.dtype != np.dtype("b"):
+ if array_like.dtype != np.dtype("B"):
raise ValueError("array_like: only byte dtype allowed")
self._data = array_like

Expand Down Expand Up @@ -302,7 +302,7 @@ class NDBuffer:
Notes
-----
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
- is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+ is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
in order to use Python's type system to differentiate between the contiguous
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
two classes separate.
Expand Down
8 changes: 4 additions & 4 deletions src/zarr/core/buffer/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None:

@classmethod
def create_zero_length(cls) -> Self:
- return cls(np.array([], dtype="b"))
+ return cls(np.array([], dtype="B"))

@classmethod
def from_buffer(cls, buffer: core.Buffer) -> Self:
Expand Down Expand Up @@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
-------
New buffer representing `bytes_like`
"""
- return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
+ return cls.from_array_like(np.frombuffer(bytes_like, dtype="B"))

def as_numpy_array(self) -> npt.NDArray[Any]:
"""Returns the buffer as a NumPy array (host memory).
Expand All @@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self:
"""Concatenate two buffers"""

other_array = other.as_array_like()
- assert other_array.dtype == np.dtype("b")
+ assert other_array.dtype == np.dtype("B")
return self.__class__(
np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
)
Expand All @@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer):
Notes
-----
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
- is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+ is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
in order to use Python's type system to differentiate between the contiguous
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
two classes separate.
Expand Down
10 changes: 5 additions & 5 deletions src/zarr/core/buffer/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None:

if array_like.ndim != 1:
raise ValueError("array_like: only 1-dim allowed")
- if array_like.dtype != np.dtype("b"):
+ if array_like.dtype != np.dtype("B"):
raise ValueError("array_like: only byte dtype allowed")

if not hasattr(array_like, "__cuda_array_interface__"):
Expand All @@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self:
-------
New empty 0-length buffer
"""
- return cls(cp.array([], dtype="b"))
+ return cls(cp.array([], dtype="B"))

@classmethod
def from_buffer(cls, buffer: core.Buffer) -> Self:
Expand All @@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self:

@classmethod
def from_bytes(cls, bytes_like: BytesLike) -> Self:
- return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
+ return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B"))

def as_numpy_array(self) -> npt.NDArray[Any]:
return cast(npt.NDArray[Any], cp.asnumpy(self._data))

def __add__(self, other: core.Buffer) -> Self:
other_array = other.as_array_like()
- assert other_array.dtype == np.dtype("b")
+ assert other_array.dtype == np.dtype("B")
gpu_other = Buffer(other_array)
gpu_other_array = gpu_other.as_array_like()
return self.__class__(
Expand All @@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer):
Notes
-----
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
- is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+ is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
in order to use Python's type system to differentiate between the contiguous
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
two classes separate.
Expand Down

0 comments on commit 01c6e35

Please sign in to comment.