Skip to content

Commit

Permalink
Use unsigned bytes to back Buffer
Browse files Browse the repository at this point in the history
This makes compressors consistent with v2, and buffers consistent with
`bytes` types.

Fixes zarr-developers#2735
  • Loading branch information
QuLogic committed Jan 22, 2025
1 parent a260ae9 commit 610689e
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion src/zarr/codecs/bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ async def _encode_single(

nd_array = chunk_array.as_ndarray_like()
# Flatten the nd-array (only copy if needed) and reinterpret as bytes
nd_array = nd_array.ravel().view(dtype="b")
nd_array = nd_array.ravel().view(dtype="B")
return chunk_spec.prototype.buffer.from_array_like(nd_array)

def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/codecs/crc32c_.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async def _encode_single(
# Calculate the checksum and "cast" it to a numpy array
checksum = np.array([crc32c(cast(typing_extensions.Buffer, data))], dtype=np.uint32)
# Append the checksum (as bytes) to the data
return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("b")))
return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B")))

def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
return input_byte_length + 4
Expand Down
4 changes: 2 additions & 2 deletions src/zarr/core/buffer/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ class Buffer(ABC):
def __init__(self, array_like: ArrayLike) -> None:
if array_like.ndim != 1:
raise ValueError("array_like: only 1-dim allowed")
if array_like.dtype != np.dtype("b"):
if array_like.dtype != np.dtype("B"):
raise ValueError("array_like: only byte dtype allowed")
self._data = array_like

Expand Down Expand Up @@ -302,7 +302,7 @@ class NDBuffer:
Notes
-----
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
in order to use Python's type system to differentiate between the contiguous
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
two classes separate.
Expand Down
8 changes: 4 additions & 4 deletions src/zarr/core/buffer/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None:

@classmethod
def create_zero_length(cls) -> Self:
return cls(np.array([], dtype="b"))
return cls(np.array([], dtype="B"))

@classmethod
def from_buffer(cls, buffer: core.Buffer) -> Self:
Expand Down Expand Up @@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
-------
New buffer representing `bytes_like`
"""
return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
return cls.from_array_like(np.frombuffer(bytes_like, dtype="B"))

def as_numpy_array(self) -> npt.NDArray[Any]:
"""Returns the buffer as a NumPy array (host memory).
Expand All @@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self:
"""Concatenate two buffers"""

other_array = other.as_array_like()
assert other_array.dtype == np.dtype("b")
assert other_array.dtype == np.dtype("B")
return self.__class__(
np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
)
Expand All @@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer):
Notes
-----
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
in order to use Python's type system to differentiate between the contiguous
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
two classes separate.
Expand Down
10 changes: 5 additions & 5 deletions src/zarr/core/buffer/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None:

if array_like.ndim != 1:
raise ValueError("array_like: only 1-dim allowed")
if array_like.dtype != np.dtype("b"):
if array_like.dtype != np.dtype("B"):
raise ValueError("array_like: only byte dtype allowed")

if not hasattr(array_like, "__cuda_array_interface__"):
Expand All @@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self:
-------
New empty 0-length buffer
"""
return cls(cp.array([], dtype="b"))
return cls(cp.array([], dtype="B"))

@classmethod
def from_buffer(cls, buffer: core.Buffer) -> Self:
Expand All @@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self:

@classmethod
def from_bytes(cls, bytes_like: BytesLike) -> Self:
return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B"))

def as_numpy_array(self) -> npt.NDArray[Any]:
return cast(npt.NDArray[Any], cp.asnumpy(self._data))

def __add__(self, other: core.Buffer) -> Self:
other_array = other.as_array_like()
assert other_array.dtype == np.dtype("b")
assert other_array.dtype == np.dtype("B")
gpu_other = Buffer(other_array)
gpu_other_array = gpu_other.as_array_like()
return self.__class__(
Expand All @@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer):
Notes
-----
The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
in order to use Python's type system to differentiate between the contiguous
Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
two classes separate.
Expand Down

0 comments on commit 610689e

Please sign in to comment.