From 0754e86068c09d4089cb1322ea51776eb5265b3b Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Tue, 21 Jan 2025 04:11:54 -0500
Subject: [PATCH] Use unsigned bytes to back Buffer

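This makes compressors consistent with v2 by backing Buffer with unsigned
bytes (NumPy dtype "B", uint8) instead of signed bytes (dtype "b", int8).
Unsigned bytes also seem like the more correct representation for raw
byte data.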

Fixes #2735
---
 src/zarr/core/buffer/core.py |  4 ++--
 src/zarr/core/buffer/cpu.py  |  8 ++++----
 src/zarr/core/buffer/gpu.py  | 10 +++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py
index 85a7351fc7..3c373a67f4 100644
--- a/src/zarr/core/buffer/core.py
+++ b/src/zarr/core/buffer/core.py
@@ -139,7 +139,7 @@ class Buffer(ABC):
     def __init__(self, array_like: ArrayLike) -> None:
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
-        if array_like.dtype != np.dtype("b"):
+        if array_like.dtype != np.dtype("B"):
             raise ValueError("array_like: only byte dtype allowed")
         self._data = array_like
 
@@ -302,7 +302,7 @@ class NDBuffer:
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.
diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py
index 5019075496..dda2282bc1 100644
--- a/src/zarr/core/buffer/cpu.py
+++ b/src/zarr/core/buffer/cpu.py
@@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None:
 
     @classmethod
     def create_zero_length(cls) -> Self:
-        return cls(np.array([], dtype="b"))
+        return cls(np.array([], dtype="B"))
 
     @classmethod
     def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
         -------
             New buffer representing `bytes_like`
         """
-        return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
+        return cls.from_array_like(np.frombuffer(bytes_like, dtype="B"))
 
     def as_numpy_array(self) -> npt.NDArray[Any]:
         """Returns the buffer as a NumPy array (host memory).
@@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self:
         """Concatenate two buffers"""
 
         other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("b")
+        assert other_array.dtype == np.dtype("B")
         return self.__class__(
             np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
         )
@@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer):
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.
diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py
index 6941c8897e..fbfebedaf4 100644
--- a/src/zarr/core/buffer/gpu.py
+++ b/src/zarr/core/buffer/gpu.py
@@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None:
 
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
-        if array_like.dtype != np.dtype("b"):
+        if array_like.dtype != np.dtype("B"):
             raise ValueError("array_like: only byte dtype allowed")
 
         if not hasattr(array_like, "__cuda_array_interface__"):
@@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self:
         -------
             New empty 0-length buffer
         """
-        return cls(cp.array([], dtype="b"))
+        return cls(cp.array([], dtype="B"))
 
     @classmethod
     def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self:
 
     @classmethod
     def from_bytes(cls, bytes_like: BytesLike) -> Self:
-        return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
+        return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B"))
 
     def as_numpy_array(self) -> npt.NDArray[Any]:
         return cast(npt.NDArray[Any], cp.asnumpy(self._data))
 
     def __add__(self, other: core.Buffer) -> Self:
         other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("b")
+        assert other_array.dtype == np.dtype("B")
         gpu_other = Buffer(other_array)
         gpu_other_array = gpu_other.as_array_like()
         return self.__class__(
@@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer):
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.