Add 64 bit atomics
atlv24 committed Mar 13, 2024
1 parent 6040820 commit 88e1094
Showing 14 changed files with 327 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -109,6 +109,7 @@ By @cwfitzgerald in [#5325](https://github.com/gfx-rs/wgpu/pull/5325).

By @ErichDonGubler in [#5146](https://github.com/gfx-rs/wgpu/pull/5146), [#5046](https://github.com/gfx-rs/wgpu/pull/5046).
- Signed and unsigned 64 bit integer support in shaders. By @rodolphito and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154)
- 64 bit integer atomic support in shaders. By @rodolphito and @JMS55 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383)
- `wgpu::Instance` can now report which `wgpu::Backends` are available based on the build configuration. By @wumpf [#5167](https://github.com/gfx-rs/wgpu/pull/5167)
```diff
-wgpu::Instance::any_backend_feature_enabled()
4 changes: 3 additions & 1 deletion naga/src/valid/mod.rs
@@ -77,7 +77,7 @@ bitflags::bitflags! {
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-    pub struct Capabilities: u16 {
+    pub struct Capabilities: u32 {
/// Support for [`AddressSpace::PushConstant`].
const PUSH_CONSTANT = 0x1;
/// Float values with width = 8.
@@ -110,6 +110,8 @@ bitflags::bitflags! {
const CUBE_ARRAY_TEXTURES = 0x4000;
/// Support for 64-bit signed and unsigned integers.
const SHADER_INT64 = 0x8000;
/// Support for atomic operations on 64-bit signed and unsigned integers.
const SHADER_INT64_ATOMIC = 0x10000;
}
}

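The new bit only takes effect when the embedder opts in. Below is a minimal sketch, not part of this commit, of constructing a naga validator that accepts `atomic<i64>`/`atomic<u64>` types; it assumes the public `naga::valid` API shown above.

```rust
use naga::valid::{Capabilities, ValidationFlags, Validator};

fn int64_atomic_validator() -> Validator {
    // SHADER_INT64 admits 64-bit scalars; SHADER_INT64_ATOMIC additionally
    // admits 8-byte atomic types in the type validator (see type.rs below).
    let caps = Capabilities::SHADER_INT64 | Capabilities::SHADER_INT64_ATOMIC;
    Validator::new(ValidationFlags::all(), caps)
}
```
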
25 changes: 19 additions & 6 deletions naga/src/valid/type.rs
@@ -353,16 +353,29 @@ impl super::Validator {
)
}
Ti::Atomic(crate::Scalar { kind, width }) => {
-    let good = match kind {
+    match kind {
         crate::ScalarKind::Bool
         | crate::ScalarKind::Float
         | crate::ScalarKind::AbstractInt
-        | crate::ScalarKind::AbstractFloat => false,
-        crate::ScalarKind::Sint | crate::ScalarKind::Uint => width == 4,
+        | crate::ScalarKind::AbstractFloat => {
+            return Err(TypeError::InvalidAtomicWidth(kind, width))
+        }
+        crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
+            if width == 8 {
+                if self
+                    .capabilities
+                    .contains(Capabilities::SHADER_INT64_ATOMIC)
+                {
+                } else {
+                    return Err(TypeError::MissingCapability(
+                        Capabilities::SHADER_INT64_ATOMIC,
+                    ));
+                }
+            } else if width != 4 {
+                return Err(TypeError::InvalidAtomicWidth(kind, width));
+            }
+        }
     };
-    if !good {
-        return Err(TypeError::InvalidAtomicWidth(kind, width));
-    }
TypeInfo::new(
TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::HOST_SHAREABLE,
Alignment::from_width(width),
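For reference, the rule the new arm enforces can be stated as a small standalone predicate. This is a sketch for illustration only; the helper below is hypothetical, not code from the commit.

```rust
/// Width check applied to `atomic<T>` types: 4-byte integers are always
/// allowed, 8-byte integers only with SHADER_INT64_ATOMIC, and every other
/// width is rejected as an invalid atomic width.
fn atomic_width_allowed(width: u8, has_int64_atomics: bool) -> bool {
    match width {
        4 => true,
        8 => has_int64_atomics,
        _ => false,
    }
}

fn main() {
    assert!(atomic_width_allowed(4, false));
    assert!(!atomic_width_allowed(8, false)); // -> MissingCapability(SHADER_INT64_ATOMIC)
    assert!(atomic_width_allowed(8, true));
}
```
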
23 changes: 23 additions & 0 deletions naga/tests/in/atomicCompareExchange-int64.param.ron
@@ -0,0 +1,23 @@
(
god_mode: true,
spv: (
version: (1, 0),
capabilities: [ Int64, Int64ImageEXT, Int64Atomics ],
),
hlsl: (
shader_model: V6_0,
binding_map: {},
fake_missing_bindings: true,
special_constants_binding: Some((space: 1, register: 0)),
push_constants_target: Some((space: 0, register: 0)),
zero_initialize_workgroup_memory: true,
),
msl: (
lang_version: (3, 1),
per_entry_point_map: {},
inline_samplers: [],
spirv_cross_compatibility: false,
fake_missing_bindings: true,
zero_initialize_workgroup_memory: true,
),
)
34 changes: 34 additions & 0 deletions naga/tests/in/atomicCompareExchange-int64.wgsl
@@ -0,0 +1,34 @@
const SIZE: u64 = 128u;

@group(0) @binding(0)
var<storage,read_write> arr_i64: array<atomic<i64>, SIZE>;
@group(0) @binding(1)
var<storage,read_write> arr_u64: array<atomic<u64>, SIZE>;

@compute @workgroup_size(1)
fn test_atomic_compare_exchange_i64() {
for(var i = 0u; i < SIZE; i++) {
var old = atomicLoad(&arr_i64[i]);
var exchanged = false;
while(!exchanged) {
let new_ = bitcast<i64>(bitcast<f32>(old) + 1.0);
let result = atomicCompareExchangeWeak(&arr_i64[i], old, new_);
old = result.old_value;
exchanged = result.exchanged;
}
}
}

@compute @workgroup_size(1)
fn test_atomic_compare_exchange_u64() {
for(var i = 0u; i < SIZE; i++) {
var old = atomicLoad(&arr_u64[i]);
var exchanged = false;
while(!exchanged) {
let new_ = bitcast<u64>(bitcast<f32>(old) + 1.0);
let result = atomicCompareExchangeWeak(&arr_u64[i], old, new_);
old = result.old_value;
exchanged = result.exchanged;
}
}
}
23 changes: 23 additions & 0 deletions naga/tests/in/atomicOps-int64.param.ron
@@ -0,0 +1,23 @@
(
god_mode: true,
spv: (
version: (1, 0),
capabilities: [ Int64, Int64ImageEXT, Int64Atomics ],
),
hlsl: (
shader_model: V6_0,
binding_map: {},
fake_missing_bindings: true,
special_constants_binding: Some((space: 1, register: 0)),
push_constants_target: Some((space: 0, register: 0)),
zero_initialize_workgroup_memory: true,
),
msl: (
lang_version: (3, 1),
per_entry_point_map: {},
inline_samplers: [],
spirv_cross_compatibility: false,
fake_missing_bindings: true,
zero_initialize_workgroup_memory: true,
),
)
141 changes: 141 additions & 0 deletions naga/tests/in/atomicOps-int64.wgsl
@@ -0,0 +1,141 @@
// This test covers the cross product of:
//
// * All atomic operations.
// * On all applicable scopes (storage read-write, workgroup).
// * For all shapes of modeling atomic data.

struct Struct {
atomic_scalar: atomic<u64>,
atomic_arr: array<atomic<i64>, 2>,
}

@group(0) @binding(0)
var<storage, read_write> storage_atomic_scalar: atomic<u64>;
@group(0) @binding(1)
var<storage, read_write> storage_atomic_arr: array<atomic<i64>, 2>;
@group(0) @binding(2)
var<storage, read_write> storage_struct: Struct;

var<workgroup> workgroup_atomic_scalar: atomic<u64>;
var<workgroup> workgroup_atomic_arr: array<atomic<i64>, 2>;
var<workgroup> workgroup_struct: Struct;

@compute
@workgroup_size(2)
fn cs_main(@builtin(local_invocation_id) id: vec3<u64>) {
atomicStore(&storage_atomic_scalar, 1lu);
atomicStore(&storage_atomic_arr[1], 1li);
atomicStore(&storage_struct.atomic_scalar, 1lu);
atomicStore(&storage_struct.atomic_arr[1], 1li);
atomicStore(&workgroup_atomic_scalar, 1lu);
atomicStore(&workgroup_atomic_arr[1], 1li);
atomicStore(&workgroup_struct.atomic_scalar, 1lu);
atomicStore(&workgroup_struct.atomic_arr[1], 1li);

workgroupBarrier();

let l0 = atomicLoad(&storage_atomic_scalar);
let l1 = atomicLoad(&storage_atomic_arr[1]);
let l2 = atomicLoad(&storage_struct.atomic_scalar);
let l3 = atomicLoad(&storage_struct.atomic_arr[1]);
let l4 = atomicLoad(&workgroup_atomic_scalar);
let l5 = atomicLoad(&workgroup_atomic_arr[1]);
let l6 = atomicLoad(&workgroup_struct.atomic_scalar);
let l7 = atomicLoad(&workgroup_struct.atomic_arr[1]);

workgroupBarrier();

atomicAdd(&storage_atomic_scalar, 1lu);
atomicAdd(&storage_atomic_arr[1], 1li);
atomicAdd(&storage_struct.atomic_scalar, 1lu);
atomicAdd(&storage_struct.atomic_arr[1], 1li);
atomicAdd(&workgroup_atomic_scalar, 1lu);
atomicAdd(&workgroup_atomic_arr[1], 1li);
atomicAdd(&workgroup_struct.atomic_scalar, 1lu);
atomicAdd(&workgroup_struct.atomic_arr[1], 1li);

workgroupBarrier();

atomicSub(&storage_atomic_scalar, 1lu);
atomicSub(&storage_atomic_arr[1], 1li);
atomicSub(&storage_struct.atomic_scalar, 1lu);
atomicSub(&storage_struct.atomic_arr[1], 1li);
atomicSub(&workgroup_atomic_scalar, 1lu);
atomicSub(&workgroup_atomic_arr[1], 1li);
atomicSub(&workgroup_struct.atomic_scalar, 1lu);
atomicSub(&workgroup_struct.atomic_arr[1], 1li);

workgroupBarrier();

atomicMax(&storage_atomic_scalar, 1lu);
atomicMax(&storage_atomic_arr[1], 1li);
atomicMax(&storage_struct.atomic_scalar, 1lu);
atomicMax(&storage_struct.atomic_arr[1], 1li);
atomicMax(&workgroup_atomic_scalar, 1lu);
atomicMax(&workgroup_atomic_arr[1], 1li);
atomicMax(&workgroup_struct.atomic_scalar, 1lu);
atomicMax(&workgroup_struct.atomic_arr[1], 1li);

workgroupBarrier();

atomicMin(&storage_atomic_scalar, 1lu);
atomicMin(&storage_atomic_arr[1], 1li);
atomicMin(&storage_struct.atomic_scalar, 1lu);
atomicMin(&storage_struct.atomic_arr[1], 1li);
atomicMin(&workgroup_atomic_scalar, 1lu);
atomicMin(&workgroup_atomic_arr[1], 1li);
atomicMin(&workgroup_struct.atomic_scalar, 1lu);
atomicMin(&workgroup_struct.atomic_arr[1], 1li);

workgroupBarrier();

atomicAnd(&storage_atomic_scalar, 1lu);
atomicAnd(&storage_atomic_arr[1], 1li);
atomicAnd(&storage_struct.atomic_scalar, 1lu);
atomicAnd(&storage_struct.atomic_arr[1], 1li);
atomicAnd(&workgroup_atomic_scalar, 1lu);
atomicAnd(&workgroup_atomic_arr[1], 1li);
atomicAnd(&workgroup_struct.atomic_scalar, 1lu);
atomicAnd(&workgroup_struct.atomic_arr[1], 1li);

workgroupBarrier();

atomicOr(&storage_atomic_scalar, 1lu);
atomicOr(&storage_atomic_arr[1], 1li);
atomicOr(&storage_struct.atomic_scalar, 1lu);
atomicOr(&storage_struct.atomic_arr[1], 1li);
atomicOr(&workgroup_atomic_scalar, 1lu);
atomicOr(&workgroup_atomic_arr[1], 1li);
atomicOr(&workgroup_struct.atomic_scalar, 1lu);
atomicOr(&workgroup_struct.atomic_arr[1], 1li);

workgroupBarrier();

atomicXor(&storage_atomic_scalar, 1lu);
atomicXor(&storage_atomic_arr[1], 1li);
atomicXor(&storage_struct.atomic_scalar, 1lu);
atomicXor(&storage_struct.atomic_arr[1], 1li);
atomicXor(&workgroup_atomic_scalar, 1lu);
atomicXor(&workgroup_atomic_arr[1], 1li);
atomicXor(&workgroup_struct.atomic_scalar, 1lu);
atomicXor(&workgroup_struct.atomic_arr[1], 1li);

atomicExchange(&storage_atomic_scalar, 1lu);
atomicExchange(&storage_atomic_arr[1], 1li);
atomicExchange(&storage_struct.atomic_scalar, 1lu);
atomicExchange(&storage_struct.atomic_arr[1], 1li);
atomicExchange(&workgroup_atomic_scalar, 1lu);
atomicExchange(&workgroup_atomic_arr[1], 1li);
atomicExchange(&workgroup_struct.atomic_scalar, 1lu);
atomicExchange(&workgroup_struct.atomic_arr[1], 1li);

// // TODO: https://github.com/gpuweb/gpuweb/issues/2021
// atomicCompareExchangeWeak(&storage_atomic_scalar, 1lu);
// atomicCompareExchangeWeak(&storage_atomic_arr[1], 1li);
// atomicCompareExchangeWeak(&storage_struct.atomic_scalar, 1lu);
// atomicCompareExchangeWeak(&storage_struct.atomic_arr[1], 1li);
// atomicCompareExchangeWeak(&workgroup_atomic_scalar, 1lu);
// atomicCompareExchangeWeak(&workgroup_atomic_arr[1], 1li);
// atomicCompareExchangeWeak(&workgroup_struct.atomic_scalar, 1lu);
// atomicCompareExchangeWeak(&workgroup_struct.atomic_arr[1], 1li);
}
1 change: 1 addition & 0 deletions naga/tests/in/int64.param.ron
@@ -2,6 +2,7 @@
god_mode: true,
spv: (
version: (1, 0),
capabilities: [ Int64 ],
),
hlsl: (
shader_model: V6_0,
4 changes: 4 additions & 0 deletions wgpu-core/src/device/resource.rs
@@ -1515,6 +1515,10 @@ impl<A: HalApi> Device<A> {
Caps::SHADER_INT64,
self.features.contains(wgt::Features::SHADER_INT64),
);
caps.set(
Caps::SHADER_INT64_ATOMIC,
self.features.contains(wgt::Features::SHADER_INT64_ATOMIC),
);
caps.set(
Caps::MULTISAMPLED_SHADING,
self.downlevel
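On the wgpu side, this capability is only set when the corresponding device feature was requested. A sketch of the application-side call follows; it is not part of this commit, and field names such as `required_features`/`required_limits` assume the wgpu 0.19-era `DeviceDescriptor`.

```rust
async fn request_int64_atomic_device(
    adapter: &wgpu::Adapter,
) -> Result<(wgpu::Device, wgpu::Queue), wgpu::RequestDeviceError> {
    adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: Some("int64-atomics device"),
                // Both features are needed: SHADER_INT64 for 64-bit scalars,
                // SHADER_INT64_ATOMIC for atomic<i64>/atomic<u64> in shaders.
                required_features: wgpu::Features::SHADER_INT64
                    | wgpu::Features::SHADER_INT64_ATOMIC,
                required_limits: wgpu::Limits::default(),
            },
            None, // no API trace directory
        )
        .await
}
```
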
17 changes: 17 additions & 0 deletions wgpu-hal/src/dx12/adapter.rs
@@ -311,6 +311,23 @@ impl super::Adapter {
};
features.set(wgt::Features::SHADER_INT64, int64_shader_ops_supported);

let atomic_int64_on_typed_resource_supported = {
let mut features9: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS9 =
unsafe { mem::zeroed() };
let hr = unsafe {
device.CheckFeatureSupport(
37, // D3D12_FEATURE_D3D12_OPTIONS9
&mut features9 as *mut _ as *mut _,
mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS9>() as _,
)
};
hr == 0 && features9.AtomicInt64OnTypedResourceSupported != 0
};
features.set(
wgt::Features::SHADER_INT64_ATOMIC,
atomic_int64_on_typed_resource_supported,
);

// float32-filterable should always be available on d3d12
features.set(wgt::Features::FLOAT32_FILTERABLE, true);

18 changes: 18 additions & 0 deletions wgpu-hal/src/dx12/types.rs
@@ -32,6 +32,13 @@ winapi::ENUM! {
}
}

winapi::ENUM! {
enum D3D12_WAVE_MMA_TIER {
D3D12_WAVE_MMA_TIER_NOT_SUPPORTED = 0,
D3D12_WAVE_MMA_TIER_1_0 = 10,
}
}

winapi::STRUCT! {
struct D3D12_FEATURE_DATA_D3D12_OPTIONS3 {
CopyQueueTimestampQueriesSupported: winapi::shared::minwindef::BOOL,
@@ -41,3 +48,14 @@ winapi::STRUCT! {
BarycentricsSupported: winapi::shared::minwindef::BOOL,
}
}

winapi::STRUCT! {
struct D3D12_FEATURE_DATA_D3D12_OPTIONS9 {
MeshShaderPipelineStatsSupported: winapi::shared::minwindef::BOOL,
MeshShaderSupportsFullRangeRenderTargetArrayIndex: winapi::shared::minwindef::BOOL,
AtomicInt64OnTypedResourceSupported: winapi::shared::minwindef::BOOL,
AtomicInt64OnGroupSharedSupported: winapi::shared::minwindef::BOOL,
DerivativesInMeshAndAmplificationShadersSupported: winapi::shared::minwindef::BOOL,
WaveMMATier: D3D12_WAVE_MMA_TIER,
}
}
4 changes: 4 additions & 0 deletions wgpu-hal/src/metal/adapter.rs
@@ -882,6 +882,10 @@ impl super::PrivateCapabilities {
F::SHADER_INT64,
self.msl_version >= MTLLanguageVersion::V2_3,
);
features.set(
F::SHADER_INT64_ATOMIC,
self.msl_version >= MTLLanguageVersion::V3_1,
);

features.set(
F::ADDRESS_MODE_CLAMP_TO_BORDER,