From 351995340ccb3dadaebc7fb28cdcac5aa6b58406 Mon Sep 17 00:00:00 2001 From: Martijn Courteaux Date: Wed, 15 Jan 2025 23:23:49 +0100 Subject: [PATCH] Clang-format and improved shuffle-test. --- src/CodeGen_GPU_Dev.cpp | 50 ++++++++++++++++++------------------ test/correctness/shuffle.cpp | 47 ++++++++++++++++++--------------- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/src/CodeGen_GPU_Dev.cpp b/src/CodeGen_GPU_Dev.cpp index 5a80cc7bd7b6..ab27e1c41c57 100644 --- a/src/CodeGen_GPU_Dev.cpp +++ b/src/CodeGen_GPU_Dev.cpp @@ -168,15 +168,15 @@ void CodeGen_GPU_C::visit(const Shuffle *op) { std::ostringstream rhs; std::string storage_name = unique_name('_'); switch (vector_declaration_style) { - case VectorDeclarationStyle::OpenCLSyntax: - rhs << "(" << print_type(op->type) << ")("; - break; - case VectorDeclarationStyle::WGSLSyntax: - rhs << print_type(op->type) << "("; - break; - case VectorDeclarationStyle::CLikeSyntax: - rhs << "{"; - break; + case VectorDeclarationStyle::OpenCLSyntax: + rhs << "(" << print_type(op->type) << ")("; + break; + case VectorDeclarationStyle::WGSLSyntax: + rhs << print_type(op->type) << "("; + break; + case VectorDeclarationStyle::CLikeSyntax: + rhs << "{"; + break; } int elem_num = 0; for (int i : op->indices) { @@ -192,13 +192,13 @@ void CodeGen_GPU_C::visit(const Shuffle *op) { rhs << vecs[vector_idx]; if (op->vectors[vector_idx].type().lanes() > 1) { switch (vector_declaration_style) { - case VectorDeclarationStyle::OpenCLSyntax: - rhs << ".s" << lane_idx; - break; - case VectorDeclarationStyle::WGSLSyntax: - case VectorDeclarationStyle::CLikeSyntax: - rhs << "[" << lane_idx << "]"; - break; + case VectorDeclarationStyle::OpenCLSyntax: + rhs << ".s" << lane_idx; + break; + case VectorDeclarationStyle::WGSLSyntax: + case VectorDeclarationStyle::CLikeSyntax: + rhs << "[" << lane_idx << "]"; + break; } } if (elem_num < (int)(op->indices.size() - 1)) { @@ -207,15 +207,15 @@ void CodeGen_GPU_C::visit(const Shuffle *op) { elem_num++; } switch (vector_declaration_style) { - case VectorDeclarationStyle::OpenCLSyntax: - rhs << ")"; - break; - case VectorDeclarationStyle::WGSLSyntax: - rhs << ")"; - break; - case VectorDeclarationStyle::CLikeSyntax: - rhs << "}"; - break; + case VectorDeclarationStyle::OpenCLSyntax: + rhs << ")"; + break; + case VectorDeclarationStyle::WGSLSyntax: + rhs << ")"; + break; + case VectorDeclarationStyle::CLikeSyntax: + rhs << "}"; + break; } print_assignment(op->type, rhs.str()); } diff --git a/test/correctness/shuffle.cpp b/test/correctness/shuffle.cpp index 81eeb5a299b2..6381ddacb929 100644 --- a/test/correctness/shuffle.cpp +++ b/test/correctness/shuffle.cpp @@ -6,8 +6,8 @@ using namespace Halide; int main(int argc, char **argv) { Target target = get_jit_target_from_environment(); if (target.has_feature(Target::Feature::Vulkan)) { - std::printf("[SKIP] Vulkan seems to be not working.\n"); - return 0; + std::printf("[SKIP] Vulkan seems to be not working.\n"); + return 0; } Var x{"x"}, y{"y"}; @@ -15,10 +15,13 @@ int main(int argc, char **argv) { Func f0{"f0"}, f1{"f1"}, g{"g"}; f0(x, y) = x * (y + 1); f1(x, y) = x * (y + 3); - Expr vec1 = Internal::Shuffle::make_concat({f0(x, 3), f0(x, 1), f0(x, 2), f0(x, 0)}); - Expr vec2 = Internal::Shuffle::make_concat({f1(x, 4), f1(x, 7), f1(x, 5), f1(x, 6)}); - Expr shuffle = Halide::Internal::Shuffle::make({vec1, vec2}, {3, 1, 6, 7, 2, 4, 0, 5}); - Expr result = shuffle * shuffle; + Expr vec1 = Internal::Shuffle::make_concat({f0(x, 0), f0(x, 1), f0(x, 2), f0(x, 3)}); + Expr vec2 = Internal::Shuffle::make_concat({f1(x, 4), f1(x, 5), f1(x, 6), f1(x, 7)}); + std::vector indices0 = {3, 1, 6, 7, 2, 4, 0, 5}; + std::vector indices1 = {1, 0, 3, 4, 7, 0, 5, 2}; + Expr shuffle1 = Internal::Shuffle::make({vec1, vec2}, indices0); + Expr shuffle2 = Internal::Shuffle::make({vec1, vec2}, indices1); + Expr result = shuffle1 * shuffle2; // Manual logarithmic reduce. Expr a_half1 = Halide::Internal::Shuffle::make_slice(result, 0, 1, 4); @@ -27,28 +30,32 @@ int main(int argc, char **argv) { Expr b_half1 = Halide::Internal::Shuffle::make_slice(a_sumhalves, 0, 1, 2); Expr b_half2 = Halide::Internal::Shuffle::make_slice(a_sumhalves, 2, 1, 2); Expr b_sumhalves = b_half1 + b_half2; - g(x) = Internal::Shuffle::make_extract_element(b_sumhalves, 0) + Halide::Internal::Shuffle::make_extract_element(b_sumhalves, 1); + g(x) = Internal::Shuffle::make_extract_element(b_sumhalves, 0) + + Internal::Shuffle::make_extract_element(b_sumhalves, 1); f0.compute_root(); f1.compute_root(); if (target.has_gpu_feature()) { - Var xo,xi; + Var xo, xi; g.gpu_tile(x, xo, xi, 8).never_partition_all(); } Buffer im = g.realize({32}, target); - for (int i = 0; i < 32; i++) { - int fv0[4] = {i * 1, i * 2, i * 3, i * 4}; - int fv1[4] = {i * 7, i * 8, i * 9, i * 10}; - int exp = 0; - for (int i = 0; i < 4; ++i) { - exp += fv0[i] * fv0[i]; - exp += fv1[i] * fv1[i]; - } - if (im(i) != exp) { - printf("im[%d] = %d (expected %d)\n", i, im(i), exp); - return 1; - } + im.copy_to_host(); + for (int x = 0; x < 32; x++) { + int fv0[8], fv1[8]; + for (int i = 0; i < 8; ++i) { + fv0[i] = x * (indices0[i] + (indices0[i] >= 4 ? 3 : 1)); + fv1[i] = x * (indices1[i] + (indices1[i] >= 4 ? 3 : 1)); + } + int exp = 0; + for (int i = 0; i < 8; ++i) { + exp += fv0[i] * fv1[i]; + } + if (im(x) != exp) { + printf("im[%d] = %d (expected %d)\n", x, im(x), exp); + return 1; + } } printf("Success!\n"); return 0;