diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0c894e46862..c162cb04111 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -18,6 +18,7 @@ jobs: if: needs.check_changes.outputs.has_non_docs_changes == 'true' steps: - uses: actions/checkout@v4 + # If we add ccache back, don't forget to update cleanup-cache.yml #- name: Set Up Cache # uses: actions/cache@v3 # with: @@ -66,6 +67,7 @@ jobs: if: needs.check_changes.outputs.has_non_docs_changes == 'true' steps: - uses: actions/checkout@v4 + # If we add ccache back, don't forget to update cleanup-cache.yml #- name: Set Up Cache # uses: actions/cache@v3 # with: @@ -144,4 +146,32 @@ jobs: set "PATH=%PATH%;D:\\a\amrex\amrex\installdir\bin" cmake --build build --config Release --target test_install - # If we add ccache back, don't forget to update cleanup-cache.yml + tests_cuda: + name: CUDA on Windows + runs-on: windows-latest + needs: check_changes + if: needs.check_changes.outputs.has_non_docs_changes == 'true' + steps: + - uses: Jimver/cuda-toolkit@v0.2.19 + id: cuda-toolkit + with: + cuda: '12.6.1' + use-github-cache: 'false' + - uses: actions/checkout@v4 + - name: Compile + run: | + cmake -S . -B build ` + -DCUDAToolkit_ROOT="$Env:CUDA_PATH" ` + -DCMAKE_VERBOSE_MAKEFILE=ON ` + -DCMAKE_BUILD_TYPE=Release ` + -DAMReX_GPU_BACKEND=CUDA ` + -DAMReX_CUDA_ARCH="8.0" ` + -DAMReX_ENABLE_TESTS=ON ` + -DAMReX_EB=ON ` + -DAMReX_FFT=ON ` + -DAMReX_LINEAR_SOLVERS=ON ` + -DAMReX_PARTICLES=ON ` + -DAMReX_FORTRAN=OFF ` + -DAMReX_MPI=OFF + cmake --build build --config Release -j 4 + cmake --build build --config Release --target install diff --git a/Src/Base/AMReX_BoxList.cpp b/Src/Base/AMReX_BoxList.cpp index 4459b24dba9..b5fbd58a556 100644 --- a/Src/Base/AMReX_BoxList.cpp +++ b/Src/Base/AMReX_BoxList.cpp @@ -188,7 +188,7 @@ BoxList::BoxList(const Box& bx, const IntVect& tilesize) ntiles *= nt[d]; } - IntVect small, big, ijk; // note that the initial values are all zero. + IntVect sml, big, ijk; // note that the initial values are all zero. ijk[0] = -1; for (int t=0; t::value,int> = 0> - void prefetchToHost (const MFIter& mfi) const noexcept; + void prefetchToHost (const MFIter& mfi) const noexcept + { +#ifdef AMREX_USE_CUDA + this->fabPtr(mfi)->prefetchToHost(); +#else + amrex::ignore_unused(mfi); +#endif + } template ::value,int> = 0> - void prefetchToDevice (const MFIter& mfi) const noexcept; + void prefetchToDevice (const MFIter& mfi) const noexcept + { +#ifdef AMREX_USE_CUDA + this->fabPtr(mfi)->prefetchToDevice(); +#else + amrex::ignore_unused(mfi); +#endif + } template ::value,int> = 0> - Array4::value_type const> array (const MFIter& mfi) const noexcept; - // + Array4::value_type const> array (const MFIter& mfi) const noexcept + { + return fabPtr(mfi)->const_array(); + } + template ::value,int> = 0> - Array4::value_type> array (const MFIter& mfi) noexcept; - // + Array4::value_type> array (const MFIter& mfi) noexcept + { + return fabPtr(mfi)->array(); + } + template ::value,int> = 0> - Array4::value_type const> array (int K) const noexcept; - // + Array4::value_type const> array (int K) const noexcept + { + return fabPtr(K)->const_array(); + } + template ::value,int> = 0> - Array4::value_type> array (int K) noexcept; + Array4::value_type> array (int K) noexcept + { + return fabPtr(K)->array(); + } template ::value,int> = 0> - Array4::value_type const> const_array (const MFIter& mfi) const noexcept; - // + Array4::value_type const> const_array (const MFIter& mfi) const noexcept + { + return fabPtr(mfi)->const_array(); + } + template ::value,int> = 0> - Array4::value_type const> const_array (int K) const noexcept; + Array4::value_type const> const_array (int K) const noexcept + { + return fabPtr(K)->const_array(); + } template ::value,int> = 0> - Array4::value_type const> array (const MFIter& mfi, int start_comp) const noexcept; - // + Array4::value_type const> array (const MFIter& mfi, int start_comp) const noexcept + { + return fabPtr(mfi)->const_array(start_comp); + } + template ::value,int> = 0> - Array4::value_type> array (const MFIter& mfi, int start_comp) noexcept; - // + Array4::value_type> array (const MFIter& mfi, int start_comp) noexcept + { + return fabPtr(mfi)->array(start_comp); + } + template ::value,int> = 0> - Array4::value_type const> array (int K, int start_comp) const noexcept; - // + Array4::value_type const> array (int K, int start_comp) const noexcept + { + return fabPtr(K)->const_array(start_comp); + } + template ::value,int> = 0> - Array4::value_type> array (int K, int start_comp) noexcept; + Array4::value_type> array (int K, int start_comp) noexcept + { + return fabPtr(K)->array(start_comp); + } template ::value,int> = 0> - Array4::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept; - // + Array4::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept + { + return fabPtr(mfi)->const_array(start_comp); + } + template ::value,int> = 0> - Array4::value_type const> const_array (int K, int start_comp) const noexcept; + Array4::value_type const> const_array (int K, int start_comp) const noexcept + { + return fabPtr(K)->const_array(start_comp); + } template ::value,int> = 0> - MultiArray4::value_type> arrays () noexcept; + MultiArray4::value_type> arrays () noexcept + { + build_arrays(); + return m_arrays; + } template ::value,int> = 0> - MultiArray4::value_type const> arrays () const noexcept; + MultiArray4::value_type const> arrays () const noexcept + { + build_arrays(); + return m_const_arrays; + } template ::value,int> = 0> - MultiArray4::value_type const> const_arrays () const noexcept; + MultiArray4::value_type const> const_arrays () const noexcept + { + build_arrays(); + return m_const_arrays; + } //! Explicitly set the Kth FAB in the FabArray to point to elem. void setFab (int boxno, std::unique_ptr elem); @@ -1531,153 +1593,6 @@ FabArray::fabPtr (int K) const noexcept return m_fabs_v[li]; } -template -template ::value,int>> -void -FabArray::prefetchToHost (const MFIter& mfi) const noexcept -{ -#ifdef AMREX_USE_CUDA - this->fabPtr(mfi)->prefetchToHost(); -#else - amrex::ignore_unused(mfi); -#endif -} - -template -template ::value,int>> -void -FabArray::prefetchToDevice (const MFIter& mfi) const noexcept -{ -#ifdef AMREX_USE_CUDA - this->fabPtr(mfi)->prefetchToDevice(); -#else - amrex::ignore_unused(mfi); -#endif -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::array (const MFIter& mfi) const noexcept -{ - return fabPtr(mfi)->const_array(); -} - -template -template ::value,int>> -Array4::value_type> -FabArray::array (const MFIter& mfi) noexcept -{ - return fabPtr(mfi)->array(); -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::array (int K) const noexcept -{ - return fabPtr(K)->const_array(); -} - -template -template ::value,int>> -Array4::value_type> -FabArray::array (int K) noexcept -{ - return fabPtr(K)->array(); -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::const_array (const MFIter& mfi) const noexcept -{ - return fabPtr(mfi)->const_array(); -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::const_array (int K) const noexcept -{ - return fabPtr(K)->const_array(); -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::array (const MFIter& mfi, int start_comp) const noexcept -{ - return fabPtr(mfi)->const_array(start_comp); -} - -template -template ::value,int>> -Array4::value_type> -FabArray::array (const MFIter& mfi, int start_comp) noexcept -{ - return fabPtr(mfi)->array(start_comp); -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::array (int K, int start_comp) const noexcept -{ - return fabPtr(K)->const_array(start_comp); -} - -template -template ::value,int>> -Array4::value_type> -FabArray::array (int K, int start_comp) noexcept -{ - return fabPtr(K)->array(start_comp); -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::const_array (const MFIter& mfi, int start_comp) const noexcept -{ - return fabPtr(mfi)->const_array(start_comp); -} - -template -template ::value,int>> -Array4::value_type const> -FabArray::const_array (int K, int start_comp) const noexcept -{ - return fabPtr(K)->const_array(start_comp); -} - -template -template ::value,int>> -MultiArray4::value_type> -FabArray::arrays () noexcept -{ - build_arrays(); - return m_arrays; -} - -template -template ::value,int>> -MultiArray4::value_type const> -FabArray::arrays () const noexcept -{ - build_arrays(); - return m_const_arrays; -} - -template -template ::value,int>> -MultiArray4::value_type const> -FabArray::const_arrays () const noexcept -{ - build_arrays(); - return m_const_arrays; -} - template template ::value,int>> void diff --git a/Src/Base/AMReX_FabArrayBase.cpp b/Src/Base/AMReX_FabArrayBase.cpp index 9acf440d98f..5120e23be3f 100644 --- a/Src/Base/AMReX_FabArrayBase.cpp +++ b/Src/Base/AMReX_FabArrayBase.cpp @@ -2337,7 +2337,7 @@ FabArrayBase::buildTileArray (const IntVect& tileSize, TileArray& ta) const ntiles *= nt_in_fab[d]; } - IntVect small, big, ijk; // note that the initial values are all zero. + IntVect sml, big, ijk; // note that the initial values are all zero. ijk[0] = -1; for (int t = 0; t < ntiles; ++t) { ta.indexMap.push_back(K); @@ -2356,15 +2356,15 @@ FabArrayBase::buildTileArray (const IntVect& tileSize, TileArray& ta) const for (int d=0; d; \ @@ -465,7 +465,7 @@ } \ } #else -#define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \ +#define AMREX_HOST_DEVICE_FOR_1D(n,i,block) \ { \ auto const& amrex_i_n = n; \ using amrex_i_inttype = std::remove_const_t; \ @@ -479,7 +479,39 @@ } #endif -#define AMREX_GPU_DEVICE_FOR_1D(n,i,block) \ +#ifdef AMREX_USE_SYCL +#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(n,i,block) \ +{ \ + auto const& amrex_i_n = n; \ + using amrex_i_inttype = std::remove_const_t; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } \ +} +#else +#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(n,i,block) \ +{ \ + auto const& amrex_i_n = n; \ + using amrex_i_inttype = std::remove_const_t; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \ + } else { \ + auto amrex_i_lambda = [=] (amrex_i_inttype i) noexcept block; \ + AMREX_PRAGMA_SIMD \ + for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \ + } \ +} +#endif + +#define AMREX_FOR_1D(n,i,block) \ +{ \ + using amrex_i_inttype = std::remove_const_t; \ + amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \ +} + +#define AMREX_PARALLEL_FOR_1D(n,i,block) \ { \ using amrex_i_inttype = std::remove_const_t; \ amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \ @@ -488,7 +520,7 @@ // FOR_3D #ifdef AMREX_USE_SYCL -#define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ +#define AMREX_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ { \ auto const& amrex_i_box = box; \ if (amrex::Gpu::inLaunchRegion()) { \ @@ -498,7 +530,7 @@ } \ } #else -#define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ +#define AMREX_HOST_DEVICE_FOR_3D(box,i,j,k,block) \ { \ auto const& amrex_i_box = box; \ if (amrex::Gpu::inLaunchRegion()) { \ @@ -509,7 +541,34 @@ } #endif -#define AMREX_GPU_DEVICE_FOR_3D(box,i,j,k,block) \ +#ifdef AMREX_USE_SYCL +#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(box,i,j,k,block) \ +{ \ + auto const& amrex_i_box = box; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } \ +} +#else +#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(box,i,j,k,block) \ +{ \ + auto const& amrex_i_box = box; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \ + } else { \ + amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \ + } \ +} +#endif + +#define AMREX_FOR_3D(box,i,j,k,block) \ +{ \ + amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \ +} + +#define AMREX_PARALLEL_FOR_3D(box,i,j,k,block) \ { \ amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \ } @@ -517,7 +576,7 @@ // FOR_4D #ifdef AMREX_USE_SYCL -#define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ +#define AMREX_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ auto const& amrex_i_box = box; \ auto const& amrex_i_ncomp = ncomp; \ @@ -528,7 +587,7 @@ } \ } #else -#define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ +#define AMREX_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ auto const& amrex_i_box = box; \ auto const& amrex_i_ncomp = ncomp; \ @@ -540,15 +599,36 @@ } #endif -#define AMREX_GPU_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \ +#ifdef AMREX_USE_SYCL +#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \ { \ - amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ + auto const& amrex_i_box = box; \ + auto const& amrex_i_ncomp = ncomp; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ + } else { \ + amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \ + } \ } +#else +#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \ +{ \ + auto const& amrex_i_box = box; \ + auto const& amrex_i_ncomp = ncomp; \ + if (amrex::Gpu::inLaunchRegion()) { \ + amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ + } else { \ + amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \ + } \ +} +#endif -#define AMREX_GPU_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__) -#define AMREX_GPU_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__) -#define AMREX_GPU_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__) +#define AMREX_FOR_4D(box,ncomp,i,j,k,n,block) \ +{ \ + amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ +} -#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__) -#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__) -#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__) +#define AMREX_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \ +{ \ + amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \ +} diff --git a/Src/Base/AMReX_GpuUtility.H b/Src/Base/AMReX_GpuUtility.H index 4adc111f5e2..fa988e52521 100644 --- a/Src/Base/AMReX_GpuUtility.H +++ b/Src/Base/AMReX_GpuUtility.H @@ -149,7 +149,10 @@ namespace Gpu { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool isnan (T m) noexcept { -#if defined(AMREX_USE_SYCL) +#if defined(_WIN32) + AMREX_IF_ON_DEVICE((return m != m;)) + AMREX_IF_ON_HOST((return std::isnan(m);)) +#elif defined(AMREX_USE_SYCL) return sycl::isnan(m); #else return std::isnan(m); @@ -160,7 +163,10 @@ namespace Gpu { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool isinf (T m) noexcept { -#if defined(AMREX_USE_SYCL) +#if defined(_WIN32) + AMREX_IF_ON_DEVICE((return (2*m == m) && (m != 0);)) + AMREX_IF_ON_HOST((return std::isinf(m);)) +#elif defined(AMREX_USE_SYCL) return sycl::isinf(m); #else return std::isinf(m); diff --git a/Src/Base/AMReX_MultiFab.cpp b/Src/Base/AMReX_MultiFab.cpp index 2ba4aa7bc48..9059c1d5e68 100644 --- a/Src/Base/AMReX_MultiFab.cpp +++ b/Src/Base/AMReX_MultiFab.cpp @@ -628,7 +628,11 @@ MultiFab::is_finite (int scomp, int ncomp, const IntVect& ngrow, bool local) con r = ParReduce(TypeList{}, TypeList{}, *this, ngrow, ncomp, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept -> GpuTuple { +#if defined(_WIN32) + return Gpu::isnan(ma[box_no](i,j,k,n+scomp)) || Gpu::isinf(ma[box_no](i,j,k,n+scomp)); +#else return !amrex::Math::isfinite(ma[box_no](i,j,k,n+scomp)); +#endif }); } else #endif diff --git a/Src/Base/AMReX_NonLocalBC.H b/Src/Base/AMReX_NonLocalBC.H index f88e1d9aca4..3da847d0bf0 100644 --- a/Src/Base/AMReX_NonLocalBC.H +++ b/Src/Base/AMReX_NonLocalBC.H @@ -602,8 +602,10 @@ UnpackRecvBuffers (const PackComponents& components, FabArray& dest, const } #endif // AMREX_USE_MPI +#ifndef _WIN32 static_assert(IsDataPacking(), // NOLINT(bugprone-throw-keyword-missing) "PackComponents is expected to satisfy the concept DataPacking."); +#endif //////////////////////////////////////////////////////////////////////////////////// // [DataPacking.ApplyDtosAndProjectionOnReciever] @@ -663,8 +665,10 @@ UnpackRecvBuffers (const ApplyDtosAndProjectionOnReciever& packin } #endif // AMREX_USE_MPI +#ifndef _WIN32 static_assert(IsDataPacking, FArrayBox>(), // NOLINT(bugprone-throw-keyword-missing) "ApplyDtosAndProjectionOnReciever<> is expected to satisfy the DataPacking concept."); +#endif //////////////////////////////////////////////////////////////////////////////////// // [ParallelCopy_nowait] diff --git a/Src/Base/AMReX_PODVector.H b/Src/Base/AMReX_PODVector.H index 464bb552ee1..e4df8562c1d 100644 --- a/Src/Base/AMReX_PODVector.H +++ b/Src/Base/AMReX_PODVector.H @@ -47,7 +47,11 @@ namespace amrex [[maybe_unused]] Allocator const& allocator) { #ifdef AMREX_USE_GPU +#ifdef _WIN32 + if (RunOnGpu>::value) +#else if constexpr (RunOnGpu>::value) +#endif { amrex::ParallelFor(count, [=] AMREX_GPU_DEVICE (Size i) noexcept { data[i] = value; @@ -55,7 +59,11 @@ namespace amrex Gpu::streamSynchronize(); return; } +#ifdef _WIN32 + else if (IsPolymorphicArenaAllocator>::value) +#else else if constexpr (IsPolymorphicArenaAllocator>::value) +#endif { if (allocator.arena()->isManaged() || allocator.arena()->isDevice()) @@ -103,7 +111,11 @@ namespace amrex [[maybe_unused]] Allocator const& allocator) { #ifdef AMREX_USE_GPU +#ifdef _WIN32 + if (RunOnGpu>::value) +#else if constexpr (RunOnGpu>::value) +#endif { amrex::ParallelFor(count, [=] AMREX_GPU_DEVICE (Size i) noexcept { dst[i] = src[i]; @@ -111,7 +123,11 @@ namespace amrex Gpu::Device::streamSynchronize(); return; } +#ifdef _WIN32 + else if (IsPolymorphicArenaAllocator>::value) +#else else if constexpr (IsPolymorphicArenaAllocator>::value) +#endif { if (allocator.arena()->isManaged() || allocator.arena()->isDevice()) diff --git a/Src/EB/AMReX_EB2_2D_C.cpp b/Src/EB/AMReX_EB2_2D_C.cpp index 231faf0cb88..2795a07d000 100644 --- a/Src/EB/AMReX_EB2_2D_C.cpp +++ b/Src/EB/AMReX_EB2_2D_C.cpp @@ -13,11 +13,11 @@ void set_eb_data (const int i, const int j, { #ifdef AMREX_USE_FLOAT constexpr Real almostone = 1.0_rt-1.e-6_rt; - constexpr Real small = 1.e-5_rt; + constexpr Real sml = 1.e-5_rt; constexpr Real tiny = 1.e-6_rt; #else constexpr Real almostone = 1.0-1.e-15; - constexpr Real small = 1.e-14; + constexpr Real sml = 1.e-14; constexpr Real tiny = 1.e-15; #endif @@ -117,11 +117,11 @@ void set_eb_data (const int i, const int j, vfrac(i,j,0) = 0.5_rt*(af1+af2)/(dx[0]*dx[1]); - if (vfrac(i,j,0) > 1.0_rt-small) { + if (vfrac(i,j,0) > 1.0_rt-sml) { vfrac(i,j,0) = 1.0_rt; vcent(i,j,0,0) = 0.0_rt; vcent(i,j,0,1) = 0.0_rt; - } else if (vfrac(i,j,0) < small) { + } else if (vfrac(i,j,0) < sml) { vfrac(i,j,0) = 0.0_rt; vcent(i,j,0,0) = 0.0_rt; vcent(i,j,0,1) = 0.0_rt; @@ -206,9 +206,9 @@ int build_faces (Box const& bx, Array4 const& cell, bool cover_multiple_cuts, int& nsmallfaces) noexcept { #ifdef AMREX_USE_FLOAT - constexpr Real small = 1.e-5_rt; + constexpr Real sml = 1.e-5_rt; #else - constexpr Real small = 1.e-14; + constexpr Real sml = 1.e-14; #endif const Real dxinv = 1.0_rt/dx[0]; const Real dyinv = 1.0_rt/dx[1]; @@ -236,11 +236,11 @@ int build_faces (Box const& bx, Array4 const& cell, fcx(i,j,0) = 0.5_rt - 0.5_rt*apx(i,j,0); } - if (apx(i,j,0) > 1.0_rt-small) { + if (apx(i,j,0) > 1.0_rt-sml) { apx(i,j,0) = 1.0_rt; fcx(i,j,0) = 0.0_rt; fx(i,j,0) = Type::regular; - } else if (apx(i,j,0) < small) { + } else if (apx(i,j,0) < sml) { apx(i,j,0) = 0.0_rt; fcx(i,j,0) = 0.0_rt; fx(i,j,0) = Type::covered; @@ -269,11 +269,11 @@ int build_faces (Box const& bx, Array4 const& cell, fcy(i,j,0) = 0.5_rt - 0.5_rt*apy(i,j,0); } - if (apy(i,j,0) > 1.0_rt-small) { + if (apy(i,j,0) > 1.0_rt-sml) { apy(i,j,0) = 1.0_rt; fcy(i,j,0) = 0.0_rt; fy(i,j,0) = Type::regular; - } else if (apy(i,j,0) < small) { + } else if (apy(i,j,0) < sml) { apy(i,j,0) = 0.0_rt; fcy(i,j,0) = 0.0_rt; fy(i,j,0) = Type::covered; diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp index ec7d643391e..bf160f16ed6 100644 --- a/Src/EB/AMReX_EB2_3D_C.cpp +++ b/Src/EB/AMReX_EB2_3D_C.cpp @@ -199,10 +199,10 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy, Real bcx, Real bcy) noexcept { #ifdef AMREX_USE_FLOAT - constexpr Real small = 1.e-5_rt; + constexpr Real sml = 1.e-5_rt; constexpr Real tiny = 1.e-6_rt; #else - constexpr Real small = 1.e-14; + constexpr Real sml = 1.e-14; constexpr Real tiny = 1.e-15; #endif Real apnorm = std::hypot(axm-axp,aym-ayp); @@ -214,13 +214,13 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy, if (nxabs < tiny || nyabs > 1.0_rt-tiny) { areafrac = 0.5_rt*(axm+axp); - if (areafrac > 1.0_rt-small) { + if (areafrac > 1.0_rt-sml) { areafrac = 1.0_rt; centx = 0.0_rt; centy = 0.0_rt; Sx2 = Sy2 = 1.0_rt/12._rt; Sxy = 0.0_rt; - } else if (areafrac < small) { + } else if (areafrac < sml) { areafrac = 0.0_rt; centx = 0.0_rt; centy = 0.0_rt; @@ -236,13 +236,13 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy, } } else if (nyabs < tiny || nxabs > 1.0_rt-tiny) { areafrac = 0.5_rt*(aym+ayp); - if (areafrac > 1.0_rt-small) { + if (areafrac > 1.0_rt-sml) { areafrac = 1.0_rt; centx = 0.0_rt; centy = 0.0_rt; Sx2 = Sy2 = 1.0_rt/12._rt; Sxy = 0.0_rt; - } else if (areafrac < small) { + } else if (areafrac < sml) { areafrac = 0.0_rt; centx = 0.0_rt; centy = 0.0_rt; @@ -289,13 +289,13 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy, : -signx*(1.0_rt/16._rt)*dx2 + 0.5_rt*ny*S_b; areafrac = 0.5_rt*(af1+af2); - if (areafrac > 1.0_rt-small) { + if (areafrac > 1.0_rt-sml) { areafrac = 1.0_rt; centx = 0.0_rt; centy = 0.0_rt; Sx2 = Sy2 = 1.0_rt/12._rt; Sxy = 0.0_rt; - } else if (areafrac < small) { + } else if (areafrac < sml) { areafrac = 0.0_rt; centx = 0.0_rt; centy = 0.0_rt; @@ -377,9 +377,9 @@ int build_faces (Box const& bx, Array4 const& cell, int* dp = nmulticuts.data(); #ifdef AMREX_USE_FLOAT - constexpr Real small = 1.e-5_rt; + constexpr Real sml = 1.e-5_rt; #else - constexpr Real small = 1.e-14; + constexpr Real sml = 1.e-14; #endif const Real dxinv = 1.0_rt/dx[0]; const Real dyinv = 1.0_rt/dx[1]; @@ -464,7 +464,7 @@ int build_faces (Box const& bx, Array4 const& cell, Gpu::Atomic::Add(dp,1); } - if ((ncuts > 2) || (lym <= small && lyp <= small && lzm <= small && lzp <= small)) { + if ((ncuts > 2) || (lym <= sml && lyp <= sml && lzm <= sml && lzp <= sml)) { apx(i,j,k) = 0.0_rt; fcx(i,j,k,0) = 0.0_rt; fcx(i,j,k,1) = 0.0_rt; @@ -572,7 +572,7 @@ int build_faces (Box const& bx, Array4 const& cell, Gpu::Atomic::Add(dp,1); } - if ((ncuts > 2) || (lxm <= small && lxp <= small && lzm <= small && lzp <= small)) { + if ((ncuts > 2) || (lxm <= sml && lxp <= sml && lzm <= sml && lzp <= sml)) { apy(i,j,k) = 0.0_rt; fcy(i,j,k,0) = 0.0_rt; fcy(i,j,k,1) = 0.0_rt; @@ -680,7 +680,7 @@ int build_faces (Box const& bx, Array4 const& cell, Gpu::Atomic::Add(dp,1); } - if ((ncuts > 2) || (lxm <= small && lxp <= small && lym <= small && lyp <= small)) { + if ((ncuts > 2) || (lxm <= sml && lxp <= sml && lym <= sml && lyp <= sml)) { apz(i,j,k) = 0.0_rt; fcz(i,j,k,0) = 0.0_rt; fcz(i,j,k,1) = 0.0_rt; diff --git a/Src/EB/AMReX_EBFluxRegister_3D_C.H b/Src/EB/AMReX_EBFluxRegister_3D_C.H index 99d5ca53c25..dd2ace93fea 100644 --- a/Src/EB/AMReX_EBFluxRegister_3D_C.H +++ b/Src/EB/AMReX_EBFluxRegister_3D_C.H @@ -57,7 +57,7 @@ void eb_flux_reg_crseadd_va(int i, int j, int k, Array4 const& d, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real eb_flux_reg_cvol (int i, int j, int k, Array4 const& vfrac, - Dim3 const& ratio, Real small) noexcept + Dim3 const& ratio, Real sml) noexcept { Real cvol = Real(0.0); for (int kk = k*ratio.z; kk < (k+1)*ratio.z; ++kk) { @@ -67,7 +67,7 @@ Real eb_flux_reg_cvol (int i, int j, int k, Array4 const& vfrac, } } } - return (cvol > small) ? Real(1.0)/cvol : Real(0.0); + return (cvol > sml) ? Real(1.0)/cvol : Real(0.0); } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index e4aea5a1eb3..f8cbc4d5b50 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -544,13 +544,13 @@ STLtools::build_bvh (Triangle* begin, Triangle* end, Gpu::PinnedVector& bv #else constexpr Real eps = Real(1.e-10); #endif - Real small = eps*std::max({AMREX_D_DECL(bbox.length(0), - bbox.length(1), - bbox.length(2))}); + Real sml = eps*std::max({AMREX_D_DECL(bbox.length(0), + bbox.length(1), + bbox.length(2))}); // Make bounding box slightly bigger for robustness. for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - bbox.setLo(idim,bbox.lo(idim)-small); - bbox.setHi(idim,bbox.hi(idim)+small); + bbox.setLo(idim,bbox.lo(idim)-sml); + bbox.setHi(idim,bbox.hi(idim)+sml); } node.ntriangles = int(ntri); // NOLINT return; diff --git a/Src/EB/AMReX_EB_Slopes_2D_K.H b/Src/EB/AMReX_EB_Slopes_2D_K.H index 9854756050e..1df63d78db7 100644 --- a/Src/EB/AMReX_EB_Slopes_2D_K.H +++ b/Src/EB/AMReX_EB_Slopes_2D_K.H @@ -666,11 +666,11 @@ amrex_calc_alpha_stencil(amrex::Real q_hat, amrex::Real q_max, using namespace amrex::literals; auto alpha_temp = 0.0_rt; - auto small = 1.0e-13_rt; + auto sml = 1.0e-13_rt; - if ((q_hat-state) > small) { + if ((q_hat-state) > sml) { alpha_temp = amrex::min(1.0_rt,(q_max-state)/(q_hat-state)); - } else if ((q_hat-state) < -small) { + } else if ((q_hat-state) < -sml) { alpha_temp = amrex::min(1.0_rt,(q_min-state)/(q_hat-state)); } else { alpha_temp = 1.0_rt; diff --git a/Src/EB/AMReX_EB_Slopes_3D_K.H b/Src/EB/AMReX_EB_Slopes_3D_K.H index 46f89f4bfd1..5fc12838edb 100644 --- a/Src/EB/AMReX_EB_Slopes_3D_K.H +++ b/Src/EB/AMReX_EB_Slopes_3D_K.H @@ -808,11 +808,11 @@ amrex_calc_alpha_stencil(amrex::Real q_hat, amrex::Real q_max, using namespace amrex::literals; auto alpha_temp = amrex::Real(0.0); - auto small = amrex::Real(1.0e-13); + auto sml = amrex::Real(1.0e-13); - if ((q_hat-state) > small) { + if ((q_hat-state) > sml) { alpha_temp = amrex::min(1.0_rt,(q_max-state)/(q_hat-state)); - } else if ((q_hat-state) < -small) { + } else if ((q_hat-state) < -sml) { alpha_temp = amrex::min(1.0_rt,(q_min-state)/(q_hat-state)); } else { alpha_temp = 1.0_rt; diff --git a/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H b/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H index 90ce4a07016..e0c80f159b5 100644 --- a/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H +++ b/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H @@ -13,12 +13,12 @@ amrex_calc_alpha_stencil(Real q_hat, Real q_max, Real q_min, Real state) noexcep constexpr Real epsilon = 1.e-12; #endif - const Real small = epsilon*amrex::max(amrex::Math::abs(q_max),amrex::Math::abs(q_min)); + const Real sml = epsilon*amrex::max(amrex::Math::abs(q_max),amrex::Math::abs(q_min)); Real alpha; - if ((q_hat-state) > small) { + if ((q_hat-state) > sml) { alpha = amrex::min(1.0_rt,(q_max-state)/(q_hat-state)); - } else if ((q_hat-state) < -small) { + } else if ((q_hat-state) < -sml) { alpha = amrex::min(1.0_rt,(q_min-state)/(q_hat-state)); } else { alpha = 1.0_rt; diff --git a/Src/LinearSolvers/AMReX_GMRES.H b/Src/LinearSolvers/AMReX_GMRES.H index 415050fc5bb..1aeedccaaf6 100644 --- a/Src/LinearSolvers/AMReX_GMRES.H +++ b/Src/LinearSolvers/AMReX_GMRES.H @@ -305,8 +305,8 @@ void GMRES::cycle (V& a_xx, int& a_status, int& a_itcount, RT& a_rnorm0) auto tt = m_linop->norm2(vv_it1); - auto const small = RT((sizeof(RT) == 8) ? 1.e-99 : 1.e-30); - bool happyend = (tt < small); + auto const sml = RT((sizeof(RT) == 8) ? 1.e-99 : 1.e-30); + bool happyend = (tt < sml); if (!happyend) { m_linop->scale(vv_it1, RT(1.0)/tt); } diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H index e46041a6f7e..d985c3d20d0 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H @@ -1648,7 +1648,11 @@ MLCellLinOpT::prepareForSolve () if (Gpu::inLaunchRegion()) { #ifdef AMREX_USE_EB if (factory && !factory->isAllRegular()) { +#if defined(AMREX_USE_CUDA) && defined(_WIN32) + if (!std::is_same()) { +#else if constexpr (!std::is_same()) { +#endif amrex::Abort("MLCellLinOp with EB only works with MultiFab"); } else { Vector> tags; @@ -2011,7 +2015,11 @@ MLCellLinOpT::normInf (int amrlev, MF const& mf, bool local) const -> RT #ifdef AMREX_USE_EB const auto *factory = dynamic_cast(this->Factory(amrlev)); if (factory && !factory->isAllRegular()) { +#if defined(AMREX_USE_CUDA) && defined(_WIN32) + if (!std::is_same()) { +#else if constexpr (!std::is_same()) { +#endif amrex::Abort("MLCellLinOpT with EB only works with MultiFab"); } else { const MultiFab& vfrac = factory->getVolFrac(); diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H index aa061013d0e..b389001bc55 100644 --- a/Src/Particle/AMReX_ParticleUtil.H +++ b/Src/Particle/AMReX_ParticleUtil.H @@ -248,17 +248,17 @@ int getTileIndex (const IntVect& iv, const Box& box, const bool a_do_tiling, thi = tlo + ts_right - 1; } }; - const IntVect& small = box.smallEnd(); - const IntVect& big = box.bigEnd(); + const IntVect& sml = box.smallEnd(); + const IntVect& big = box.bigEnd(); IntVect ntiles, ivIndex, tilelo, tilehi; - AMREX_D_TERM(int iv0 = amrex::min(amrex::max(iv[0], small[0]), big[0]);, - int iv1 = amrex::min(amrex::max(iv[1], small[1]), big[1]);, - int iv2 = amrex::min(amrex::max(iv[2], small[2]), big[2]);); + AMREX_D_TERM(int iv0 = amrex::min(amrex::max(iv[0], sml[0]), big[0]);, + int iv1 = amrex::min(amrex::max(iv[1], sml[1]), big[1]);, + int iv2 = amrex::min(amrex::max(iv[2], sml[2]), big[2]);); - AMREX_D_TERM(tiling_1d(iv0, small[0], big[0], a_tile_size[0], ntiles[0], ivIndex[0], tilelo[0], tilehi[0]);, - tiling_1d(iv1, small[1], big[1], a_tile_size[1], ntiles[1], ivIndex[1], tilelo[1], tilehi[1]);, - tiling_1d(iv2, small[2], big[2], a_tile_size[2], ntiles[2], ivIndex[2], tilelo[2], tilehi[2]);); + AMREX_D_TERM(tiling_1d(iv0, sml[0], big[0], a_tile_size[0], ntiles[0], ivIndex[0], tilelo[0], tilehi[0]);, + tiling_1d(iv1, sml[1], big[1], a_tile_size[1], ntiles[1], ivIndex[1], tilelo[1], tilehi[1]);, + tiling_1d(iv2, sml[2], big[2], a_tile_size[2], ntiles[2], ivIndex[2], tilelo[2], tilehi[2]);); tbx = Box(tilelo, tilehi); @@ -280,13 +280,13 @@ int numTilesInBox (const Box& box, const bool a_do_tiling, const IntVect& a_tile ntile = amrex::max(ncells/tilesize, 1); }; - const IntVect& small = box.smallEnd(); - const IntVect& big = box.bigEnd(); + const IntVect& sml = box.smallEnd(); + const IntVect& big = box.bigEnd(); IntVect ntiles; - AMREX_D_TERM(tiling_1d(small[0], big[0], a_tile_size[0], ntiles[0]);, - tiling_1d(small[1], big[1], a_tile_size[1], ntiles[1]);, - tiling_1d(small[2], big[2], a_tile_size[2], ntiles[2]);); + AMREX_D_TERM(tiling_1d(sml[0], big[0], a_tile_size[0], ntiles[0]);, + tiling_1d(sml[1], big[1], a_tile_size[1], ntiles[1]);, + tiling_1d(sml[2], big[2], a_tile_size[2], ntiles[2]);); return AMREX_D_TERM(ntiles[0], *=ntiles[1], *=ntiles[2]); } diff --git a/Tests/DeviceGlobal/CMakeLists.txt b/Tests/DeviceGlobal/CMakeLists.txt index 990662d406b..ecc90373b13 100644 --- a/Tests/DeviceGlobal/CMakeLists.txt +++ b/Tests/DeviceGlobal/CMakeLists.txt @@ -4,6 +4,10 @@ if (( (AMReX_GPU_BACKEND STREQUAL "CUDA") OR return() endif () +if (WIN32) + return() +endif() + foreach(D IN LISTS AMReX_SPACEDIM) set(_sources main.cpp global_vars.cpp init.cpp work.cpp) set(_input_files) diff --git a/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H b/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H index 86af431d2ee..810778d8da8 100644 --- a/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H +++ b/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H @@ -240,7 +240,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /* #endif constexpr Real weakwv = Real(1.e-3); - constexpr Real small = Real(1.e-6); + constexpr Real sml = Real(1.e-6); Real clsql = gamma*pl*rl; Real clsqr = gamma*pr*rr; @@ -248,7 +248,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /* Real wr = std::sqrt(clsqr); Real cleft = wl/rl; Real cright = wr/rr; - Real ccsmall = small*(cleft+cright); + Real ccsmall = sml*(cleft+cright); Real pstar = (wl*pr + wr*pl - wr*wl*(ur-ul))/(wl+wr); pstar = amrex::max(pstar,smallp); diff --git a/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H b/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H index 2734976efcd..687b2a1255b 100644 --- a/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H +++ b/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H @@ -191,7 +191,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /* using amrex::Real; constexpr Real weakwv = Real(1.e-3); - constexpr Real small = Real(1.e-6); + constexpr Real sml = Real(1.e-6); Real clsql = gamma*pl*rl; Real clsqr = gamma*pr*rr; @@ -199,7 +199,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /* Real wr = std::sqrt(clsqr); Real cleft = wl/rl; Real cright = wr/rr; - Real ccsmall = small*(cleft+cright); + Real ccsmall = sml*(cleft+cright); Real pstar = (wl*pr + wr*pl - wr*wl*(ur-ul))/(wl+wr); pstar = amrex::max(pstar,smallp); diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake index a7863f125e3..ef55a1053ca 100644 --- a/Tools/CMake/AMReXOptions.cmake +++ b/Tools/CMake/AMReXOptions.cmake @@ -482,7 +482,7 @@ option(AMReX_DIFFERENT_COMPILER "Allow an application to use a different compiler than the one used to build AMReX" OFF) print_option(AMReX_DIFFERENT_COMPILER) -if (AMReX_BUILD_SHARED_LIBS AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux") ) +if ( NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux") ) option(AMReX_PROBINIT "Enable support for probin file" OFF) else () cmake_dependent_option(AMReX_PROBINIT "Enable support for probin file" ON