diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 0c894e46862..c162cb04111 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -18,6 +18,7 @@ jobs:
     if: needs.check_changes.outputs.has_non_docs_changes == 'true'
     steps:
     - uses: actions/checkout@v4
+    # If we add ccache back, don't forget to update cleanup-cache.yml
     #- name: Set Up Cache
     #  uses: actions/cache@v3
     #  with:
@@ -66,6 +67,7 @@ jobs:
     if: needs.check_changes.outputs.has_non_docs_changes == 'true'
     steps:
     - uses: actions/checkout@v4
+    # If we add ccache back, don't forget to update cleanup-cache.yml
     #- name: Set Up Cache
     #  uses: actions/cache@v3
     #  with:
@@ -144,4 +146,32 @@ jobs:
         set "PATH=%PATH%;D:\\a\amrex\amrex\installdir\bin"
         cmake --build build --config Release --target test_install
 
-  # If we add ccache back, don't forget to update cleanup-cache.yml
+  tests_cuda:  # Configures, builds and installs with the CUDA backend; no test target is run in this job.
+    name: CUDA on Windows
+    runs-on: windows-latest
+    needs: check_changes
+    if: needs.check_changes.outputs.has_non_docs_changes == 'true'  # skipped when only docs changed
+    steps:
+    - uses: Jimver/cuda-toolkit@v0.2.19  # NOTE(review): presumably sets CUDA_PATH, which the Compile step reads - confirm
+      id: cuda-toolkit  # not referenced by any later step in this job
+      with:
+        cuda: '12.6.1'  # quoted so the version stays a string
+        use-github-cache: 'false'
+    - uses: actions/checkout@v4
+    - name: Compile  # script below uses backtick continuations and $Env:, i.e. PowerShell syntax
+      run: |
+        cmake -S . -B build                       `
+              -DCUDAToolkit_ROOT="$Env:CUDA_PATH" `
+              -DCMAKE_VERBOSE_MAKEFILE=ON         `
+              -DCMAKE_BUILD_TYPE=Release          `
+              -DAMReX_GPU_BACKEND=CUDA            `
+              -DAMReX_CUDA_ARCH="8.0"             `
+              -DAMReX_ENABLE_TESTS=ON             `
+              -DAMReX_EB=ON                       `
+              -DAMReX_FFT=ON                      `
+              -DAMReX_LINEAR_SOLVERS=ON           `
+              -DAMReX_PARTICLES=ON                `
+              -DAMReX_FORTRAN=OFF                 `
+              -DAMReX_MPI=OFF
+        cmake --build build --config Release -j 4
+        cmake --build build --config Release --target install
diff --git a/Src/Base/AMReX_BoxList.cpp b/Src/Base/AMReX_BoxList.cpp
index 4459b24dba9..b5fbd58a556 100644
--- a/Src/Base/AMReX_BoxList.cpp
+++ b/Src/Base/AMReX_BoxList.cpp
@@ -188,7 +188,7 @@ BoxList::BoxList(const Box& bx, const IntVect& tilesize)
         ntiles *= nt[d];
     }
 
-    IntVect small, big, ijk;  // note that the initial values are all zero.
+    IntVect sml, big, ijk;  // note that the initial values are all zero.
     ijk[0] = -1;
     for (int t=0; t<ntiles; ++t) {
         for (int d=0; d<AMREX_SPACEDIM; d++) {
@@ -201,11 +201,11 @@ BoxList::BoxList(const Box& bx, const IntVect& tilesize)
         }
 
         for (int d=0; d<AMREX_SPACEDIM; d++) {
-            small[d] = ijk[d]*tilesize[d];
-            big[d] = std::min(small[d]+tilesize[d]-1, bx.length(d)-1);
+            sml[d] = ijk[d]*tilesize[d];
+            big[d] = std::min(sml[d]+tilesize[d]-1, bx.length(d)-1);
         }
 
-        Box tbx(small, big, btype);
+        Box tbx(sml, big, btype);
         tbx.shift(bx.smallEnd());
         push_back(tbx);
     }
diff --git a/Src/Base/AMReX_FabArray.H b/Src/Base/AMReX_FabArray.H
index a67a72f0a38..5cb640d2f13 100644
--- a/Src/Base/AMReX_FabArray.H
+++ b/Src/Base/AMReX_FabArray.H
@@ -537,55 +537,117 @@ public:
     FAB const* fabPtr (int K) const noexcept;
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    void prefetchToHost (const MFIter& mfi) const noexcept;
+    void prefetchToHost (const MFIter& mfi) const noexcept  // forwards to the FAB selected by mfi
+    {
+#ifdef AMREX_USE_CUDA
+        this->fabPtr(mfi)->prefetchToHost();
+#else  // not a CUDA build: this call is a no-op
+        amrex::ignore_unused(mfi);  // mfi is otherwise unreferenced on this path
+#endif  // AMREX_USE_CUDA
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    void prefetchToDevice (const MFIter& mfi) const noexcept;
+    void prefetchToDevice (const MFIter& mfi) const noexcept  // forwards to the FAB selected by mfi
+    {
+#ifdef AMREX_USE_CUDA
+        this->fabPtr(mfi)->prefetchToDevice();
+#else  // not a CUDA build: this call is a no-op
+        amrex::ignore_unused(mfi);  // mfi is otherwise unreferenced on this path
+#endif  // AMREX_USE_CUDA
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi) const noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi) const noexcept
+    {
+        return fabPtr(mfi)->const_array();  // const overload: yields the read-only view, not array()
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi) noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi) noexcept
+    {
+        return fabPtr(mfi)->array();  // mutable view of the FAB selected by mfi
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> array (int K) const noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type const> array (int K) const noexcept
+    {
+        return fabPtr(K)->const_array();  // const overload: read-only view of the FAB that fabPtr(K) returns
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type> array (int K) noexcept;
+    Array4<typename FabArray<FAB>::value_type> array (int K) noexcept
+    {
+        return fabPtr(K)->array();  // mutable view of the FAB that fabPtr(K) returns
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi) const noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi) const noexcept
+    {
+        return fabPtr(mfi)->const_array();  // read-only view of the FAB selected by mfi
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> const_array (int K) const noexcept;
+    Array4<typename FabArray<FAB>::value_type const> const_array (int K) const noexcept
+    {
+        return fabPtr(K)->const_array();  // read-only view of the FAB that fabPtr(K) returns
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi, int start_comp) const noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi, int start_comp) const noexcept
+    {
+        return fabPtr(mfi)->const_array(start_comp);  // const overload: read-only view; start_comp is forwarded unchanged
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi, int start_comp) noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi, int start_comp) noexcept
+    {
+        return fabPtr(mfi)->array(start_comp);  // mutable view; start_comp is forwarded unchanged
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> array (int K, int start_comp) const noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type const> array (int K, int start_comp) const noexcept
+    {
+        return fabPtr(K)->const_array(start_comp);  // const overload: read-only view; start_comp is forwarded unchanged
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type> array (int K, int start_comp) noexcept;
+    Array4<typename FabArray<FAB>::value_type> array (int K, int start_comp) noexcept
+    {
+        return fabPtr(K)->array(start_comp);  // mutable view; start_comp is forwarded unchanged
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept;
-    //
+    Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept
+    {
+        return fabPtr(mfi)->const_array(start_comp);  // read-only view; start_comp is forwarded unchanged
+    }
+
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    Array4<typename FabArray<FAB>::value_type const> const_array (int K, int start_comp) const noexcept;
+    Array4<typename FabArray<FAB>::value_type const> const_array (int K, int start_comp) const noexcept
+    {
+        return fabPtr(K)->const_array(start_comp);  // read-only view; start_comp is forwarded unchanged
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    MultiArray4<typename FabArray<FAB>::value_type> arrays () noexcept;
+    MultiArray4<typename FabArray<FAB>::value_type> arrays () noexcept
+    {
+        build_arrays();  // NOTE(review): presumably prepares m_arrays before it is handed out - confirm
+        return m_arrays;  // mutable-element flavour (the const overload returns m_const_arrays)
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    MultiArray4<typename FabArray<FAB>::value_type const> arrays () const noexcept;
+    MultiArray4<typename FabArray<FAB>::value_type const> arrays () const noexcept
+    {
+        build_arrays();  // invoked from a const method; NOTE(review): presumably prepares m_const_arrays - confirm
+        return m_const_arrays;  // const overload returns the const-element member
+    }
 
     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
-    MultiArray4<typename FabArray<FAB>::value_type const> const_arrays () const noexcept;
+    MultiArray4<typename FabArray<FAB>::value_type const> const_arrays () const noexcept
+    {
+        build_arrays();  // invoked from a const method; NOTE(review): presumably prepares m_const_arrays - confirm
+        return m_const_arrays;  // same body as the const overload of arrays()
+    }
 
     //! Explicitly set the Kth FAB in the FabArray to point to elem.
     void setFab (int boxno, std::unique_ptr<FAB> elem);
@@ -1531,153 +1593,6 @@ FabArray<FAB>::fabPtr (int K) const noexcept
     return m_fabs_v[li];
 }
 
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-void
-FabArray<FAB>::prefetchToHost (const MFIter& mfi) const noexcept
-{
-#ifdef AMREX_USE_CUDA
-    this->fabPtr(mfi)->prefetchToHost();
-#else
-    amrex::ignore_unused(mfi);
-#endif
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-void
-FabArray<FAB>::prefetchToDevice (const MFIter& mfi) const noexcept
-{
-#ifdef AMREX_USE_CUDA
-    this->fabPtr(mfi)->prefetchToDevice();
-#else
-    amrex::ignore_unused(mfi);
-#endif
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::array (const MFIter& mfi) const noexcept
-{
-    return fabPtr(mfi)->const_array();
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type>
-FabArray<FAB>::array (const MFIter& mfi) noexcept
-{
-    return fabPtr(mfi)->array();
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::array (int K) const noexcept
-{
-    return fabPtr(K)->const_array();
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type>
-FabArray<FAB>::array (int K) noexcept
-{
-    return fabPtr(K)->array();
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::const_array (const MFIter& mfi) const noexcept
-{
-    return fabPtr(mfi)->const_array();
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::const_array (int K) const noexcept
-{
-    return fabPtr(K)->const_array();
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::array (const MFIter& mfi, int start_comp) const noexcept
-{
-    return fabPtr(mfi)->const_array(start_comp);
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type>
-FabArray<FAB>::array (const MFIter& mfi, int start_comp) noexcept
-{
-    return fabPtr(mfi)->array(start_comp);
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::array (int K, int start_comp) const noexcept
-{
-    return fabPtr(K)->const_array(start_comp);
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type>
-FabArray<FAB>::array (int K, int start_comp) noexcept
-{
-    return fabPtr(K)->array(start_comp);
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::const_array (const MFIter& mfi, int start_comp) const noexcept
-{
-    return fabPtr(mfi)->const_array(start_comp);
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-Array4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::const_array (int K, int start_comp) const noexcept
-{
-    return fabPtr(K)->const_array(start_comp);
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-MultiArray4<typename FabArray<FAB>::value_type>
-FabArray<FAB>::arrays () noexcept
-{
-    build_arrays();
-    return m_arrays;
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-MultiArray4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::arrays () const noexcept
-{
-    build_arrays();
-    return m_const_arrays;
-}
-
-template <class FAB>
-template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
-MultiArray4<typename FabArray<FAB>::value_type const>
-FabArray<FAB>::const_arrays () const noexcept
-{
-    build_arrays();
-    return m_const_arrays;
-}
-
 template <class FAB>
 template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
 void
diff --git a/Src/Base/AMReX_FabArrayBase.cpp b/Src/Base/AMReX_FabArrayBase.cpp
index 9acf440d98f..5120e23be3f 100644
--- a/Src/Base/AMReX_FabArrayBase.cpp
+++ b/Src/Base/AMReX_FabArrayBase.cpp
@@ -2337,7 +2337,7 @@ FabArrayBase::buildTileArray (const IntVect& tileSize, TileArray& ta) const
                 ntiles *= nt_in_fab[d];
             }
 
-            IntVect small, big, ijk;  // note that the initial values are all zero.
+            IntVect sml, big, ijk;  // note that the initial values are all zero.
             ijk[0] = -1;
             for (int t = 0; t < ntiles; ++t) {
                 ta.indexMap.push_back(K);
@@ -2356,15 +2356,15 @@ FabArrayBase::buildTileArray (const IntVect& tileSize, TileArray& ta) const
 
                 for (int d=0; d<AMREX_SPACEDIM; d++) {
                     if (ijk[d] < nleft[d]) {
-                        small[d] = ijk[d]*(tsize[d]+1);
-                        big[d] = small[d] + tsize[d];
+                        sml[d] = ijk[d]*(tsize[d]+1);
+                        big[d] = sml[d] + tsize[d];
                     } else {
-                        small[d] = ijk[d]*tsize[d] + nleft[d];
-                        big[d] = small[d] + tsize[d] - 1;
+                        sml[d] = ijk[d]*tsize[d] + nleft[d];
+                        big[d] = sml[d] + tsize[d] - 1;
                     }
                 }
 
-                Box tbx(small, big, IndexType::TheCellType());
+                Box tbx(sml, big, IndexType::TheCellType());
                 tbx.shift(bx.smallEnd());
 
                 ta.tileArray.push_back(tbx);
diff --git a/Src/Base/AMReX_GpuLaunch.nolint.H b/Src/Base/AMReX_GpuLaunch.nolint.H
index bb1bbb2453b..c09181cd729 100644
--- a/Src/Base/AMReX_GpuLaunch.nolint.H
+++ b/Src/Base/AMReX_GpuLaunch.nolint.H
@@ -38,22 +38,6 @@
 #define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(...) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(__VA_ARGS__)
 #endif
 
-#define AMREX_FOR_1D(...)      AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__)
-#define AMREX_FOR_3D(...)      AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__)
-#define AMREX_FOR_4D(...)      AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__)
-
-#define AMREX_PARALLEL_FOR_1D(...)      AMREX_GPU_DEVICE_PARALLEL_FOR_1D(__VA_ARGS__)
-#define AMREX_PARALLEL_FOR_3D(...)      AMREX_GPU_DEVICE_PARALLEL_FOR_3D(__VA_ARGS__)
-#define AMREX_PARALLEL_FOR_4D(...)      AMREX_GPU_DEVICE_PARALLEL_FOR_4D(__VA_ARGS__)
-
-#define AMREX_HOST_DEVICE_FOR_1D(...) AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__)
-#define AMREX_HOST_DEVICE_FOR_3D(...) AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__)
-#define AMREX_HOST_DEVICE_FOR_4D(...) AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__)
-
-#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(__VA_ARGS__)
-#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(__VA_ARGS__)
-#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(__VA_ARGS__)
-
 #ifdef AMREX_USE_GPU
 
 #ifndef AMREX_USE_SYCL
diff --git a/Src/Base/AMReX_GpuLaunchMacrosC.nolint.H b/Src/Base/AMReX_GpuLaunchMacrosC.nolint.H
index 8f95fc520d8..96b6ec5414e 100644
--- a/Src/Base/AMReX_GpuLaunchMacrosC.nolint.H
+++ b/Src/Base/AMReX_GpuLaunchMacrosC.nolint.H
@@ -35,18 +35,18 @@
 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(__VA_ARGS__)
 #define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)
 
-#define AMREX_GPU_FOR_1D_IMPL(n,i,block) \
+#define AMREX_FOR_1D_IMPL(n,i,block) \
         for (auto i = decltype(n){0}; i < n; ++i) {     \
             block \
         }
 
-#define AMREX_GPU_PARALLEL_FOR_1D_IMPL(n,i,block) \
+#define AMREX_PARALLEL_FOR_1D_IMPL(n,i,block) \
         AMREX_PRAGMA_SIMD \
         for (auto i = decltype(n){0}; i < n; ++i) {     \
             block \
         }
 
-#define AMREX_GPU_FOR_3D_IMPL(box,i,j,k,block) \
+#define AMREX_FOR_3D_IMPL(box,i,j,k,block) \
     { \
         const auto amrex_i_lo = amrex::lbound(box); \
         const auto amrex_i_hi = amrex::ubound(box); \
@@ -57,7 +57,7 @@
         }}} \
     }
 
-#define AMREX_GPU_PARALLEL_FOR_3D_IMPL(box,i,j,k,block) \
+#define AMREX_PARALLEL_FOR_3D_IMPL(box,i,j,k,block) \
     { \
         const auto amrex_i_lo = amrex::lbound(box); \
         const auto amrex_i_hi = amrex::ubound(box); \
@@ -69,7 +69,7 @@
         }}} \
     }
 
-#define AMREX_GPU_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
+#define AMREX_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
     { \
         const auto amrex_i_lo = amrex::lbound(box); \
         const auto amrex_i_hi = amrex::ubound(box); \
@@ -81,7 +81,7 @@
         }}}} \
     }
 
-#define AMREX_GPU_PARALLEL_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
+#define AMREX_PARALLEL_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
     { \
         const auto amrex_i_lo = amrex::lbound(box); \
         const auto amrex_i_hi = amrex::ubound(box); \
@@ -94,19 +94,18 @@
         }}}} \
     }
 
-#define AMREX_GPU_HOST_DEVICE_FOR_1D(...) AMREX_GPU_FOR_1D_IMPL(__VA_ARGS__);
-#define AMREX_GPU_DEVICE_FOR_1D(...)      AMREX_GPU_FOR_1D_IMPL(__VA_ARGS__);
+#define AMREX_FOR_1D(...) AMREX_FOR_1D_IMPL(__VA_ARGS__);
+#define AMREX_FOR_3D(...) AMREX_FOR_3D_IMPL(__VA_ARGS__);
+#define AMREX_FOR_4D(...) AMREX_FOR_4D_IMPL(__VA_ARGS__);
 
-#define AMREX_GPU_HOST_DEVICE_FOR_3D(...) AMREX_GPU_FOR_3D_IMPL(__VA_ARGS__);
-#define AMREX_GPU_DEVICE_FOR_3D(...)      AMREX_GPU_FOR_3D_IMPL(__VA_ARGS__);
+#define AMREX_PARALLEL_FOR_1D(...) AMREX_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
+#define AMREX_PARALLEL_FOR_3D(...) AMREX_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
+#define AMREX_PARALLEL_FOR_4D(...) AMREX_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);
 
-#define AMREX_GPU_HOST_DEVICE_FOR_4D(...) AMREX_GPU_FOR_4D_IMPL(__VA_ARGS__);
-#define AMREX_GPU_DEVICE_FOR_4D(...)      AMREX_GPU_FOR_4D_IMPL(__VA_ARGS__);
+#define AMREX_HOST_DEVICE_FOR_1D(...) AMREX_FOR_1D_IMPL(__VA_ARGS__);
+#define AMREX_HOST_DEVICE_FOR_3D(...) AMREX_FOR_3D_IMPL(__VA_ARGS__);
+#define AMREX_HOST_DEVICE_FOR_4D(...) AMREX_FOR_4D_IMPL(__VA_ARGS__);
 
-#define AMREX_GPU_DEVICE_PARALLEL_FOR_1D(...)      AMREX_GPU_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
-#define AMREX_GPU_DEVICE_PARALLEL_FOR_3D(...)      AMREX_GPU_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
-#define AMREX_GPU_DEVICE_PARALLEL_FOR_4D(...)      AMREX_GPU_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);
-
-#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
-#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
-#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);
diff --git a/Src/Base/AMReX_GpuLaunchMacrosG.nolint.H b/Src/Base/AMReX_GpuLaunchMacrosG.nolint.H
index 757463452b8..a1675e5b919 100644
--- a/Src/Base/AMReX_GpuLaunchMacrosG.nolint.H
+++ b/Src/Base/AMReX_GpuLaunchMacrosG.nolint.H
@@ -454,7 +454,7 @@
 // FOR_1D
 
 #ifdef AMREX_USE_SYCL
-#define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \
+#define AMREX_HOST_DEVICE_FOR_1D(n,i,block) \
 { \
     auto const& amrex_i_n = n; \
     using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
@@ -465,7 +465,7 @@
     } \
 }
 #else
-#define AMREX_GPU_HOST_DEVICE_FOR_1D(n,i,block) \
+#define AMREX_HOST_DEVICE_FOR_1D(n,i,block) \
 { \
     auto const& amrex_i_n = n; \
     using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
@@ -479,7 +479,39 @@
 }
 #endif
 
-#define AMREX_GPU_DEVICE_FOR_1D(n,i,block) \
+#ifdef AMREX_USE_SYCL  // SYCL variant: device launch only; aborts when not in a launch region
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(n,i,block) \
+{ \
+    auto const& amrex_i_n = n; \
+    using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
+    if (amrex::Gpu::inLaunchRegion()) { \
+        amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
+    } else { \
+        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel.  It takes too long to compile"); \
+    } \
+}
+#else  // non-SYCL variant: outside a launch region, runs the block in a host loop marked AMREX_PRAGMA_SIMD
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(n,i,block) \
+{ \
+    auto const& amrex_i_n = n; \
+    using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
+    if (amrex::Gpu::inLaunchRegion()) { \
+        amrex::ParallelFor(amrex_i_n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
+    } else { \
+        auto amrex_i_lambda = [=] (amrex_i_inttype i) noexcept block; \
+        AMREX_PRAGMA_SIMD \
+        for (amrex_i_inttype i = 0; i < amrex_i_n; ++i) amrex_i_lambda(i); \
+    } \
+}
+#endif  // AMREX_USE_SYCL
+
+#define AMREX_FOR_1D(n,i,block) \
+{ \
+    using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
+    amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
+}
+
+#define AMREX_PARALLEL_FOR_1D(n,i,block) \
 { \
     using amrex_i_inttype = std::remove_const_t<decltype(n)>; \
     amrex::ParallelFor(n,[=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept block); \
@@ -488,7 +520,7 @@
 // FOR_3D
 
 #ifdef AMREX_USE_SYCL
-#define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
+#define AMREX_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
 { \
     auto const& amrex_i_box = box; \
     if (amrex::Gpu::inLaunchRegion()) { \
@@ -498,7 +530,7 @@
     } \
 }
 #else
-#define AMREX_GPU_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
+#define AMREX_HOST_DEVICE_FOR_3D(box,i,j,k,block) \
 { \
     auto const& amrex_i_box = box; \
     if (amrex::Gpu::inLaunchRegion()) { \
@@ -509,7 +541,34 @@
 }
 #endif
 
-#define AMREX_GPU_DEVICE_FOR_3D(box,i,j,k,block) \
+#ifdef AMREX_USE_SYCL  // SYCL variant: device launch only; aborts when not in a launch region
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(box,i,j,k,block) \
+{ \
+    auto const& amrex_i_box = box; \
+    if (amrex::Gpu::inLaunchRegion()) { \
+        amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
+    } else { \
+        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel.  It takes too long to compile"); \
+    } \
+}
+#else  // non-SYCL variant: outside a launch region, runs the block via amrex::LoopConcurrentOnCpu
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(box,i,j,k,block) \
+{ \
+    auto const& amrex_i_box = box; \
+    if (amrex::Gpu::inLaunchRegion()) { \
+        amrex::ParallelFor(amrex_i_box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
+    } else { \
+        amrex::LoopConcurrentOnCpu(amrex_i_box,[=] (int i, int j, int k) noexcept block); \
+    } \
+}
+#endif  // AMREX_USE_SYCL
+
+#define AMREX_FOR_3D(box,i,j,k,block) \
+{ \
+    amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
+}
+
+#define AMREX_PARALLEL_FOR_3D(box,i,j,k,block) \
 { \
     amrex::ParallelFor(box,[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept block); \
 }
@@ -517,7 +576,7 @@
 // FOR_4D
 
 #ifdef AMREX_USE_SYCL
-#define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
+#define AMREX_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
 { \
     auto const& amrex_i_box = box; \
     auto const& amrex_i_ncomp = ncomp; \
@@ -528,7 +587,7 @@
     } \
 }
 #else
-#define AMREX_GPU_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
+#define AMREX_HOST_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
 { \
     auto const& amrex_i_box = box; \
     auto const& amrex_i_ncomp = ncomp; \
@@ -540,15 +599,36 @@
 }
 #endif
 
-#define AMREX_GPU_DEVICE_FOR_4D(box,ncomp,i,j,k,n,block) \
+#ifdef AMREX_USE_SYCL  // SYCL variant: device launch only; aborts when not in a launch region
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \
 { \
-    amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
+    auto const& amrex_i_box = box; \
+    auto const& amrex_i_ncomp = ncomp; \
+    if (amrex::Gpu::inLaunchRegion()) { \
+        amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
+    } else { \
+        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel.  It takes too long to compile"); \
+    } \
 }
+#else  // non-SYCL variant: outside a launch region, runs the block via amrex::LoopConcurrentOnCpu
+#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \
+{ \
+    auto const& amrex_i_box = box; \
+    auto const& amrex_i_ncomp = ncomp; \
+    if (amrex::Gpu::inLaunchRegion()) { \
+        amrex::ParallelFor(amrex_i_box,amrex_i_ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
+    } else { \
+        amrex::LoopConcurrentOnCpu(amrex_i_box,amrex_i_ncomp,[=] (int i, int j, int k, int n) noexcept block); \
+    } \
+}
+#endif  // AMREX_USE_SYCL
 
-#define AMREX_GPU_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__)
-#define AMREX_GPU_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__)
-#define AMREX_GPU_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__)
+#define AMREX_FOR_4D(box,ncomp,i,j,k,n,block) \
+{ \
+    amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
+}
 
-#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__)
-#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__)
-#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__)
+#define AMREX_PARALLEL_FOR_4D(box,ncomp,i,j,k,n,block) \
+{ \
+    amrex::ParallelFor(box,ncomp,[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept block); \
+}
diff --git a/Src/Base/AMReX_GpuUtility.H b/Src/Base/AMReX_GpuUtility.H
index 4adc111f5e2..fa988e52521 100644
--- a/Src/Base/AMReX_GpuUtility.H
+++ b/Src/Base/AMReX_GpuUtility.H
@@ -149,7 +149,10 @@ namespace Gpu {
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     bool isnan (T m) noexcept
     {
-#if defined(AMREX_USE_SYCL)
+#if defined(_WIN32)  // Windows: device code avoids std::isnan; host code still uses it
+        AMREX_IF_ON_DEVICE((return m != m;))  // NaN is the only value that compares unequal to itself
+        AMREX_IF_ON_HOST((return std::isnan(m);))
+#elif defined(AMREX_USE_SYCL)
         return sycl::isnan(m);
 #else
         return std::isnan(m);
@@ -160,7 +163,10 @@ namespace Gpu {
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     bool isinf (T m) noexcept
     {
-#if defined(AMREX_USE_SYCL)
+#if defined(_WIN32)  // Windows: device code avoids std::isinf; host code still uses it
+        AMREX_IF_ON_DEVICE((return (2*m == m) && (m != 0);))  // for floating-point m, 2*m == m only for 0 and +/-inf; the second test drops 0
+        AMREX_IF_ON_HOST((return std::isinf(m);))
+#elif defined(AMREX_USE_SYCL)
         return sycl::isinf(m);
 #else
         return std::isinf(m);
diff --git a/Src/Base/AMReX_MultiFab.cpp b/Src/Base/AMReX_MultiFab.cpp
index 2ba4aa7bc48..9059c1d5e68 100644
--- a/Src/Base/AMReX_MultiFab.cpp
+++ b/Src/Base/AMReX_MultiFab.cpp
@@ -628,7 +628,11 @@ MultiFab::is_finite (int scomp, int ncomp, const IntVect& ngrow, bool local) con
         r = ParReduce(TypeList<ReduceOpLogicalOr>{}, TypeList<bool>{}, *this, ngrow, ncomp,
         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept -> GpuTuple<bool>
         {
+#if defined(_WIN32)  // Windows: "not finite" is spelled as isnan || isinf using the Gpu:: helpers
+            return Gpu::isnan(ma[box_no](i,j,k,n+scomp)) || Gpu::isinf(ma[box_no](i,j,k,n+scomp));
+#else
             return !amrex::Math::isfinite(ma[box_no](i,j,k,n+scomp));
+#endif  // _WIN32
         });
     } else
 #endif
diff --git a/Src/Base/AMReX_NonLocalBC.H b/Src/Base/AMReX_NonLocalBC.H
index f88e1d9aca4..3da847d0bf0 100644
--- a/Src/Base/AMReX_NonLocalBC.H
+++ b/Src/Base/AMReX_NonLocalBC.H
@@ -602,8 +602,10 @@ UnpackRecvBuffers (const PackComponents& components, FabArray<FAB>& dest, const
 }
 #endif // AMREX_USE_MPI
 
+#ifndef _WIN32
 static_assert(IsDataPacking<PackComponents, FArrayBox>(), // NOLINT(bugprone-throw-keyword-missing)
               "PackComponents is expected to satisfy the concept DataPacking.");
+#endif
 
 ////////////////////////////////////////////////////////////////////////////////////
 //                                    [DataPacking.ApplyDtosAndProjectionOnReciever]
@@ -663,8 +665,10 @@ UnpackRecvBuffers (const ApplyDtosAndProjectionOnReciever<DTOS, FabProj>& packin
 }
 #endif // AMREX_USE_MPI
 
+#ifndef _WIN32
 static_assert(IsDataPacking<ApplyDtosAndProjectionOnReciever<>, FArrayBox>(), // NOLINT(bugprone-throw-keyword-missing)
               "ApplyDtosAndProjectionOnReciever<> is expected to satisfy the DataPacking concept.");
+#endif
 
 ////////////////////////////////////////////////////////////////////////////////////
 //                                                             [ParallelCopy_nowait]
diff --git a/Src/Base/AMReX_PODVector.H b/Src/Base/AMReX_PODVector.H
index 464bb552ee1..e4df8562c1d 100644
--- a/Src/Base/AMReX_PODVector.H
+++ b/Src/Base/AMReX_PODVector.H
@@ -47,7 +47,11 @@ namespace amrex
                                      [[maybe_unused]] Allocator<T> const& allocator)
         {
 #ifdef AMREX_USE_GPU
+#ifdef _WIN32  // NOTE(review): plain `if` replaces `if constexpr` on Windows; reason is not stated here - confirm
+            if           (RunOnGpu<Allocator<T>>::value)  // not a discarded statement: the body is compiled for every Allocator
+#else
             if constexpr (RunOnGpu<Allocator<T>>::value)
+#endif  // _WIN32
             {
                 amrex::ParallelFor(count, [=] AMREX_GPU_DEVICE (Size i) noexcept {
                     data[i] = value;
@@ -55,7 +59,11 @@ namespace amrex
                 Gpu::streamSynchronize();
                 return;
             }
+#ifdef _WIN32  // NOTE(review): plain `else if` replaces `else if constexpr` on Windows; reason is not stated here - confirm
+            else if           (IsPolymorphicArenaAllocator<Allocator<T>>::value)  // not constexpr: allocator.arena() below must compile for every Allocator
+#else
             else if constexpr (IsPolymorphicArenaAllocator<Allocator<T>>::value)
+#endif  // _WIN32
             {
                 if (allocator.arena()->isManaged() ||
                     allocator.arena()->isDevice())
@@ -103,7 +111,11 @@ namespace amrex
                              [[maybe_unused]] Allocator<T> const& allocator)
         {
 #ifdef AMREX_USE_GPU
+#ifdef _WIN32  // NOTE(review): plain `if` replaces `if constexpr` on Windows; reason is not stated here - confirm
+            if           (RunOnGpu<Allocator<T>>::value)  // not a discarded statement: the body is compiled for every Allocator
+#else
             if constexpr (RunOnGpu<Allocator<T>>::value)
+#endif  // _WIN32
             {
                 amrex::ParallelFor(count, [=] AMREX_GPU_DEVICE (Size i) noexcept {
                     dst[i] = src[i];
@@ -111,7 +123,11 @@ namespace amrex
                 Gpu::Device::streamSynchronize();
                 return;
             }
+#ifdef _WIN32  // NOTE(review): plain `else if` replaces `else if constexpr` on Windows; reason is not stated here - confirm
+            else if           (IsPolymorphicArenaAllocator<Allocator<T>>::value)  // not constexpr: allocator.arena() below must compile for every Allocator
+#else
             else if constexpr (IsPolymorphicArenaAllocator<Allocator<T>>::value)
+#endif  // _WIN32
             {
                 if (allocator.arena()->isManaged() ||
                     allocator.arena()->isDevice())
diff --git a/Src/EB/AMReX_EB2_2D_C.cpp b/Src/EB/AMReX_EB2_2D_C.cpp
index 231faf0cb88..2795a07d000 100644
--- a/Src/EB/AMReX_EB2_2D_C.cpp
+++ b/Src/EB/AMReX_EB2_2D_C.cpp
@@ -13,11 +13,11 @@ void set_eb_data (const int i, const int j,
 {
 #ifdef AMREX_USE_FLOAT
     constexpr Real almostone = 1.0_rt-1.e-6_rt;
-    constexpr Real small = 1.e-5_rt;
+    constexpr Real sml = 1.e-5_rt;
     constexpr Real tiny = 1.e-6_rt;
 #else
     constexpr Real almostone = 1.0-1.e-15;
-    constexpr Real small = 1.e-14;
+    constexpr Real sml = 1.e-14;
     constexpr Real tiny = 1.e-15;
 #endif
 
@@ -117,11 +117,11 @@ void set_eb_data (const int i, const int j,
 
         vfrac(i,j,0) = 0.5_rt*(af1+af2)/(dx[0]*dx[1]);
 
-        if (vfrac(i,j,0) > 1.0_rt-small) {
+        if (vfrac(i,j,0) > 1.0_rt-sml) {
             vfrac(i,j,0) = 1.0_rt;
             vcent(i,j,0,0) = 0.0_rt;
             vcent(i,j,0,1) = 0.0_rt;
-        } else if (vfrac(i,j,0) < small) {
+        } else if (vfrac(i,j,0) < sml) {
             vfrac(i,j,0) = 0.0_rt;
             vcent(i,j,0,0) = 0.0_rt;
             vcent(i,j,0,1) = 0.0_rt;
@@ -206,9 +206,9 @@ int build_faces (Box const& bx, Array4<EBCellFlag> const& cell,
                  bool cover_multiple_cuts, int& nsmallfaces) noexcept
 {
 #ifdef AMREX_USE_FLOAT
-    constexpr Real small = 1.e-5_rt;
+    constexpr Real sml = 1.e-5_rt;
 #else
-    constexpr Real small = 1.e-14;
+    constexpr Real sml = 1.e-14;
 #endif
     const Real dxinv = 1.0_rt/dx[0];
     const Real dyinv = 1.0_rt/dx[1];
@@ -236,11 +236,11 @@ int build_faces (Box const& bx, Array4<EBCellFlag> const& cell,
                     fcx(i,j,0) = 0.5_rt - 0.5_rt*apx(i,j,0);
                 }
 
-                if (apx(i,j,0) > 1.0_rt-small) {
+                if (apx(i,j,0) > 1.0_rt-sml) {
                     apx(i,j,0) = 1.0_rt;
                     fcx(i,j,0) = 0.0_rt;
                     fx(i,j,0) = Type::regular;
-                } else if (apx(i,j,0) < small) {
+                } else if (apx(i,j,0) < sml) {
                     apx(i,j,0) = 0.0_rt;
                     fcx(i,j,0) = 0.0_rt;
                     fx(i,j,0) = Type::covered;
@@ -269,11 +269,11 @@ int build_faces (Box const& bx, Array4<EBCellFlag> const& cell,
                     fcy(i,j,0) = 0.5_rt - 0.5_rt*apy(i,j,0);
                 }
 
-                if (apy(i,j,0) > 1.0_rt-small) {
+                if (apy(i,j,0) > 1.0_rt-sml) {
                     apy(i,j,0) = 1.0_rt;
                     fcy(i,j,0) = 0.0_rt;
                     fy(i,j,0) = Type::regular;
-                } else if (apy(i,j,0) < small) {
+                } else if (apy(i,j,0) < sml) {
                     apy(i,j,0) = 0.0_rt;
                     fcy(i,j,0) = 0.0_rt;
                     fy(i,j,0) = Type::covered;
diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp
index ec7d643391e..bf160f16ed6 100644
--- a/Src/EB/AMReX_EB2_3D_C.cpp
+++ b/Src/EB/AMReX_EB2_3D_C.cpp
@@ -199,10 +199,10 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy,
                   Real bcx, Real bcy) noexcept
 {
 #ifdef AMREX_USE_FLOAT
-    constexpr Real small = 1.e-5_rt;
+    constexpr Real sml = 1.e-5_rt;
     constexpr Real tiny  = 1.e-6_rt;
 #else
-    constexpr Real small = 1.e-14;
+    constexpr Real sml = 1.e-14;
     constexpr Real tiny  = 1.e-15;
 #endif
     Real apnorm = std::hypot(axm-axp,aym-ayp);
@@ -214,13 +214,13 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy,
 
     if (nxabs < tiny || nyabs > 1.0_rt-tiny) {
         areafrac = 0.5_rt*(axm+axp);
-        if (areafrac > 1.0_rt-small) {
+        if (areafrac > 1.0_rt-sml) {
             areafrac = 1.0_rt;
             centx = 0.0_rt;
             centy = 0.0_rt;
             Sx2 = Sy2 = 1.0_rt/12._rt;
             Sxy = 0.0_rt;
-        } else if (areafrac < small) {
+        } else if (areafrac < sml) {
             areafrac = 0.0_rt;
             centx = 0.0_rt;
             centy = 0.0_rt;
@@ -236,13 +236,13 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy,
         }
     } else if (nyabs < tiny || nxabs > 1.0_rt-tiny) {
         areafrac = 0.5_rt*(aym+ayp);
-        if (areafrac > 1.0_rt-small) {
+        if (areafrac > 1.0_rt-sml) {
             areafrac = 1.0_rt;
             centx = 0.0_rt;
             centy = 0.0_rt;
             Sx2 = Sy2 = 1.0_rt/12._rt;
             Sxy = 0.0_rt;
-        } else if (areafrac < small) {
+        } else if (areafrac < sml) {
             areafrac = 0.0_rt;
             centx = 0.0_rt;
             centy = 0.0_rt;
@@ -289,13 +289,13 @@ void cut_face_2d (Real& areafrac, Real& centx, Real& centy,
             : -signx*(1.0_rt/16._rt)*dx2 + 0.5_rt*ny*S_b;
 
         areafrac = 0.5_rt*(af1+af2);
-        if (areafrac > 1.0_rt-small) {
+        if (areafrac > 1.0_rt-sml) {
             areafrac = 1.0_rt;
             centx = 0.0_rt;
             centy = 0.0_rt;
             Sx2 = Sy2 = 1.0_rt/12._rt;
             Sxy = 0.0_rt;
-        } else if (areafrac < small) {
+        } else if (areafrac < sml) {
             areafrac = 0.0_rt;
             centx = 0.0_rt;
             centy = 0.0_rt;
@@ -377,9 +377,9 @@ int build_faces (Box const& bx, Array4<EBCellFlag> const& cell,
     int* dp = nmulticuts.data();
 
 #ifdef AMREX_USE_FLOAT
-    constexpr Real small = 1.e-5_rt;
+    constexpr Real sml = 1.e-5_rt;
 #else
-    constexpr Real small = 1.e-14;
+    constexpr Real sml = 1.e-14;
 #endif
     const Real dxinv = 1.0_rt/dx[0];
     const Real dyinv = 1.0_rt/dx[1];
@@ -464,7 +464,7 @@ int build_faces (Box const& bx, Array4<EBCellFlag> const& cell,
                 Gpu::Atomic::Add(dp,1);
             }
 
-            if ((ncuts > 2) || (lym <= small && lyp <= small && lzm <= small && lzp <= small)) {
+            if ((ncuts > 2) || (lym <= sml && lyp <= sml && lzm <= sml && lzp <= sml)) {
                 apx(i,j,k) = 0.0_rt;
                 fcx(i,j,k,0) = 0.0_rt;
                 fcx(i,j,k,1) = 0.0_rt;
@@ -572,7 +572,7 @@ int build_faces (Box const& bx, Array4<EBCellFlag> const& cell,
                 Gpu::Atomic::Add(dp,1);
             }
 
-            if ((ncuts > 2) || (lxm <= small && lxp <= small && lzm <= small && lzp <= small)) {
+            if ((ncuts > 2) || (lxm <= sml && lxp <= sml && lzm <= sml && lzp <= sml)) {
                 apy(i,j,k) = 0.0_rt;
                 fcy(i,j,k,0) = 0.0_rt;
                 fcy(i,j,k,1) = 0.0_rt;
@@ -680,7 +680,7 @@ int build_faces (Box const& bx, Array4<EBCellFlag> const& cell,
                 Gpu::Atomic::Add(dp,1);
             }
 
-            if ((ncuts > 2) || (lxm <= small && lxp <= small && lym <= small && lyp <= small)) {
+            if ((ncuts > 2) || (lxm <= sml && lxp <= sml && lym <= sml && lyp <= sml)) {
                 apz(i,j,k) = 0.0_rt;
                 fcz(i,j,k,0) = 0.0_rt;
                 fcz(i,j,k,1) = 0.0_rt;
diff --git a/Src/EB/AMReX_EBFluxRegister_3D_C.H b/Src/EB/AMReX_EBFluxRegister_3D_C.H
index 99d5ca53c25..dd2ace93fea 100644
--- a/Src/EB/AMReX_EBFluxRegister_3D_C.H
+++ b/Src/EB/AMReX_EBFluxRegister_3D_C.H
@@ -57,7 +57,7 @@ void eb_flux_reg_crseadd_va(int i, int j, int k, Array4<Real> const& d,
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 Real eb_flux_reg_cvol (int i, int j, int k, Array4<Real const> const& vfrac,
-                       Dim3 const& ratio, Real small) noexcept
+                       Dim3 const& ratio, Real sml) noexcept
 {
     Real cvol = Real(0.0);
     for         (int kk = k*ratio.z; kk < (k+1)*ratio.z; ++kk) {
@@ -67,7 +67,7 @@ Real eb_flux_reg_cvol (int i, int j, int k, Array4<Real const> const& vfrac,
             }
         }
     }
-    return (cvol > small) ? Real(1.0)/cvol : Real(0.0);
+    return (cvol > sml) ? Real(1.0)/cvol : Real(0.0);
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp
index e4aea5a1eb3..f8cbc4d5b50 100644
--- a/Src/EB/AMReX_EB_STL_utils.cpp
+++ b/Src/EB/AMReX_EB_STL_utils.cpp
@@ -544,13 +544,13 @@ STLtools::build_bvh (Triangle* begin, Triangle* end, Gpu::PinnedVector<Node>& bv
 #else
         constexpr Real eps = Real(1.e-10);
 #endif
-        Real small = eps*std::max({AMREX_D_DECL(bbox.length(0),
-                                                bbox.length(1),
-                                                bbox.length(2))});
+        Real sml = eps*std::max({AMREX_D_DECL(bbox.length(0),
+                                              bbox.length(1),
+                                              bbox.length(2))});
         // Make bounding box slightly bigger for robustness.
         for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-            bbox.setLo(idim,bbox.lo(idim)-small);
-            bbox.setHi(idim,bbox.hi(idim)+small);
+            bbox.setLo(idim,bbox.lo(idim)-sml);
+            bbox.setHi(idim,bbox.hi(idim)+sml);
         }
         node.ntriangles = int(ntri); // NOLINT
         return;
diff --git a/Src/EB/AMReX_EB_Slopes_2D_K.H b/Src/EB/AMReX_EB_Slopes_2D_K.H
index 9854756050e..1df63d78db7 100644
--- a/Src/EB/AMReX_EB_Slopes_2D_K.H
+++ b/Src/EB/AMReX_EB_Slopes_2D_K.H
@@ -666,11 +666,11 @@ amrex_calc_alpha_stencil(amrex::Real q_hat, amrex::Real q_max,
     using namespace amrex::literals;
 
     auto alpha_temp = 0.0_rt;
-    auto small  = 1.0e-13_rt;
+    auto sml  = 1.0e-13_rt;
 
-    if ((q_hat-state) > small) {
+    if ((q_hat-state) > sml) {
         alpha_temp = amrex::min(1.0_rt,(q_max-state)/(q_hat-state));
-    } else if ((q_hat-state) < -small) {
+    } else if ((q_hat-state) < -sml) {
         alpha_temp = amrex::min(1.0_rt,(q_min-state)/(q_hat-state));
     } else {
         alpha_temp = 1.0_rt;
diff --git a/Src/EB/AMReX_EB_Slopes_3D_K.H b/Src/EB/AMReX_EB_Slopes_3D_K.H
index 46f89f4bfd1..5fc12838edb 100644
--- a/Src/EB/AMReX_EB_Slopes_3D_K.H
+++ b/Src/EB/AMReX_EB_Slopes_3D_K.H
@@ -808,11 +808,11 @@ amrex_calc_alpha_stencil(amrex::Real q_hat, amrex::Real q_max,
     using namespace amrex::literals;
 
     auto alpha_temp = amrex::Real(0.0);
-    auto small  = amrex::Real(1.0e-13);
+    auto sml  = amrex::Real(1.0e-13);
 
-    if ((q_hat-state) > small) {
+    if ((q_hat-state) > sml) {
         alpha_temp = amrex::min(1.0_rt,(q_max-state)/(q_hat-state));
-    } else if ((q_hat-state) < -small) {
+    } else if ((q_hat-state) < -sml) {
         alpha_temp = amrex::min(1.0_rt,(q_min-state)/(q_hat-state));
     } else {
         alpha_temp = 1.0_rt;
diff --git a/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H b/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H
index 90ce4a07016..e0c80f159b5 100644
--- a/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H
+++ b/Src/EB/AMReX_EB_StateRedistSlopeLimiter_K.H
@@ -13,12 +13,12 @@ amrex_calc_alpha_stencil(Real q_hat, Real q_max, Real q_min, Real state) noexcep
     constexpr Real epsilon = 1.e-12;
 #endif
 
-    const Real small = epsilon*amrex::max(amrex::Math::abs(q_max),amrex::Math::abs(q_min));
+    const Real sml = epsilon*amrex::max(amrex::Math::abs(q_max),amrex::Math::abs(q_min));
     Real alpha;
 
-    if ((q_hat-state) > small) {
+    if ((q_hat-state) > sml) {
         alpha = amrex::min(1.0_rt,(q_max-state)/(q_hat-state));
-    } else if ((q_hat-state) < -small) {
+    } else if ((q_hat-state) < -sml) {
         alpha = amrex::min(1.0_rt,(q_min-state)/(q_hat-state));
     } else {
         alpha = 1.0_rt;
diff --git a/Src/LinearSolvers/AMReX_GMRES.H b/Src/LinearSolvers/AMReX_GMRES.H
index 415050fc5bb..1aeedccaaf6 100644
--- a/Src/LinearSolvers/AMReX_GMRES.H
+++ b/Src/LinearSolvers/AMReX_GMRES.H
@@ -305,8 +305,8 @@ void GMRES<V,M>::cycle (V& a_xx, int& a_status, int& a_itcount, RT& a_rnorm0)
 
         auto tt = m_linop->norm2(vv_it1);
 
-        auto const small = RT((sizeof(RT) == 8) ? 1.e-99 : 1.e-30);
-        bool happyend = (tt < small);
+        auto const sml = RT((sizeof(RT) == 8) ? 1.e-99 : 1.e-30);
+        bool happyend = (tt < sml);
         if (!happyend) {
             m_linop->scale(vv_it1, RT(1.0)/tt);
         }
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H
index e46041a6f7e..d985c3d20d0 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLCellLinOp.H
@@ -1648,7 +1648,11 @@ MLCellLinOpT<MF>::prepareForSolve ()
             if (Gpu::inLaunchRegion()) {
 #ifdef AMREX_USE_EB
                 if (factory && !factory->isAllRegular()) {
+#if defined(AMREX_USE_CUDA) && defined(_WIN32)
+                    if           (!std::is_same<MF,MultiFab>()) {
+#else
                     if constexpr (!std::is_same<MF,MultiFab>()) {
+#endif
                         amrex::Abort("MLCellLinOp with EB only works with MultiFab");
                     } else {
                         Vector<MLMGPSEBTag<RT>> tags;
@@ -2011,7 +2015,11 @@ MLCellLinOpT<MF>::normInf (int amrlev, MF const& mf, bool local) const -> RT
 #ifdef AMREX_USE_EB
     const auto *factory = dynamic_cast<EBFArrayBoxFactory const*>(this->Factory(amrlev));
     if (factory && !factory->isAllRegular()) {
+#if defined(AMREX_USE_CUDA) && defined(_WIN32)
+        if           (!std::is_same<MF,MultiFab>()) {
+#else
         if constexpr (!std::is_same<MF,MultiFab>()) {
+#endif
             amrex::Abort("MLCellLinOpT with EB only works with MultiFab");
         } else {
             const MultiFab& vfrac = factory->getVolFrac();
diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
index aa061013d0e..b389001bc55 100644
--- a/Src/Particle/AMReX_ParticleUtil.H
+++ b/Src/Particle/AMReX_ParticleUtil.H
@@ -248,17 +248,17 @@ int getTileIndex (const IntVect& iv, const Box& box, const bool a_do_tiling,
                 thi = tlo + ts_right - 1;
             }
         };
-        const IntVect& small = box.smallEnd();
-        const IntVect& big   = box.bigEnd();
+        const IntVect& sml = box.smallEnd();
+        const IntVect& big = box.bigEnd();
         IntVect ntiles, ivIndex, tilelo, tilehi;
 
-        AMREX_D_TERM(int iv0 = amrex::min(amrex::max(iv[0], small[0]), big[0]);,
-                     int iv1 = amrex::min(amrex::max(iv[1], small[1]), big[1]);,
-                     int iv2 = amrex::min(amrex::max(iv[2], small[2]), big[2]););
+        AMREX_D_TERM(int iv0 = amrex::min(amrex::max(iv[0], sml[0]), big[0]);,
+                     int iv1 = amrex::min(amrex::max(iv[1], sml[1]), big[1]);,
+                     int iv2 = amrex::min(amrex::max(iv[2], sml[2]), big[2]););
 
-        AMREX_D_TERM(tiling_1d(iv0, small[0], big[0], a_tile_size[0], ntiles[0], ivIndex[0], tilelo[0], tilehi[0]);,
-                     tiling_1d(iv1, small[1], big[1], a_tile_size[1], ntiles[1], ivIndex[1], tilelo[1], tilehi[1]);,
-                     tiling_1d(iv2, small[2], big[2], a_tile_size[2], ntiles[2], ivIndex[2], tilelo[2], tilehi[2]););
+        AMREX_D_TERM(tiling_1d(iv0, sml[0], big[0], a_tile_size[0], ntiles[0], ivIndex[0], tilelo[0], tilehi[0]);,
+                     tiling_1d(iv1, sml[1], big[1], a_tile_size[1], ntiles[1], ivIndex[1], tilelo[1], tilehi[1]);,
+                     tiling_1d(iv2, sml[2], big[2], a_tile_size[2], ntiles[2], ivIndex[2], tilelo[2], tilehi[2]););
 
         tbx = Box(tilelo, tilehi);
 
@@ -280,13 +280,13 @@ int numTilesInBox (const Box& box, const bool a_do_tiling, const IntVect& a_tile
             ntile = amrex::max(ncells/tilesize, 1);
         };
 
-        const IntVect& small = box.smallEnd();
-        const IntVect& big   = box.bigEnd();
+        const IntVect& sml = box.smallEnd();
+        const IntVect& big = box.bigEnd();
         IntVect ntiles;
 
-        AMREX_D_TERM(tiling_1d(small[0], big[0], a_tile_size[0], ntiles[0]);,
-                     tiling_1d(small[1], big[1], a_tile_size[1], ntiles[1]);,
-                     tiling_1d(small[2], big[2], a_tile_size[2], ntiles[2]););
+        AMREX_D_TERM(tiling_1d(sml[0], big[0], a_tile_size[0], ntiles[0]);,
+                     tiling_1d(sml[1], big[1], a_tile_size[1], ntiles[1]);,
+                     tiling_1d(sml[2], big[2], a_tile_size[2], ntiles[2]););
 
         return AMREX_D_TERM(ntiles[0], *=ntiles[1], *=ntiles[2]);
     }
diff --git a/Tests/DeviceGlobal/CMakeLists.txt b/Tests/DeviceGlobal/CMakeLists.txt
index 990662d406b..ecc90373b13 100644
--- a/Tests/DeviceGlobal/CMakeLists.txt
+++ b/Tests/DeviceGlobal/CMakeLists.txt
@@ -4,6 +4,10 @@ if (( (AMReX_GPU_BACKEND STREQUAL "CUDA") OR
    return()
 endif ()
 
+if (WIN32)
+   return()
+endif()
+
 foreach(D IN LISTS AMReX_SPACEDIM)
     set(_sources     main.cpp global_vars.cpp init.cpp work.cpp)
     set(_input_files)
diff --git a/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H b/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H
index 86af431d2ee..810778d8da8 100644
--- a/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H
+++ b/Tests/EB_CNS/Source/hydro/CNS_hydro_K.H
@@ -240,7 +240,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /*
 #endif
 
     constexpr Real weakwv = Real(1.e-3);
-    constexpr Real small = Real(1.e-6);
+    constexpr Real sml = Real(1.e-6);
 
     Real clsql = gamma*pl*rl;
     Real clsqr = gamma*pr*rr;
@@ -248,7 +248,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /*
     Real wr = std::sqrt(clsqr);
     Real cleft = wl/rl;
     Real cright = wr/rr;
-    Real ccsmall = small*(cleft+cright);
+    Real ccsmall = sml*(cleft+cright);
 
     Real pstar = (wl*pr + wr*pl - wr*wl*(ur-ul))/(wl+wr);
     pstar = amrex::max(pstar,smallp);
diff --git a/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H b/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H
index 2734976efcd..687b2a1255b 100644
--- a/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H
+++ b/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H
@@ -191,7 +191,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /*
     using amrex::Real;
 
     constexpr Real weakwv = Real(1.e-3);
-    constexpr Real small = Real(1.e-6);
+    constexpr Real sml = Real(1.e-6);
 
     Real clsql = gamma*pl*rl;
     Real clsqr = gamma*pr*rr;
@@ -199,7 +199,7 @@ riemann (const amrex::Real gamma, const amrex::Real smallp, const amrex::Real /*
     Real wr = std::sqrt(clsqr);
     Real cleft = wl/rl;
     Real cright = wr/rr;
-    Real ccsmall = small*(cleft+cright);
+    Real ccsmall = sml*(cleft+cright);
 
     Real pstar = (wl*pr + wr*pl - wr*wl*(ur-ul))/(wl+wr);
     pstar = amrex::max(pstar,smallp);
diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake
index a7863f125e3..ef55a1053ca 100644
--- a/Tools/CMake/AMReXOptions.cmake
+++ b/Tools/CMake/AMReXOptions.cmake
@@ -482,7 +482,7 @@ option(AMReX_DIFFERENT_COMPILER
    "Allow an application to use a different compiler than the one used to build AMReX" OFF)
 print_option(AMReX_DIFFERENT_COMPILER)
 
-if (AMReX_BUILD_SHARED_LIBS AND NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux") )
+if ( NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux") )
    option(AMReX_PROBINIT "Enable support for probin file" OFF)
 else ()
    cmake_dependent_option(AMReX_PROBINIT "Enable support for probin file" ON