Skip to content

Commit

Permalink
more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
WeiqunZhang committed Jan 24, 2025
1 parent d9253b6 commit de90063
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 76 deletions.
11 changes: 11 additions & 0 deletions Src/Base/AMReX_GpuLaunch.nolint.H
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,16 @@
); \
}

#if defined(AMREX_USE_CUDA) && defined(_WIN32)
// Variant of AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG selected when both
// AMREX_USE_CUDA and _WIN32 are defined.
//
//   where_to_run : compared against RunOn::Device to choose the device path.
//   box          : region passed to amrex::launch (device path) or copied into
//                  `tbox` (host path).
//   tbox         : identifier through which `block` refers to the box it
//                  operates on.
//   block        : statements to execute.
//
// Device path (where_to_run == RunOn::Device and Gpu::inLaunchRegion()):
// `block` becomes the body of a device lambda whose parameter is named `tbox`,
// so `tbox` is declared before `block` uses it.
// Host path: `tbox` is a local copy of `box` and `block` runs inline.
//
// NOTE(review): the lambda parameter is typed amrex::Box const&, the same
// form used by direct amrex::launch(box, lambda) calls -- confirm that no
// caller passes a non-Box `box` to this macro.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::launch(box, [=] AMREX_GPU_DEVICE (amrex::Box const& tbox) { block }); \
    } else { \
        auto tbox = box; \
        block; \
    }
#else
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
Expand All @@ -131,6 +141,7 @@
auto tbox = box; \
block; \
}
#endif

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
Expand Down
4 changes: 4 additions & 0 deletions Src/FFT/AMReX_FFT_R2C.H
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,11 @@ void R2C<T,D,S>::post_forward_doit (F const& post_forward)
amrex::Abort("xxxxx todo: post_forward");
#if (AMREX_SPACEDIM > 1)
} else if (m_r2c_sub) {
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
if (Depth == 0) {
#else
if constexpr (Depth == 0) {
#endif
// We need to pass the originally ordered indices to post_forward.
#if (AMREX_SPACEDIM == 2)
// The original domain is (1,ny). The sub domain is (ny,1).
Expand Down
4 changes: 4 additions & 0 deletions Src/FFT/AMReX_FFT_R2X.H
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,11 @@ void R2X<T>::forwardThenBackward_doit (MF const& inmf, MF& outmf,
BL_PROFILE("FFT::R2X::forwardbackward");

if (m_r2x_sub) {
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
if (Depth == 0)
#else
if constexpr (Depth == 0)
#endif
{
bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
MF inmf_sub, inmf_tmp;
Expand Down
45 changes: 26 additions & 19 deletions Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H
Original file line number Diff line number Diff line change
Expand Up @@ -673,20 +673,27 @@ MLABecLaplacianT<MF>::averageDownCoeffsSameAmrLevel (int amrlev, Vector<MF>& a,
auto const& bz = b[mglev][2].array(mfi));
auto const& osm = this->m_overset_mask[amrlev][mglev]->const_array(mfi);
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
amrex::launch(xbx, [=] AMREX_GPU_DEVICE (Box const& t_xbx)
{
overset_rescale_bcoef_x(t_xbx, bx, osm, ncomp, osfac);
});
amrex::launch(ybx, [=] AMREX_GPU_DEVICE (Box const& t_ybx)
{
overset_rescale_bcoef_y(t_ybx, by, osm, ncomp, osfac);
});
amrex::launch(zbx, [=] AMREX_GPU_DEVICE (Box const& t_zbx)
{
overset_rescale_bcoef_z(t_zbx, bz, osm, ncomp, osfac);
});
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE
(xbx, t_xbx,
{
overset_rescale_bcoef_x(t_xbx, bx, osm, ncomp, osfac);
});
#if (AMREX_SPACEDIM >= 2)
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE
(ybx, t_ybx,
{
overset_rescale_bcoef_y(t_ybx, by, osm, ncomp, osfac);
});
#endif
#if (AMREX_SPACEDIM == 3)
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE
(zbx, t_zbx,
{
overset_rescale_bcoef_z(t_zbx, bz, osm, ncomp, osfac);
});
#endif
#else
AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_DIM
(xbx, t_xbx,
{
overset_rescale_bcoef_x(t_xbx, bx, osm, ncomp, osfac);
Expand Down Expand Up @@ -1179,15 +1186,15 @@ MLABecLaplacianT<MF>::FFlux (Box const& box, Real const* dxinv, RT bscalar,
RT fac = bscalar*static_cast<RT>(dxinv[0]);
Box blo = amrex::bdryLo(box, 0);
int blen = box.length(0);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlabeclap_flux_xface(tbox, fxarr, solarr, bx, fac, blen, ncomp);
});
#if (AMREX_SPACEDIM >= 2)
fac = bscalar*static_cast<RT>(dxinv[1]);
blo = amrex::bdryLo(box, 1);
blen = box.length(1);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlabeclap_flux_yface(tbox, fyarr, solarr, by, fac, blen, ncomp);
});
Expand All @@ -1196,7 +1203,7 @@ MLABecLaplacianT<MF>::FFlux (Box const& box, Real const* dxinv, RT bscalar,
fac = bscalar*static_cast<RT>(dxinv[2]);
blo = amrex::bdryLo(box, 2);
blen = box.length(2);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlabeclap_flux_zface(tbox, fzarr, solarr, bz, fac, blen, ncomp);
});
Expand All @@ -1206,22 +1213,22 @@ MLABecLaplacianT<MF>::FFlux (Box const& box, Real const* dxinv, RT bscalar,
{
RT fac = bscalar*static_cast<RT>(dxinv[0]);
Box bflux = amrex::surroundingNodes(box, 0);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlabeclap_flux_x(tbox, fxarr, solarr, bx, fac, ncomp);
});
#if (AMREX_SPACEDIM >= 2)
fac = bscalar*static_cast<RT>(dxinv[1]);
bflux = amrex::surroundingNodes(box, 1);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlabeclap_flux_y(tbox, fyarr, solarr, by, fac, ncomp);
});
#endif
#if (AMREX_SPACEDIM == 3)
fac = bscalar*static_cast<RT>(dxinv[2]);
bflux = amrex::surroundingNodes(box, 2);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlabeclap_flux_z(tbox, fzarr, solarr, bz, fac, ncomp);
});
Expand Down
Loading

0 comments on commit de90063

Please sign in to comment.