Skip to content

Commit

Permalink
more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
WeiqunZhang committed Jan 24, 2025
1 parent d9253b6 commit d1c8857
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 75 deletions.
11 changes: 11 additions & 0 deletions Src/Base/AMReX_GpuLaunch.nolint.H
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,16 @@
); \
}

#if defined(AMREX_USE_CUDA) && defined(_WIN32)
// Windows + CUDA variant of AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG.
//
// Executes `block` over `box`; inside `block` the box being worked on is
// referred to by the caller-chosen identifier `tbox`.
//   where_to_run : RunOn flag; the device path is taken only when it equals
//                  RunOn::Device AND Gpu::inLaunchRegion() is true.
//   box          : the box to process.
//   tbox         : identifier that `block` uses for the box it receives.
//   block        : statements to execute.
//
// Device path: `block` becomes the body of a device lambda passed to
// amrex::launch. `tbox` must be declared as that lambda's parameter;
// otherwise it is not in scope inside `block`.
// Fallback path: `tbox` is a local copy of `box` and `block` is executed
// directly in the enclosing scope.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::launch(box, [=] AMREX_GPU_DEVICE (amrex::Box const& tbox) { block }); \
    } else { \
        auto tbox = box; \
        block; \
    }
#else
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
{ \
Expand All @@ -131,6 +141,7 @@
auto tbox = box; \
block; \
}
#endif

#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
Expand Down
4 changes: 4 additions & 0 deletions Src/FFT/AMReX_FFT_R2C.H
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,11 @@ void R2C<T,D,S>::post_forward_doit (F const& post_forward)
amrex::Abort("xxxxx todo: post_forward");
#if (AMREX_SPACEDIM > 1)
} else if (m_r2c_sub) {
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
if (Depth == 0) {
#else
if constexpr (Depth == 0) {
#endif
// We need to pass the originally ordered indices to post_forward.
#if (AMREX_SPACEDIM == 2)
// The original domain is (1,ny). The sub domain is (ny,1).
Expand Down
4 changes: 4 additions & 0 deletions Src/FFT/AMReX_FFT_R2X.H
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,11 @@ void R2X<T>::forwardThenBackward_doit (MF const& inmf, MF& outmf,
BL_PROFILE("FFT::R2X::forwardbackward");

if (m_r2x_sub) {
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
if (Depth == 0)
#else
if constexpr (Depth == 0)
#endif
{
bool inmf_safe = m_sub_helper.ghost_safe(inmf.nGrowVect());
MF inmf_sub, inmf_tmp;
Expand Down
43 changes: 25 additions & 18 deletions Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H
Original file line number Diff line number Diff line change
Expand Up @@ -673,18 +673,25 @@ MLABecLaplacianT<MF>::averageDownCoeffsSameAmrLevel (int amrlev, Vector<MF>& a,
auto const& bz = b[mglev][2].array(mfi));
auto const& osm = this->m_overset_mask[amrlev][mglev]->const_array(mfi);
#if defined(AMREX_USE_CUDA) && defined(_WIN32)
amrex::launch(xbx, [=] AMREX_GPU_DEVICE (Box const& t_xbx)
{
overset_rescale_bcoef_x(t_xbx, bx, osm, ncomp, osfac);
});
amrex::launch(ybx, [=] AMREX_GPU_DEVICE (Box const& t_ybx)
{
overset_rescale_bcoef_y(t_ybx, by, osm, ncomp, osfac);
});
amrex::launch(zbx, [=] AMREX_GPU_DEVICE (Box const& t_zbx)
{
overset_rescale_bcoef_z(t_zbx, bz, osm, ncomp, osfac);
});
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE
(xbx, t_xbx,
{
overset_rescale_bcoef_x(t_xbx, bx, osm, ncomp, osfac);
});
#if (AMREX_SPACEDIM >= 2)
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE
(ybx, t_ybx,
{
overset_rescale_bcoef_y(t_ybx, by, osm, ncomp, osfac);
});
#endif
#if (AMREX_SPACEDIM == 3)
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE
(zbx, t_zbx,
{
overset_rescale_bcoef_z(t_zbx, bz, osm, ncomp, osfac);
});
#endif
#else
AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM
(xbx, t_xbx,
Expand Down Expand Up @@ -1179,15 +1186,15 @@ MLABecLaplacianT<MF>::FFlux (Box const& box, Real const* dxinv, RT bscalar,
RT fac = bscalar*static_cast<RT>(dxinv[0]);
Box blo = amrex::bdryLo(box, 0);
int blen = box.length(0);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlabeclap_flux_xface(tbox, fxarr, solarr, bx, fac, blen, ncomp);
});
#if (AMREX_SPACEDIM >= 2)
fac = bscalar*static_cast<RT>(dxinv[1]);
blo = amrex::bdryLo(box, 1);
blen = box.length(1);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlabeclap_flux_yface(tbox, fyarr, solarr, by, fac, blen, ncomp);
});
Expand All @@ -1196,7 +1203,7 @@ MLABecLaplacianT<MF>::FFlux (Box const& box, Real const* dxinv, RT bscalar,
fac = bscalar*static_cast<RT>(dxinv[2]);
blo = amrex::bdryLo(box, 2);
blen = box.length(2);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlabeclap_flux_zface(tbox, fzarr, solarr, bz, fac, blen, ncomp);
});
Expand All @@ -1206,22 +1213,22 @@ MLABecLaplacianT<MF>::FFlux (Box const& box, Real const* dxinv, RT bscalar,
{
RT fac = bscalar*static_cast<RT>(dxinv[0]);
Box bflux = amrex::surroundingNodes(box, 0);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlabeclap_flux_x(tbox, fxarr, solarr, bx, fac, ncomp);
});
#if (AMREX_SPACEDIM >= 2)
fac = bscalar*static_cast<RT>(dxinv[1]);
bflux = amrex::surroundingNodes(box, 1);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlabeclap_flux_y(tbox, fyarr, solarr, by, fac, ncomp);
});
#endif
#if (AMREX_SPACEDIM == 3)
fac = bscalar*static_cast<RT>(dxinv[2]);
bflux = amrex::surroundingNodes(box, 2);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlabeclap_flux_z(tbox, fzarr, solarr, bz, fac, ncomp);
});
Expand Down
64 changes: 32 additions & 32 deletions Src/LinearSolvers/MLMG/AMReX_MLALaplacian.H
Original file line number Diff line number Diff line change
Expand Up @@ -302,12 +302,12 @@ MLALaplacianT<MF>::Fapply (int amrlev, int mglev, MF& out, const MF& in) const

#if (AMREX_SPACEDIM != 3)
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx, tbx,
{
mlalap_adotx_m(tbx, yfab, xfab, afab, dxinv, ascalar, bscalar, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx, tbx,
{
mlalap_adotx(tbx, yfab, xfab, afab, dxinv, ascalar, bscalar, ncomp);
});
Expand All @@ -318,12 +318,12 @@ MLALaplacianT<MF>::Fapply (int amrlev, int mglev, MF& out, const MF& in) const
const auto& xfab2d = this->compactify(xfab);
const auto& yfab2d = this->compactify(yfab);
const auto& afab2d = this->compactify(afab);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx2d, tbx2d,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx2d, tbx2d,
{
TwoD::mlalap_adotx(tbx2d, yfab2d, xfab2d, afab2d, dhinv, ascalar, bscalar, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx, tbx,
{
mlalap_adotx(tbx, yfab, xfab, afab, dxinv, ascalar, bscalar, ncomp);
});
Expand Down Expand Up @@ -370,12 +370,12 @@ MLALaplacianT<MF>::normalize (int amrlev, int mglev, MF& mf) const

#if (AMREX_SPACEDIM != 3)
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx, tbx,
{
mlalap_normalize_m(tbx, fab, afab, dxinv, ascalar, bscalar, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx, tbx,
{
mlalap_normalize(tbx, fab, afab, dxinv, ascalar, bscalar, ncomp);
});
Expand All @@ -385,12 +385,12 @@ MLALaplacianT<MF>::normalize (int amrlev, int mglev, MF& mf) const
Box const& bx2d = this->compactify(bx);
const auto& fab2d = this->compactify(fab);
const auto& afab2d = this->compactify(afab);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx2d, tbx2d,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx2d, tbx2d,
{
TwoD::mlalap_normalize(tbx2d, fab2d, afab2d, dhinv, ascalar, bscalar, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bx, tbx,
{
mlalap_normalize(tbx, fab, afab, dxinv, ascalar, bscalar, ncomp);
});
Expand Down Expand Up @@ -490,7 +490,7 @@ MLALaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int r

#if (AMREX_SPACEDIM == 1)
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, thread_box,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( tbx, thread_box,
{
mlalap_gsrb_m(thread_box, solnfab, rhsfab, alpha, dhx,
afab,
Expand All @@ -500,7 +500,7 @@ MLALaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int r
dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, thread_box,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( tbx, thread_box,
{
mlalap_gsrb(thread_box, solnfab, rhsfab, alpha, dhx,
afab,
Expand All @@ -514,7 +514,7 @@ MLALaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int r

#if (AMREX_SPACEDIM == 2)
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, thread_box,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( tbx, thread_box,
{
mlalap_gsrb_m(thread_box, solnfab, rhsfab, alpha, dhx, dhy,
afab,
Expand All @@ -526,7 +526,7 @@ MLALaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int r
dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, thread_box,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( tbx, thread_box,
{
mlalap_gsrb(thread_box, solnfab, rhsfab, alpha, dhx, dhy,
afab,
Expand Down Expand Up @@ -554,7 +554,7 @@ MLALaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int r
const auto& m1_2d = this->compactify(this->get_d1(m0,m1,m2));
const auto& m2_2d = this->compactify(this->get_d0(m3,m4,m5));
const auto& m3_2d = this->compactify(this->get_d1(m3,m4,m5));
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx_2d, thread_box,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( tbx_2d, thread_box,
{
TwoD::mlalap_gsrb(thread_box, solnfab_2d, rhsfab_2d, alpha, dh0, dh1,
afab_2d,
Expand All @@ -565,7 +565,7 @@ MLALaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, int r
vbx_2d, redblack, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( tbx, thread_box,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( tbx, thread_box,
{
mlalap_gsrb(thread_box, solnfab, rhsfab, alpha, dhx, dhy, dhz,
afab,
Expand Down Expand Up @@ -611,7 +611,7 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
RT fac = m_b_scalar * RT(dxinv[0]);
Box blo = amrex::bdryLo(box, 0);
int blen = box.length(0);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_xface(tbox, fxarr, solarr, fac, blen, ncomp);
});
Expand All @@ -622,7 +622,7 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
RT fac = m_b_scalar * RT(dxinv[1]);
Box blo = amrex::bdryLo(box, 1);
int blen = box.length(1);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_yface(tbox, fyarr, solarr, fac, blen, ncomp);
});
Expand All @@ -633,7 +633,7 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
RT fac = m_b_scalar * RT(dxinv[2]);
Box blo = amrex::bdryLo(box, 2);
int blen = box.length(2);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_zface(tbox, fzarr, solarr, fac, blen, ncomp);
});
Expand All @@ -644,7 +644,7 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
if (this->hiddenDirection() != 0) {
RT fac = m_b_scalar * RT(dxinv[0]);
Box bflux = amrex::surroundingNodes(box, 0);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_x(tbox, fxarr, solarr, fac, ncomp);
});
Expand All @@ -654,7 +654,7 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
if (this->hiddenDirection() != 1) {
RT fac = m_b_scalar * RT(dxinv[1]);
Box bflux = amrex::surroundingNodes(box, 1);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_y(tbox, fyarr, solarr, fac, ncomp);
});
Expand All @@ -664,7 +664,7 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
if (this->hiddenDirection() != 2) {
RT fac = m_b_scalar * RT(dxinv[2]);
Box bflux = amrex::surroundingNodes(box, 2);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_z(tbox, fzarr, solarr, fac, ncomp);
});
Expand All @@ -679,12 +679,12 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
Box blo = amrex::bdryLo(box, 0);
int blen = box.length(0);
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_xface_m(tbox, fxarr, solarr, fac, blen, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_xface(tbox, fxarr, solarr, fac, blen, ncomp);
});
Expand All @@ -697,12 +697,12 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
Box blo = amrex::bdryLo(box, 1);
int blen = box.length(1);
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_yface_m(tbox, fyarr, solarr, fac, blen, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_yface(tbox, fyarr, solarr, fac, blen, ncomp);
});
Expand All @@ -715,12 +715,12 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
RT fac = m_b_scalar * RT(dxinv[0]);
Box bflux = amrex::surroundingNodes(box, 0);
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_x_m(tbox, fxarr, solarr, fac, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_x(tbox, fxarr, solarr, fac, ncomp);
});
Expand All @@ -732,12 +732,12 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
RT fac = m_b_scalar * RT(dxinv[1]);
Box bflux = amrex::surroundingNodes(box, 1);
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_y_m(tbox, fyarr, solarr, fac, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_y(tbox, fyarr, solarr, fac, ncomp);
});
Expand All @@ -752,12 +752,12 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
Box blo = amrex::bdryLo(box, 0);
int blen = box.length(0);
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_xface_m(tbox, fxarr, solarr, fac, blen, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( blo, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( blo, tbox,
{
mlalap_flux_xface(tbox, fxarr, solarr, fac, blen, ncomp);
});
Expand All @@ -766,12 +766,12 @@ MLALaplacianT<MF>::FFlux (int amrlev, const MFIter& mfi,
RT fac = m_b_scalar * RT(dxinv[0]);
Box bflux = amrex::surroundingNodes(box, 0);
if (this->m_has_metric_term) {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_x_m(tbox, fxarr, solarr, fac, dx, probxlo, ncomp);
});
} else {
AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bflux, tbox,
AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE ( bflux, tbox,
{
mlalap_flux_x(tbox, fxarr, solarr, fac, ncomp);
});
Expand Down
Loading

0 comments on commit d1c8857

Please sign in to comment.