7SpadesParticleContainer<NType, NStructReal, NStructInt, NArrayReal, NArrayInt>::
8 SpadesParticleContainer(amrex::AmrParGDB* par_gdb,
int ngrow)
9 :
amrex::NeighborParticleContainer<
13 NArrayInt>(par_gdb, ngrow)
16 const int nlevs_max = par_gdb->maxLevel() + 1;
20 "spades::SPADES::SpadesParticleContainer::"
21 "SpadesParticleContainer(): not supporting multilevel right "
32SpadesParticleContainer<NType, NStructReal, NStructInt, NArrayReal, NArrayInt>::
33 SpadesParticleContainer(
34 const amrex::Vector<amrex::Geometry>& geom,
35 const amrex::Vector<amrex::DistributionMapping>& dmap,
36 const amrex::Vector<amrex::BoxArray>& ba,
38 :
amrex::NeighborParticleContainer<
42 NArrayInt>(geom, dmap, ba, {2}, ngrow)
45 if (geom.size() > 1) {
47 "spades::SPADES::SpadesParticleContainer::"
48 "SpadesParticleContainer(): not supporting multilevel right "
59void SpadesParticleContainer<
64 NArrayInt>::initialize_state()
66 BL_PROFILE(
"spades::SpadesParticleContainer::initialize_state()");
69 this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV), NType,
70 m_ngrow, amrex::MFInfo());
73 this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV), NType,
74 m_ngrow, amrex::MFInfo());
79 if (m_sort_type ==
"encoded") {
80 m_min_timestamp.define(
81 this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV),
82 NType, m_ngrow, amrex::MFInfo());
83 m_max_timestamp.define(
84 this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV),
85 NType, m_ngrow, amrex::MFInfo());
87 m_min_timestamp.setVal(constants::LARGE_NUM);
88 m_min_timestamp.setVal(0.0, NType - 1, 1);
89 m_max_timestamp.setVal(0.0);
99void SpadesParticleContainer<
104 NArrayInt>::clear_state()
106 BL_PROFILE(
"spades::SpadesParticleContainer::clear_state()");
117void SpadesParticleContainer<
122 NArrayInt>::count_particles()
124 BL_PROFILE(
"spades::SpadesParticleContainer::count_particles()");
129#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
131 for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
133 const amrex::Box& box = mfi.tilebox();
134 const auto& cnt_arr = m_counts.array(mfi);
135 const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
136 auto& pti = this->GetParticles(LEV)[index];
137 const auto parrs = particle_arrays(pti);
138 const int np = pti.numParticles();
140 amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(
long pidx)
noexcept {
141 const amrex::IntVect iv(AMREX_D_DECL(
142 parrs.m_idata[CommonIntData::i][pidx],
143 parrs.m_idata[CommonIntData::j][pidx],
144 parrs.m_idata[CommonIntData::k][pidx]));
146 if (box.contains(iv)) {
147 amrex::Gpu::Atomic::AddNoRet(
148 &cnt_arr(iv, parrs.m_idata[CommonIntData::type_id][pidx]),
161void SpadesParticleContainer<
166 NArrayInt>::count_offsets()
168 BL_PROFILE(
"spades::SpadesParticleContainer::count_offsets()");
173#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
175 for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
176 const amrex::Box& box = mfi.tilebox();
177 const auto ncell = box.numPts();
178 const auto& cnt_arr = m_counts.const_array(mfi);
179 const auto& offsets_arr = m_offsets.array(mfi);
180 int* p_offsets = offsets_arr.dataPtr();
181 amrex::Scan::PrefixSum<int>(
183 [=] AMREX_GPU_DEVICE(
int i) ->
int {
184 const auto iv = box.atOffset(i);
185 int total_particles = 0;
186 for (
int typ = 0; typ < NType; typ++) {
187 total_particles += cnt_arr(iv, typ);
189 return total_particles;
191 [=] AMREX_GPU_DEVICE(
int i,
const int& xi) { p_offsets[i] = xi; },
192 amrex::Scan::Type::exclusive, amrex::Scan::noRetSum);
195 box, [=] AMREX_GPU_DEVICE(
196 int i,
int j,
int AMREX_D_PICK(, , k))
noexcept {
197 const amrex::IntVect iv(AMREX_D_DECL(i, j, k));
198 for (
int typ = 1; typ < NType; typ++) {
199 offsets_arr(iv, typ) =
200 offsets_arr(iv, typ - 1) + cnt_arr(iv, typ - 1);
212void SpadesParticleContainer<
217 NArrayInt>::update_counts()
219 BL_PROFILE(
"spades::SpadesParticleContainer::update_counts()");
230void SpadesParticleContainer<
235 NArrayInt>::check_sort(
const amrex::MFIter& mfi)
237 BL_PROFILE(
"spades::SpadesParticleContainer::check_sort()");
238 const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
239 auto& pti = this->GetParticles(LEV)[index];
240 const size_t np = pti.numParticles();
241 const auto parrs = particle_arrays(pti);
243 amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(
long pidx)
noexcept {
245 const auto pm = pidx - 1;
246 const amrex::IntVect pivm(AMREX_D_DECL(
247 parrs.m_idata[CommonIntData::i][pm],
248 parrs.m_idata[CommonIntData::j][pm],
249 parrs.m_idata[CommonIntData::k][pm]));
250 const amrex::IntVect piv(AMREX_D_DECL(
251 parrs.m_idata[CommonIntData::i][pidx],
252 parrs.m_idata[CommonIntData::j][pidx],
253 parrs.m_idata[CommonIntData::k][pidx]));
254 AMREX_ASSERT(pivm <= piv);
256 if (parrs.m_idata[CommonIntData::type_id][pm] >
257 parrs.m_idata[CommonIntData::type_id][pidx]) {
259 "Unsorted type %d > %d\n",
260 parrs.m_idata[CommonIntData::type_id][pm],
261 parrs.m_idata[CommonIntData::type_id][pidx]);
264 parrs.m_idata[CommonIntData::type_id][pm] <=
265 parrs.m_idata[CommonIntData::type_id][pidx]);
267 if ((parrs.m_idata[CommonIntData::type_id][pm] ==
268 parrs.m_idata[CommonIntData::type_id][pidx]) &&
270 if (parrs.m_rdata[CommonRealData::timestamp][pm] >
271 parrs.m_rdata[CommonRealData::timestamp][pidx]) {
273 "Unsorted time stamp %.16f > %.16f in ivm: (%d, %d) of "
275 "%d, iv: (%d,%d) of type %d\n",
276 parrs.m_rdata[CommonRealData::timestamp][pm],
277 parrs.m_rdata[CommonRealData::timestamp][pidx],
278 parrs.m_idata[CommonIntData::i][pm],
279 parrs.m_idata[CommonIntData::j][pm],
280 parrs.m_idata[CommonIntData::type_id][pm],
281 parrs.m_idata[CommonIntData::i][pidx],
282 parrs.m_idata[CommonIntData::j][pidx],
283 parrs.m_idata[CommonIntData::type_id][pidx]);
286 parrs.m_rdata[CommonRealData::timestamp][pm] <=
287 parrs.m_rdata[CommonRealData::timestamp][pidx]);
291 amrex::Gpu::streamSynchronize();
300template <
typename CompareFunc>
301void SpadesParticleContainer<
306 NArrayInt>::sort_impl(
const CompareFunc& compare)
308 BL_PROFILE(
"spades::SpadesParticleContainer::sort_impl()");
309 if (m_sort_type ==
"nonencoded") {
310 nonencoded_sort_impl(compare);
311 }
else if (m_sort_type ==
"encoded") {
314 amrex::Abort(
"Invalid sort type. Must be nonencoded or encoded");
325template <
typename CompareFunc>
331 NArrayInt>::nonencoded_sort_impl(
const CompareFunc& compare)
334 BL_PROFILE(
"spades::SpadesParticleContainer::nonencoded_sort_impl()");
337#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
339 for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
340 const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
341 auto& pti = this->GetParticles(LEV)[index];
342 const size_t np = pti.numParticles();
349 "spades::SpadesParticleContainer::nonencoded_sort::sort_prep",
351 amrex::Gpu::DeviceVector<amrex::Long> cell_list(np);
352 auto* p_cell_list = cell_list.data();
353 amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(
long pidx)
noexcept {
354 p_cell_list[pidx] = pidx;
356 amrex::Gpu::streamSynchronize();
357 BL_PROFILE_VAR_STOP(prep);
361 "spades::SpadesParticleContainer::nonencoded_sort::sort", sort);
362 const auto parrs = particle_arrays(pti);
364#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
366 thrust::device, cell_list.begin(), cell_list.end(),
367 [=] AMREX_GPU_DEVICE(
368 const amrex::Long xi,
const amrex::Long yi)
noexcept {
369 return compare(xi, yi, parrs);
373 amrex::Vector<amrex::Long> h_cell_list(np, 0);
375 amrex::Gpu::deviceToHost, cell_list.begin(), cell_list.end(),
376 h_cell_list.begin());
378 h_cell_list.begin(), h_cell_list.end(),
379 [=](
const amrex::Long xi,
const amrex::Long yi) {
380 return compare(xi, yi, parrs);
383 amrex::Gpu::hostToDevice, h_cell_list.begin(), h_cell_list.end(),
388 cell_list.begin(), cell_list.end(),
389 [=](
const amrex::Long xi,
const amrex::Long yi) {
390 return compare(xi, yi, parrs);
393 amrex::Gpu::streamSynchronize();
394 BL_PROFILE_VAR_STOP(sort);
398 "spades::SpadesParticleContainer::nonencoded_sort::"
401 this->ReorderParticles(LEV, mfi, cell_list.data());
402 amrex::Gpu::streamSynchronize();
403 BL_PROFILE_VAR_STOP(reorder);
422 NArrayInt>::encoded_sort_impl()
425 BL_PROFILE(
"spades::SpadesParticleContainer::encoded_sort_impl()");
430 "spades::SpadesParticleContainer::encoded_sort::bounds", bounds);
431 m_min_timestamp.setVal(constants::LARGE_NUM, 0, NType - 1);
432 m_max_timestamp.setVal(0.0, 0, NType - 1);
435#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
437 for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
439 const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
440 auto& pti = this->GetParticles(LEV)[index];
441 const size_t np = pti.numParticles();
446 const auto& min_ts_arr = m_min_timestamp.array(mfi);
447 const auto& max_ts_arr = m_max_timestamp.array(mfi);
448 const auto parrs = particle_arrays(pti);
449 auto* ts = parrs.m_rdata[CommonRealData::timestamp];
451 const amrex::Box& box = mfi.tilebox();
453 amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(
long pidx)
noexcept {
454 if (parrs.m_idata[CommonIntData::type_id][pidx] != (NType - 1)) {
455 const amrex::IntVect piv(AMREX_D_DECL(
456 parrs.m_idata[CommonIntData::i][pidx],
457 parrs.m_idata[CommonIntData::j][pidx],
458 parrs.m_idata[CommonIntData::k][pidx]));
460 AMREX_ASSERT(box.contains(piv));
461 amrex::Gpu::Atomic::Min(
463 piv, parrs.m_idata[CommonIntData::type_id][pidx]),
465 amrex::Gpu::Atomic::Max(
467 piv, parrs.m_idata[CommonIntData::type_id][pidx]),
472 BL_PROFILE_VAR_STOP(bounds);
475#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
477 for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
479 const amrex::Box& box = mfi.tilebox();
480 const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
481 auto& pti = this->GetParticles(LEV)[index];
482 const size_t np = pti.numParticles();
489 "spades::SpadesParticleContainer::encoded_sort::sort_prep", prep);
490 amrex::Gpu::DeviceVector<amrex::Long> cell_list(np);
491 auto* p_cell_list = cell_list.data();
492 amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(
long pidx)
noexcept {
493 p_cell_list[pidx] = pidx;
495 amrex::Gpu::streamSynchronize();
496 BL_PROFILE_VAR_STOP(prep);
499 "spades::SpadesParticleContainer::encoded_sort::encode", encode);
500 const auto parrs = particle_arrays(pti);
501 auto* ts_arr = parrs.m_rdata[CommonRealData::timestamp];
502 const auto& min_ts_arr = m_min_timestamp.array(mfi);
503 const auto& max_ts_arr = m_max_timestamp.array(mfi);
504 amrex::Gpu::DeviceVector<std::uint64_t> encode(np);
505 auto* p_encode = encode.data();
506 const auto box_lo = box.smallEnd();
507 amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(
long pidx)
noexcept {
510 parrs.m_idata[CommonIntData::i][pidx] - box_lo[0];
512 parrs.m_idata[CommonIntData::j][pidx] - box_lo[1];
514 parrs.m_idata[CommonIntData::k][pidx] - box_lo[2];);
517 AMREX_ALWAYS_ASSERT(pi >= 0), AMREX_ALWAYS_ASSERT(pj >= 0),
518 AMREX_ALWAYS_ASSERT(pk >= 0));
521 static_cast<std::uint64_t
>(pi) <=
524 static_cast<std::uint64_t
>(pj) <=
527 static_cast<std::uint64_t
>(pk) <=
529 AMREX_ASSERT(parrs.m_idata[CommonIntData::type_id][pidx] >= 0);
530 AMREX_ASSERT(ts_arr[pidx] >= (0.0 - constants::SMALL_NUM));
532 int shift = constants::TOTAL_NBITS;
533#if AMREX_SPACEDIM == 3
534 shift -= constants::K_NBITS;
535 const std::uint64_t k =
536 static_cast<std::uint64_t
>(pk &
bitmask(constants::K_NBITS))
539#if AMREX_SPACEDIM >= 2
540 shift -= constants::J_NBITS;
541 const std::uint64_t j =
542 static_cast<std::uint64_t
>(pj &
bitmask(constants::J_NBITS))
545 shift -= constants::I_NBITS;
546 const std::uint64_t i =
547 static_cast<std::uint64_t
>(pi &
bitmask(constants::I_NBITS))
549 shift -= constants::TYPE_NBITS;
550 const std::uint64_t typ =
551 static_cast<std::uint64_t
>(
552 parrs.m_idata[CommonIntData::type_id][pidx] &
553 bitmask(constants::TYPE_NBITS))
556 const amrex::IntVect piv(AMREX_D_DECL(
557 parrs.m_idata[CommonIntData::i][pidx],
558 parrs.m_idata[CommonIntData::j][pidx],
559 parrs.m_idata[CommonIntData::k][pidx]));
560 const amrex::Real min_t =
561 min_ts_arr(piv, parrs.m_idata[CommonIntData::type_id][pidx]);
562 const amrex::Real max_t =
563 max_ts_arr(piv, parrs.m_idata[CommonIntData::type_id][pidx]);
564 AMREX_ASSERT(min_t <= max_t);
565 const amrex::Real delta =
566 (amrex::Math::abs(max_t - min_t) > constants::EPS)
570 const amrex::Real normalized = (ts_arr[pidx] - min_t) / delta;
571 const amrex::Real scaled =
572 normalized *
static_cast<amrex::Real
>((1ULL << shift) - 1);
573 const auto ts =
static_cast<std::uint64_t
>(
574 static_cast<std::uint32_t
>(scaled) &
575 bitmask(constants::TIMESTAMP_NBITS));
576 AMREX_ASSERT((shift - constants::TIMESTAMP_NBITS) == 0);
578 p_encode[pidx] = AMREX_D_PICK(i, j | i, k | j | i) | typ | ts;
586 amrex::Gpu::streamSynchronize();
587 BL_PROFILE_VAR_STOP(encode);
591 "spades::SpadesParticleContainer::encoded_sort::sort", sort);
593#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
595 thrust::device, cell_list.begin(), cell_list.end(),
596 [=] AMREX_GPU_DEVICE(
597 const amrex::Long xi,
const amrex::Long yi)
noexcept {
598 return p_encode[xi] < p_encode[yi];
604 amrex::Vector<amrex::Long> h_cell_list(np, 0);
605 amrex::Vector<std::uint64_t> h_encode(np, 0);
607 amrex::Gpu::deviceToHost, cell_list.begin(), cell_list.end(),
608 h_cell_list.begin());
610 amrex::Gpu::deviceToHost, encode.begin(), encode.end(),
613 h_cell_list.begin(), h_cell_list.end(),
614 [=](
const amrex::Long xi,
const amrex::Long yi) {
615 return h_encode[xi] < h_encode[yi];
618 amrex::Gpu::hostToDevice, h_cell_list.begin(), h_cell_list.end(),
623 cell_list.begin(), cell_list.end(),
624 [=](
const amrex::Long xi,
const amrex::Long yi) {
625 return p_encode[xi] < p_encode[yi];
628 amrex::Gpu::streamSynchronize();
629 BL_PROFILE_VAR_STOP(sort);
633 "spades::SpadesParticleContainer::encoded_sort::"
636 this->ReorderParticles(LEV, mfi, cell_list.data());
637 amrex::Gpu::streamSynchronize();
638 BL_PROFILE_VAR_STOP(reorder);
652void SpadesParticleContainer<
657 NArrayInt>::print_messages(
const std::string& header)
659 BL_PROFILE(
"spades::SpadesParticleContainer::print_messages()");
661 amrex::Print() << header <<
" -----------------------------" << std::endl;
663#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
665 for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
666 const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
667 const auto& pti = this->GetParticles(LEV)[index];
668 const size_t np = pti.numParticles();
669 const auto parrs = particle_arrays(pti);
671 amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(
long pidx)
noexcept {
672 DevicePrint()(pidx, parrs);
675 amrex::Print() <<
"End " << header <<
"-----------------------------"
685void SpadesParticleContainer<
690 NArrayInt>::reposition_particles()
692 BL_PROFILE(
"spades::SpadesParticleContainer::reposition_particles()");
694 const auto& plo = this->Geom(LEV).ProbLoArray();
695 const auto& dx = this->Geom(LEV).CellSizeArray();
697 const auto& dxi = this->Geom(LEV).InvCellSizeArray();
698 const auto& dom = this->Geom(LEV).Domain();
700 const int nbins = 500;
703#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
705 for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
706 const amrex::Box& box = mfi.tilebox();
707 const auto& cnt_arr = m_counts.const_array(mfi);
708 const auto& offsets_arr = m_offsets.const_array(mfi);
709 const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
710 auto& pti = this->GetParticles(LEV)[index];
711 const auto parrs = particle_arrays(pti);
714 box, [=] AMREX_GPU_DEVICE(
715 int i,
int j,
int AMREX_D_PICK(, , k))
noexcept {
716 const amrex::IntVect iv(AMREX_D_DECL(i, j, k));
717 const auto getter = Get(iv, cnt_arr, offsets_arr, parrs);
719 for (
int typ = 0; typ < NType; typ++) {
720 AMREX_ASSERT(cnt_arr(iv, typ) < nbins);
721 for (
int n = 0; n < cnt_arr(iv, typ); n++) {
722 const auto pidx = getter(n, typ);
723 auto& p = parrs.m_aos[pidx];
725 const amrex::IntVect piv(AMREX_D_DECL(
726 parrs.m_idata[CommonIntData::i][pidx],
727 parrs.m_idata[CommonIntData::j][pidx],
728 parrs.m_idata[CommonIntData::k][pidx]));
729 AMREX_ASSERT(piv == iv);
731 AMREX_D_TERM(p.pos(0) = plo[0] + iv[0] * dx[0] +
732 (typ + 1) * dx[0] / (NType + 1);
733 , p.pos(1) = plo[1] + iv[1] * dx[1] +
734 (n + 1) * dx[1] / nbins;
737 (iv[2] + constants::HALF) * dx[2];)
740 AMREX_ASSERT(piv == getParticleCell(p, plo, dxi, dom));
753void SpadesParticleContainer<
759 write_plot_file_impl(
760 const std::string& plt_filename,
const std::string& name)
762 BL_PROFILE(
"spades::SpadesParticleContainer::write_plot_file()");
763 reposition_particles();
765 plt_filename, name, m_writeflags_real, m_writeflags_int,
766 m_real_data_names, m_int_data_names);
Main SPADES particle container.
Definition SpadesParticleContainer.H:35
Definition ConsoleIO.cpp:7
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::uint64_t max_representation(const int nbits)
Maximum unsigned integer representation given a number of bits.
Definition Utilities.H:49
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::uint64_t bitmask(const int nbits)
bitmask given a number of bits
Definition Utilities.H:64