SPADES API: SpadesParticleContainerImpl.H
template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
SpadesParticleContainer<NType, NStructReal, NStructInt, NArrayReal, NArrayInt>::
    SpadesParticleContainer(amrex::AmrParGDB* par_gdb, int ngrow)
    : amrex::NeighborParticleContainer<
          NStructReal,
          NStructInt,
          NArrayReal,
          NArrayInt>(par_gdb, ngrow)
    , m_ngrow(ngrow)
{
    const int nlevs_max = par_gdb->maxLevel() + 1;

    if (nlevs_max > 1) {
        amrex::Abort(
            "spades::SPADES::SpadesParticleContainer::"
            "SpadesParticleContainer(): not supporting multilevel right "
            "now");
    }
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
SpadesParticleContainer<NType, NStructReal, NStructInt, NArrayReal, NArrayInt>::
    SpadesParticleContainer(
        const amrex::Vector<amrex::Geometry>& geom,
        const amrex::Vector<amrex::DistributionMapping>& dmap,
        const amrex::Vector<amrex::BoxArray>& ba,
        int ngrow)
    : amrex::NeighborParticleContainer<
          NStructReal,
          NStructInt,
          NArrayReal,
          NArrayInt>(geom, dmap, ba, {2}, ngrow)
    , m_ngrow(ngrow)
{
    if (geom.size() > 1) {
        amrex::Abort(
            "spades::SPADES::SpadesParticleContainer::"
            "SpadesParticleContainer(): not supporting multilevel right "
            "now");
    }
}
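
// Illustrative usage sketch (not part of this file): building a
// single-level container with the second constructor. The names `geom`,
// `dmap`, and `ba`, and the template parameter values, are hypothetical.
//
//     amrex::Vector<amrex::Geometry> geoms{geom};
//     amrex::Vector<amrex::DistributionMapping> dmaps{dmap};
//     amrex::Vector<amrex::BoxArray> bas{ba};
//     SpadesParticleContainer<2, 0, 0, 2, 4> spc(geoms, dmaps, bas, 1);
//     spc.initialize_state();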

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::initialize_state()
{
    BL_PROFILE("spades::SpadesParticleContainer::initialize_state()");

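    // Layout note: m_counts and m_offsets are cell-centered integer
    // MultiFabs defined here with NType components, so m_counts(iv, typ)
    // holds the number of particles of type typ assigned to cell iv, and
    // m_offsets(iv, typ) holds the index of the first such particle within
    // the sorted tile (filled by count_particles()/count_offsets() below).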
    m_counts.define(
        this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV), NType,
        m_ngrow, amrex::MFInfo());

    m_offsets.define(
        this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV), NType,
        m_ngrow, amrex::MFInfo());

    m_counts.setVal(0);
    m_offsets.setVal(0);

    if (m_sort_type == "encoded") {
        m_min_timestamp.define(
            this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV),
            NType, m_ngrow, amrex::MFInfo());
        m_max_timestamp.define(
            this->ParticleBoxArray(LEV), this->ParticleDistributionMap(LEV),
            NType, m_ngrow, amrex::MFInfo());

        m_min_timestamp.setVal(constants::LARGE_NUM);
        m_min_timestamp.setVal(0.0, NType - 1, 1);
        m_max_timestamp.setVal(0.0);
    }
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::clear_state()
{
    BL_PROFILE("spades::SpadesParticleContainer::clear_state()");

    m_counts.clear();
    m_offsets.clear();
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::count_particles()
{
    BL_PROFILE("spades::SpadesParticleContainer::count_particles()");

    m_counts.setVal(0);

#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
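    // Each particle stores its owning cell in its integer components
    // (CommonIntData::i/j/k). The ParallelFor below uses a device atomic
    // because multiple particles in the same (cell, type) bin may be
    // counted concurrently.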
    for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {

        const amrex::Box& box = mfi.tilebox();
        const auto& cnt_arr = m_counts.array(mfi);
        const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
        auto& pti = this->GetParticles(LEV)[index];
        const auto parrs = particle_arrays(pti);
        const int np = pti.numParticles();

        amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(long pidx) noexcept {
            const amrex::IntVect iv(AMREX_D_DECL(
                parrs.m_idata[CommonIntData::i][pidx],
                parrs.m_idata[CommonIntData::j][pidx],
                parrs.m_idata[CommonIntData::k][pidx]));

            if (box.contains(iv)) {
                amrex::Gpu::Atomic::AddNoRet(
                    &cnt_arr(iv, parrs.m_idata[CommonIntData::type_id][pidx]),
                    1);
            }
        });
    }
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::count_offsets()
{
    BL_PROFILE("spades::SpadesParticleContainer::count_offsets()");

    m_offsets.setVal(0);

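    // Worked example with NType = 2: if cell A holds counts {3, 1} and the
    // next cell B holds {0, 2}, the exclusive prefix sum over per-cell
    // totals gives offsets(A, 0) = 0 and offsets(B, 0) = 4; the per-type
    // pass below then fills offsets(A, 1) = 0 + 3 = 3 and
    // offsets(B, 1) = 4 + 0 = 4. Particle n of type typ in cell iv thus
    // lives at index offsets(iv, typ) + n.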
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
    for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
        const amrex::Box& box = mfi.tilebox();
        const auto ncell = box.numPts();
        const auto& cnt_arr = m_counts.const_array(mfi);
        const auto& offsets_arr = m_offsets.array(mfi);
        int* p_offsets = offsets_arr.dataPtr();
        amrex::Scan::PrefixSum<int>(
            ncell,
            [=] AMREX_GPU_DEVICE(int i) -> int {
                const auto iv = box.atOffset(i);
                int total_particles = 0;
                for (int typ = 0; typ < NType; typ++) {
                    total_particles += cnt_arr(iv, typ);
                }
                return total_particles;
            },
            [=] AMREX_GPU_DEVICE(int i, const int& xi) { p_offsets[i] = xi; },
            amrex::Scan::Type::exclusive, amrex::Scan::noRetSum);

        amrex::ParallelFor(
            box, [=] AMREX_GPU_DEVICE(
                     int i, int j, int AMREX_D_PICK(, , k)) noexcept {
                const amrex::IntVect iv(AMREX_D_DECL(i, j, k));
                for (int typ = 1; typ < NType; typ++) {
                    offsets_arr(iv, typ) =
                        offsets_arr(iv, typ - 1) + cnt_arr(iv, typ - 1);
                }
            });
    }
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::update_counts()
{
    BL_PROFILE("spades::SpadesParticleContainer::update_counts()");
    count_particles();
    count_offsets();
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::check_sort(const amrex::MFIter& mfi)
{
    BL_PROFILE("spades::SpadesParticleContainer::check_sort()");
    const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
    auto& pti = this->GetParticles(LEV)[index];
    const size_t np = pti.numParticles();
    const auto parrs = particle_arrays(pti);

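    // The expected ordering is lexicographic: by cell (the particle's
    // IntVect), then by type_id within a cell, then by timestamp within a
    // (cell, type) pair. Each branch below checks one level of that
    // hierarchy against the preceding particle.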
    amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(long pidx) noexcept {
        if (pidx > 0) {
            const auto pm = pidx - 1;
            const amrex::IntVect pivm(AMREX_D_DECL(
                parrs.m_idata[CommonIntData::i][pm],
                parrs.m_idata[CommonIntData::j][pm],
                parrs.m_idata[CommonIntData::k][pm]));
            const amrex::IntVect piv(AMREX_D_DECL(
                parrs.m_idata[CommonIntData::i][pidx],
                parrs.m_idata[CommonIntData::j][pidx],
                parrs.m_idata[CommonIntData::k][pidx]));
            AMREX_ASSERT(pivm <= piv);
            if (pivm == piv) {
                if (parrs.m_idata[CommonIntData::type_id][pm] >
                    parrs.m_idata[CommonIntData::type_id][pidx]) {
                    printf(
                        "Unsorted type %d > %d\n",
                        parrs.m_idata[CommonIntData::type_id][pm],
                        parrs.m_idata[CommonIntData::type_id][pidx]);
                }
                AMREX_ASSERT(
                    parrs.m_idata[CommonIntData::type_id][pm] <=
                    parrs.m_idata[CommonIntData::type_id][pidx]);
            }
            if ((parrs.m_idata[CommonIntData::type_id][pm] ==
                 parrs.m_idata[CommonIntData::type_id][pidx]) &&
                (pivm == piv)) {
                if (parrs.m_rdata[CommonRealData::timestamp][pm] >
                    parrs.m_rdata[CommonRealData::timestamp][pidx]) {
                    printf(
                        "Unsorted time stamp %.16f > %.16f in ivm: (%d, %d) "
                        "of type %d, iv: (%d,%d) of type %d\n",
                        parrs.m_rdata[CommonRealData::timestamp][pm],
                        parrs.m_rdata[CommonRealData::timestamp][pidx],
                        parrs.m_idata[CommonIntData::i][pm],
                        parrs.m_idata[CommonIntData::j][pm],
                        parrs.m_idata[CommonIntData::type_id][pm],
                        parrs.m_idata[CommonIntData::i][pidx],
                        parrs.m_idata[CommonIntData::j][pidx],
                        parrs.m_idata[CommonIntData::type_id][pidx]);
                }
                AMREX_ASSERT(
                    parrs.m_rdata[CommonRealData::timestamp][pm] <=
                    parrs.m_rdata[CommonRealData::timestamp][pidx]);
            }
        }
    });
    amrex::Gpu::streamSynchronize();
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
template <typename CompareFunc>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::sort_impl(const CompareFunc& compare)
{
    BL_PROFILE("spades::SpadesParticleContainer::sort_impl()");
    if (m_sort_type == "nonencoded") {
        nonencoded_sort_impl(compare);
    } else if (m_sort_type == "encoded") {
        encoded_sort_impl();
    } else {
        amrex::Abort("Invalid sort type. Must be nonencoded or encoded");
    }
    update_counts();
}
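
// A minimal sketch of a comparator compatible with sort_impl(), assuming
// the particle array layout used above (hypothetical; the comparators
// actually used by SPADES are defined elsewhere in the code base):
//
//     struct CellTypeTimeCompare
//     {
//         template <typename PArrays>
//         AMREX_GPU_HOST_DEVICE bool operator()(
//             const amrex::Long xi, const amrex::Long yi,
//             const PArrays& parrs) const
//         {
//             const amrex::IntVect xiv(AMREX_D_DECL(
//                 parrs.m_idata[CommonIntData::i][xi],
//                 parrs.m_idata[CommonIntData::j][xi],
//                 parrs.m_idata[CommonIntData::k][xi]));
//             const amrex::IntVect yiv(AMREX_D_DECL(
//                 parrs.m_idata[CommonIntData::i][yi],
//                 parrs.m_idata[CommonIntData::j][yi],
//                 parrs.m_idata[CommonIntData::k][yi]));
//             if (xiv != yiv) {
//                 return xiv < yiv;
//             }
//             const int xt = parrs.m_idata[CommonIntData::type_id][xi];
//             const int yt = parrs.m_idata[CommonIntData::type_id][yi];
//             if (xt != yt) {
//                 return xt < yt;
//             }
//             return parrs.m_rdata[CommonRealData::timestamp][xi] <
//                    parrs.m_rdata[CommonRealData::timestamp][yi];
//         }
//     };
//
// Used as: sort_impl(CellTypeTimeCompare()); this reproduces the
// (cell, type, timestamp) ordering that check_sort() verifies.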

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
template <typename CompareFunc>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::nonencoded_sort_impl(const CompareFunc& compare)
{
    // Taking inspiration from AMReX's SortParticlesByBin
    BL_PROFILE("spades::SpadesParticleContainer::nonencoded_sort_impl()");

#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
    for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
        const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
        auto& pti = this->GetParticles(LEV)[index];
        const size_t np = pti.numParticles();

        if (np == 0) {
            continue;
        }

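        // Design note: the sort permutes an index list (cell_list) rather
        // than the particle data itself; the comparator reads particle
        // fields through parrs, and ReorderParticles() applies the final
        // permutation to particle storage in one pass.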
        BL_PROFILE_VAR(
            "spades::SpadesParticleContainer::nonencoded_sort::sort_prep",
            prep);
        amrex::Gpu::DeviceVector<amrex::Long> cell_list(np);
        auto* p_cell_list = cell_list.data();
        amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(long pidx) noexcept {
            p_cell_list[pidx] = pidx;
        });
        amrex::Gpu::streamSynchronize();
        BL_PROFILE_VAR_STOP(prep);

        // Sort particle indices based on the cell index
        BL_PROFILE_VAR(
            "spades::SpadesParticleContainer::nonencoded_sort::sort", sort);
        const auto parrs = particle_arrays(pti);
#ifdef AMREX_USE_GPU
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
        thrust::sort(
            thrust::device, cell_list.begin(), cell_list.end(),
            [=] AMREX_GPU_DEVICE(
                const amrex::Long xi, const amrex::Long yi) noexcept {
                return compare(xi, yi, parrs);
            });
#else
        // Perform sort on CPU, then copy back to device (not good)
        amrex::Vector<amrex::Long> h_cell_list(np, 0);
        amrex::Gpu::copy(
            amrex::Gpu::deviceToHost, cell_list.begin(), cell_list.end(),
            h_cell_list.begin());
        std::sort(
            h_cell_list.begin(), h_cell_list.end(),
            [=](const amrex::Long xi, const amrex::Long yi) {
                return compare(xi, yi, parrs);
            });
        amrex::Gpu::copy(
            amrex::Gpu::hostToDevice, h_cell_list.begin(), h_cell_list.end(),
            cell_list.begin());
#endif
#else
        std::sort(
            cell_list.begin(), cell_list.end(),
            [=](const amrex::Long xi, const amrex::Long yi) {
                return compare(xi, yi, parrs);
            });
#endif
        amrex::Gpu::streamSynchronize();
        BL_PROFILE_VAR_STOP(sort);

        // Reorder the particles in memory
        BL_PROFILE_VAR(
            "spades::SpadesParticleContainer::nonencoded_sort::"
            "ReorderParticles",
            reorder);
        this->ReorderParticles(LEV, mfi, cell_list.data());
        amrex::Gpu::streamSynchronize();
        BL_PROFILE_VAR_STOP(reorder);

#ifdef AMREX_DEBUG
        check_sort(mfi);
#endif
    }
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::encoded_sort_impl()
{
    // Taking inspiration from AMReX's SortParticlesByBin
    BL_PROFILE("spades::SpadesParticleContainer::encoded_sort_impl()");

    AMREX_ALWAYS_ASSERT(NType <= max_representation(constants::TYPE_NBITS));

    BL_PROFILE_VAR(
        "spades::SpadesParticleContainer::encoded_sort::bounds", bounds);
    m_min_timestamp.setVal(constants::LARGE_NUM, 0, NType - 1);
    m_max_timestamp.setVal(0.0, 0, NType - 1);

#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
    for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {

        const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
        auto& pti = this->GetParticles(LEV)[index];
        const size_t np = pti.numParticles();

        if (np == 0) {
            continue;
        }
        const auto& min_ts_arr = m_min_timestamp.array(mfi);
        const auto& max_ts_arr = m_max_timestamp.array(mfi);
        const auto parrs = particle_arrays(pti);
        auto* ts = parrs.m_rdata[CommonRealData::timestamp];
#ifdef AMREX_DEBUG
        const amrex::Box& box = mfi.tilebox();
#endif
        amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(long pidx) noexcept {
            if (parrs.m_idata[CommonIntData::type_id][pidx] != (NType - 1)) {
                const amrex::IntVect piv(AMREX_D_DECL(
                    parrs.m_idata[CommonIntData::i][pidx],
                    parrs.m_idata[CommonIntData::j][pidx],
                    parrs.m_idata[CommonIntData::k][pidx]));

                AMREX_ASSERT(box.contains(piv));
                amrex::Gpu::Atomic::Min(
                    &min_ts_arr(
                        piv, parrs.m_idata[CommonIntData::type_id][pidx]),
                    ts[pidx]);
                amrex::Gpu::Atomic::Max(
                    &max_ts_arr(
                        piv, parrs.m_idata[CommonIntData::type_id][pidx]),
                    ts[pidx]);
            }
        });
    }
    BL_PROFILE_VAR_STOP(bounds);

#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
    for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {

        const amrex::Box& box = mfi.tilebox();
        const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
        auto& pti = this->GetParticles(LEV)[index];
        const size_t np = pti.numParticles();

        if (np == 0) {
            continue;
        }

        BL_PROFILE_VAR(
            "spades::SpadesParticleContainer::encoded_sort::sort_prep", prep);
        amrex::Gpu::DeviceVector<amrex::Long> cell_list(np);
        auto* p_cell_list = cell_list.data();
        amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(long pidx) noexcept {
            p_cell_list[pidx] = pidx;
        });
        amrex::Gpu::streamSynchronize();
        BL_PROFILE_VAR_STOP(prep);

        BL_PROFILE_VAR(
            "spades::SpadesParticleContainer::encoded_sort::encode", encode);
        const auto parrs = particle_arrays(pti);
        auto* ts_arr = parrs.m_rdata[CommonRealData::timestamp];
        const auto& min_ts_arr = m_min_timestamp.array(mfi);
        const auto& max_ts_arr = m_max_timestamp.array(mfi);
        amrex::Gpu::DeviceVector<std::uint64_t> encode(np);
        auto* p_encode = encode.data();
        const auto box_lo = box.smallEnd();
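        // Encoded key layout, most to least significant bit (the widths in
        // constants:: are assumed to sum to TOTAL_NBITS):
        //   [k: K_NBITS][j: J_NBITS][i: I_NBITS][type_id: TYPE_NBITS]
        //   [timestamp: TIMESTAMP_NBITS]
        // Timestamps are normalized to [0, 1] per (cell, type) using the
        // bounds gathered above and scaled to TIMESTAMP_NBITS of integer
        // resolution, so a single std::uint64_t comparison reproduces the
        // (cell, type, timestamp) ordering.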
        amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(long pidx) noexcept {
            AMREX_D_TERM(
                const auto pi =
                    parrs.m_idata[CommonIntData::i][pidx] - box_lo[0];
                , const auto pj =
                      parrs.m_idata[CommonIntData::j][pidx] - box_lo[1];
                , const auto pk =
                      parrs.m_idata[CommonIntData::k][pidx] - box_lo[2];);

            AMREX_D_EXPR(
                AMREX_ALWAYS_ASSERT(pi >= 0), AMREX_ALWAYS_ASSERT(pj >= 0),
                AMREX_ALWAYS_ASSERT(pk >= 0));
            AMREX_D_EXPR(
                AMREX_ALWAYS_ASSERT(
                    static_cast<std::uint64_t>(pi) <=
                    max_representation(constants::I_NBITS)),
                AMREX_ALWAYS_ASSERT(
                    static_cast<std::uint64_t>(pj) <=
                    max_representation(constants::J_NBITS)),
                AMREX_ALWAYS_ASSERT(
                    static_cast<std::uint64_t>(pk) <=
                    max_representation(constants::K_NBITS)));
            AMREX_ASSERT(parrs.m_idata[CommonIntData::type_id][pidx] >= 0);
            AMREX_ASSERT(ts_arr[pidx] >= (0.0 - constants::SMALL_NUM));

            int shift = constants::TOTAL_NBITS;
#if AMREX_SPACEDIM == 3
            shift -= constants::K_NBITS;
            const std::uint64_t k =
                static_cast<std::uint64_t>(pk & bitmask(constants::K_NBITS))
                << shift;
#endif
#if AMREX_SPACEDIM >= 2
            shift -= constants::J_NBITS;
            const std::uint64_t j =
                static_cast<std::uint64_t>(pj & bitmask(constants::J_NBITS))
                << shift;
#endif
            shift -= constants::I_NBITS;
            const std::uint64_t i =
                static_cast<std::uint64_t>(pi & bitmask(constants::I_NBITS))
                << shift;
            shift -= constants::TYPE_NBITS;
            const std::uint64_t typ =
                static_cast<std::uint64_t>(
                    parrs.m_idata[CommonIntData::type_id][pidx] &
                    bitmask(constants::TYPE_NBITS))
                << shift;

            const amrex::IntVect piv(AMREX_D_DECL(
                parrs.m_idata[CommonIntData::i][pidx],
                parrs.m_idata[CommonIntData::j][pidx],
                parrs.m_idata[CommonIntData::k][pidx]));
            const amrex::Real min_t =
                min_ts_arr(piv, parrs.m_idata[CommonIntData::type_id][pidx]);
            const amrex::Real max_t =
                max_ts_arr(piv, parrs.m_idata[CommonIntData::type_id][pidx]);
            AMREX_ASSERT(min_t <= max_t);
            const amrex::Real delta =
                (amrex::Math::abs(max_t - min_t) > constants::EPS)
                    ? max_t - min_t
                    : 1.0;

            const amrex::Real normalized = (ts_arr[pidx] - min_t) / delta;
            const amrex::Real scaled =
                normalized * static_cast<amrex::Real>((1ULL << shift) - 1);
            const auto ts = static_cast<std::uint64_t>(
                static_cast<std::uint32_t>(scaled) &
                bitmask(constants::TIMESTAMP_NBITS));
            AMREX_ASSERT((shift - constants::TIMESTAMP_NBITS) == 0);

            p_encode[pidx] = AMREX_D_PICK(i, j | i, k | j | i) | typ | ts;
            // printf(
            //     "encode at %ld is %lu with iv: (%d, %d), type: %d, time: "
            //     "%.16f, (normalized: %.16f, scaled: %.16f, encoded: %lu)\n",
            //     pidx, p_encode[pidx], piv[0], piv[1],
            //     parrs.m_idata[CommonIntData::type_id][pidx],
            //     ts_arr[pidx], normalized, scaled, ts);
        });
        amrex::Gpu::streamSynchronize();
        BL_PROFILE_VAR_STOP(encode);

        // Sort particle indices based on the encoded key
        BL_PROFILE_VAR(
            "spades::SpadesParticleContainer::encoded_sort::sort", sort);
#ifdef AMREX_USE_GPU
#if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
        thrust::sort(
            thrust::device, cell_list.begin(), cell_list.end(),
            [=] AMREX_GPU_DEVICE(
                const amrex::Long xi, const amrex::Long yi) noexcept {
                return p_encode[xi] < p_encode[yi];
            });
        // thrust::sort_by_key(
        //     thrust::device, encode.begin(), encode.end(), cell_list.begin());
#else
        // Perform sort on CPU, then copy back to device (not good)
        amrex::Vector<amrex::Long> h_cell_list(np, 0);
        amrex::Vector<std::uint64_t> h_encode(np, 0);
        amrex::Gpu::copy(
            amrex::Gpu::deviceToHost, cell_list.begin(), cell_list.end(),
            h_cell_list.begin());
        amrex::Gpu::copy(
            amrex::Gpu::deviceToHost, encode.begin(), encode.end(),
            h_encode.begin());
        std::sort(
            h_cell_list.begin(), h_cell_list.end(),
            [=](const amrex::Long xi, const amrex::Long yi) {
                return h_encode[xi] < h_encode[yi];
            });
        amrex::Gpu::copy(
            amrex::Gpu::hostToDevice, h_cell_list.begin(), h_cell_list.end(),
            cell_list.begin());
#endif
#else
        std::sort(
            cell_list.begin(), cell_list.end(),
            [=](const amrex::Long xi, const amrex::Long yi) {
                return p_encode[xi] < p_encode[yi];
            });
#endif
        amrex::Gpu::streamSynchronize();
        BL_PROFILE_VAR_STOP(sort);

        // Reorder the particles in memory
        BL_PROFILE_VAR(
            "spades::SpadesParticleContainer::encoded_sort::"
            "ReorderParticles",
            reorder);
        this->ReorderParticles(LEV, mfi, cell_list.data());
        amrex::Gpu::streamSynchronize();
        BL_PROFILE_VAR_STOP(reorder);

#ifdef AMREX_DEBUG
        check_sort(mfi);
#endif
    }
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::print_messages(const std::string& header)
{
    BL_PROFILE("spades::SpadesParticleContainer::print_messages()");

    amrex::Print() << header << " -----------------------------" << std::endl;
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
    for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
        const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
        const auto& pti = this->GetParticles(LEV)[index];
        const size_t np = pti.numParticles();
        const auto parrs = particle_arrays(pti);

        amrex::ParallelFor(np, [=] AMREX_GPU_DEVICE(long pidx) noexcept {
            DevicePrint()(pidx, parrs);
        });
    }
    amrex::Print() << "End " << header << " -----------------------------"
                   << std::endl;
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::reposition_particles()
{
    BL_PROFILE("spades::SpadesParticleContainer::reposition_particles()");

    const auto& plo = this->Geom(LEV).ProbLoArray();
    const auto& dx = this->Geom(LEV).CellSizeArray();
#ifdef AMREX_DEBUG
    const auto& dxi = this->Geom(LEV).InvCellSizeArray();
    const auto& dom = this->Geom(LEV).Domain();
#endif
    const int nbins = 500;
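    // Plotting layout: within each cell, particles are spread along x by
    // type ((typ + 1)/(NType + 1) of a cell width) and along y by their
    // index within the (cell, type) bucket ((n + 1)/nbins of a cell
    // height), so coincident particles stay distinguishable in the plot
    // file; nbins caps the per-type count a single cell may hold.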

#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
    for (amrex::MFIter mfi = this->MakeMFIter(LEV); mfi.isValid(); ++mfi) {
        const amrex::Box& box = mfi.tilebox();
        const auto& cnt_arr = m_counts.const_array(mfi);
        const auto& offsets_arr = m_offsets.const_array(mfi);
        const auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex());
        auto& pti = this->GetParticles(LEV)[index];
        const auto parrs = particle_arrays(pti);

        amrex::ParallelFor(
            box, [=] AMREX_GPU_DEVICE(
                     int i, int j, int AMREX_D_PICK(, , k)) noexcept {
                const amrex::IntVect iv(AMREX_D_DECL(i, j, k));
                const auto getter = Get(iv, cnt_arr, offsets_arr, parrs);

                for (int typ = 0; typ < NType; typ++) {
                    AMREX_ASSERT(cnt_arr(iv, typ) < nbins);
                    for (int n = 0; n < cnt_arr(iv, typ); n++) {
                        const auto pidx = getter(n, typ);
                        auto& p = parrs.m_aos[pidx];

                        const amrex::IntVect piv(AMREX_D_DECL(
                            parrs.m_idata[CommonIntData::i][pidx],
                            parrs.m_idata[CommonIntData::j][pidx],
                            parrs.m_idata[CommonIntData::k][pidx]));
                        AMREX_ASSERT(piv == iv);

                        AMREX_D_TERM(
                            p.pos(0) = plo[0] + iv[0] * dx[0] +
                                       (typ + 1) * dx[0] / (NType + 1);
                            , p.pos(1) = plo[1] + iv[1] * dx[1] +
                                         (n + 1) * dx[1] / nbins;
                            , p.pos(2) =
                                  plo[2] + (iv[2] + constants::HALF) * dx[2];)

                        // Ensure the particle didn't change cells
                        AMREX_ASSERT(piv == getParticleCell(p, plo, dxi, dom));
                    }
                }
            });
    }
}

template <
    int NType,
    int NStructReal,
    int NStructInt,
    int NArrayReal,
    int NArrayInt>
void SpadesParticleContainer<
    NType,
    NStructReal,
    NStructInt,
    NArrayReal,
    NArrayInt>::
    write_plot_file_impl(
        const std::string& plt_filename, const std::string& name)
{
    BL_PROFILE("spades::SpadesParticleContainer::write_plot_file_impl()");
    reposition_particles();
    this->WritePlotFile(
        plt_filename, name, m_writeflags_real, m_writeflags_int,
        m_real_data_names, m_int_data_names);
}
Referenced definitions:
  SpadesParticleContainer: main SPADES particle container (SpadesParticleContainer.H:35)
  std::uint64_t max_representation(const int nbits): maximum unsigned integer representation given a number of bits (Utilities.H:49)
  std::uint64_t bitmask(const int nbits): bitmask given a number of bits (Utilities.H:64)
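
For reference, a minimal sketch of what these two helpers compute, inferred
from their documentation above (the actual definitions live in Utilities.H
and may differ):

    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::uint64_t
    max_representation(const int nbits)
    {
        // Largest unsigned value representable in nbits bits, e.g. 255 for 8
        return (static_cast<std::uint64_t>(1) << nbits) - 1;
    }

    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE std::uint64_t
    bitmask(const int nbits)
    {
        // Mask with the low nbits bits set, e.g. 0xFF for 8
        return (static_cast<std::uint64_t>(1) << nbits) - 1;
    }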