Manifold 1.0
Robust computational geometry
 
Loading...
Searching...
No Matches
parallel.h
1// Copyright 2022 The Manifold Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Simple implementation of selected functions in PSTL.
16// Iterators must be RandomAccessIterator.
17
18#pragma once
19
20#if (MANIFOLD_PAR == 1)
21#include <tbb/combinable.h>
22#include <tbb/parallel_for.h>
23#include <tbb/parallel_invoke.h>
24#include <tbb/parallel_reduce.h>
25#include <tbb/parallel_scan.h>
26#endif
27#include <algorithm>
28#include <numeric>
29
30#include "manifold/iters.h"
31namespace manifold {
32
33enum class ExecutionPolicy {
34 Par,
35 Seq,
36};
37
38constexpr size_t kSeqThreshold = 1e4;
39// ExecutionPolicy:
40// - Sequential for small workload,
41// - Parallel (CPU) for medium workload,
42inline constexpr ExecutionPolicy autoPolicy(size_t size,
43 size_t threshold = kSeqThreshold) {
44 if (size <= threshold) {
45 return ExecutionPolicy::Seq;
46 }
47 return ExecutionPolicy::Par;
48}
49
50template <typename Iter,
51 typename Dummy = std::enable_if_t<!std::is_integral_v<Iter>>>
52inline constexpr ExecutionPolicy autoPolicy(Iter first, Iter last,
53 size_t threshold = kSeqThreshold) {
54 if (static_cast<size_t>(std::distance(first, last)) <= threshold) {
55 return ExecutionPolicy::Seq;
56 }
57 return ExecutionPolicy::Par;
58}
59
60template <typename InputIter, typename OutputIter>
61void copy(ExecutionPolicy policy, InputIter first, InputIter last,
62 OutputIter d_first);
63template <typename InputIter, typename OutputIter>
64void copy(InputIter first, InputIter last, OutputIter d_first);
65
66#if (MANIFOLD_PAR == 1)
67namespace details {
68using manifold::kSeqThreshold;
69// implementation from
70// https://duvanenko.tech.blog/2018/01/14/parallel-merge/
71// https://github.com/DragonSpit/ParallelAlgorithms
72// note that the ranges are now [p, r) to fit our convention.
73template <typename SrcIter, typename DestIter, typename Comp>
74void mergeRec(SrcIter src, DestIter dest, size_t p1, size_t r1, size_t p2,
75 size_t r2, size_t p3, Comp comp) {
76 size_t length1 = r1 - p1;
77 size_t length2 = r2 - p2;
78 if (length1 < length2) {
79 std::swap(p1, p2);
80 std::swap(r1, r2);
81 std::swap(length1, length2);
82 }
83 if (length1 == 0) return;
84 if (length1 + length2 <= kSeqThreshold) {
85 std::merge(src + p1, src + r1, src + p2, src + r2, dest + p3, comp);
86 } else {
87 size_t q1 = p1 + length1 / 2;
88 size_t q2 =
89 std::distance(src, std::lower_bound(src + p2, src + r2, src[q1], comp));
90 size_t q3 = p3 + (q1 - p1) + (q2 - p2);
91 dest[q3] = src[q1];
92 tbb::parallel_invoke(
93 [=] { mergeRec(src, dest, p1, q1, p2, q2, p3, comp); },
94 [=] { mergeRec(src, dest, q1 + 1, r1, q2, r2, q3 + 1, comp); });
95 }
96}
97
98template <typename SrcIter, typename DestIter, typename Comp>
99void mergeSortRec(SrcIter src, DestIter dest, size_t begin, size_t end,
100 Comp comp) {
101 size_t numElements = end - begin;
102 if (numElements <= kSeqThreshold) {
103 std::copy(src + begin, src + end, dest + begin);
104 std::stable_sort(dest + begin, dest + end, comp);
105 } else {
106 size_t middle = begin + numElements / 2;
107 tbb::parallel_invoke([=] { mergeSortRec(dest, src, begin, middle, comp); },
108 [=] { mergeSortRec(dest, src, middle, end, comp); });
109 mergeRec(src, dest, begin, middle, middle, end, begin, comp);
110 }
111}
112
113template <typename T, typename InputIter, typename OutputIter, typename BinOp>
114struct ScanBody {
115 T sum;
116 T identity;
117 BinOp &f;
118 InputIter input;
119 OutputIter output;
120
121 ScanBody(T sum, T identity, BinOp &f, InputIter input, OutputIter output)
122 : sum(sum), identity(identity), f(f), input(input), output(output) {}
123 ScanBody(ScanBody &b, tbb::split)
124 : sum(b.identity),
125 identity(b.identity),
126 f(b.f),
127 input(b.input),
128 output(b.output) {}
129 template <typename Tag>
130 void operator()(const tbb::blocked_range<size_t> &r, Tag) {
131 T temp = sum;
132 for (size_t i = r.begin(); i < r.end(); ++i) {
133 T inputTmp = input[i];
134 if (Tag::is_final_scan()) output[i] = temp;
135 temp = f(temp, inputTmp);
136 }
137 sum = temp;
138 }
139 T get_sum() const { return sum; }
140 void reverse_join(ScanBody &a) { sum = f(a.sum, sum); }
141 void assign(ScanBody &b) { sum = b.sum; }
142};
143
144template <typename InputIter, typename OutputIter, typename P>
145struct CopyIfScanBody {
146 size_t sum;
147 P &pred;
148 InputIter input;
149 OutputIter output;
150
151 CopyIfScanBody(P &pred, InputIter input, OutputIter output)
152 : sum(0), pred(pred), input(input), output(output) {}
153 CopyIfScanBody(CopyIfScanBody &b, tbb::split)
154 : sum(0), pred(b.pred), input(b.input), output(b.output) {}
155 template <typename Tag>
156 void operator()(const tbb::blocked_range<size_t> &r, Tag) {
157 size_t temp = sum;
158 for (size_t i = r.begin(); i < r.end(); ++i) {
159 if (pred(i)) {
160 temp += 1;
161 if (Tag::is_final_scan()) output[temp - 1] = input[i];
162 }
163 }
164 sum = temp;
165 }
166 size_t get_sum() const { return sum; }
167 void reverse_join(CopyIfScanBody &a) { sum = a.sum + sum; }
168 void assign(CopyIfScanBody &b) { sum = b.sum; }
169};
170
171template <typename N, const int K>
172struct Hist {
173 using SizeType = N;
174 static constexpr int k = K;
175 N hist[k][256] = {{0}};
176 void merge(const Hist<N, K> &other) {
177 for (int i = 0; i < k; ++i)
178 for (int j = 0; j < 256; ++j) hist[i][j] += other.hist[i][j];
179 }
180 void prefixSum(N total, bool *canSkip) {
181 for (int i = 0; i < k; ++i) {
182 size_t count = 0;
183 for (int j = 0; j < 256; ++j) {
184 N tmp = hist[i][j];
185 hist[i][j] = count;
186 count += tmp;
187 if (tmp == total) canSkip[i] = true;
188 }
189 }
190 }
191};
192
193template <typename T, typename H>
194void histogram(T *ptr, typename H::SizeType n, H &hist) {
195 auto worker = [](T *ptr, typename H::SizeType n, H &hist) {
196 for (typename H::SizeType i = 0; i < n; ++i)
197 for (int k = 0; k < hist.k; ++k)
198 ++hist.hist[k][(ptr[i] >> (8 * k)) & 0xFF];
199 };
200 if (n < kSeqThreshold) {
201 worker(ptr, n, hist);
202 } else {
203 tbb::combinable<H> store;
204 tbb::parallel_for(
205 tbb::blocked_range<typename H::SizeType>(0, n, kSeqThreshold),
206 [&worker, &store, ptr](const auto &r) {
207 worker(ptr + r.begin(), r.end() - r.begin(), store.local());
208 });
209 store.combine_each([&hist](const H &h) { hist.merge(h); });
210 }
211}
212
213template <typename T, typename H>
214void shuffle(T *src, T *target, typename H::SizeType n, H &hist, int k) {
215 for (typename H::SizeType i = 0; i < n; ++i)
216 target[hist.hist[k][(src[i] >> (8 * k)) & 0xFF]++] = src[i];
217}
218
219template <typename T, typename SizeType>
220bool LSB_radix_sort(T *input, T *tmp, SizeType n) {
221 Hist<SizeType, sizeof(T) / sizeof(char)> hist;
222 if (std::is_sorted(input, input + n)) return false;
223 histogram(input, n, hist);
224 bool canSkip[hist.k] = {0};
225 hist.prefixSum(n, canSkip);
226 T *a = input, *b = tmp;
227 for (int k = 0; k < hist.k; ++k) {
228 if (!canSkip[k]) {
229 shuffle(a, b, n, hist, k);
230 std::swap(a, b);
231 }
232 }
233 return a == tmp;
234}
235
236// LSB radix sort with merge
237template <typename T, typename SizeType>
238struct SortedRange {
239 T *input, *tmp;
240 SizeType offset = 0, length = 0;
241 bool inTmp = false;
242
243 SortedRange(T *input, T *tmp, SizeType offset = 0, SizeType length = 0)
244 : input(input), tmp(tmp), offset(offset), length(length) {}
245 SortedRange(SortedRange<T, SizeType> &r, tbb::split)
246 : input(r.input), tmp(r.tmp) {}
247 void operator()(const tbb::blocked_range<SizeType> &range) {
248 SortedRange<T, SizeType> rhs(input, tmp, range.begin(),
249 range.end() - range.begin());
250 rhs.inTmp =
251 LSB_radix_sort(input + rhs.offset, tmp + rhs.offset, rhs.length);
252 if (length == 0)
253 *this = rhs;
254 else
255 join(rhs);
256 }
257 bool swapBuffer() const {
258 T *src = input, *target = tmp;
259 if (inTmp) std::swap(src, target);
260 copy(src + offset, src + offset + length, target + offset);
261 return !inTmp;
262 }
263 void join(const SortedRange<T, SizeType> &rhs) {
264 if (inTmp != rhs.inTmp) {
265 if (length < rhs.length)
266 inTmp = swapBuffer();
267 else
268 rhs.swapBuffer();
269 }
270 T *src = input, *target = tmp;
271 if (inTmp) std::swap(src, target);
272 if (src[offset + length - 1] > src[rhs.offset]) {
273 mergeRec(src, target, offset, offset + length, rhs.offset,
274 rhs.offset + rhs.length, offset, std::less<T>());
275 inTmp = !inTmp;
276 }
277 length += rhs.length;
278 }
279};
280
281template <typename T, typename SizeTy>
282void radix_sort(T *input, SizeTy n) {
283 T *aux = new T[n];
284 SizeTy blockSize = std::max(n / tbb::this_task_arena::max_concurrency() / 4,
285 static_cast<SizeTy>(kSeqThreshold / sizeof(T)));
286 SortedRange<T, SizeTy> result(input, aux);
287 tbb::parallel_reduce(tbb::blocked_range<SizeTy>(0, n, blockSize), result);
288 if (result.inTmp) copy(aux, aux + n, input);
289 delete[] aux;
290}
291
292template <typename Iterator,
293 typename T = typename std::iterator_traits<Iterator>::value_type,
294 typename Comp = decltype(std::less<T>())>
295void mergeSort(ExecutionPolicy policy, Iterator first, Iterator last,
296 Comp comp) {
297#if (MANIFOLD_PAR == 1)
298 if (policy == ExecutionPolicy::Par) {
299 // apparently this prioritizes threads inside here?
300 tbb::this_task_arena::isolate([&] {
301 size_t length = std::distance(first, last);
302 T *tmp = new T[length];
303 copy(policy, first, last, tmp);
304 details::mergeSortRec(tmp, first, 0, length, comp);
305 delete[] tmp;
306 });
307 return;
308 }
309#endif
310 std::stable_sort(first, last, comp);
311}
312
313// stable_sort using merge sort.
314//
315// For simpler implementation, we do not support types that are not trivially
316// destructable.
317template <typename Iterator,
318 typename T = typename std::iterator_traits<Iterator>::value_type,
319 typename Dummy = void>
320struct SortFunctor {
321 void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
322 static_assert(
323 std::is_convertible_v<
324 typename std::iterator_traits<Iterator>::iterator_category,
325 std::random_access_iterator_tag>,
326 "You can only parallelize RandomAccessIterator.");
327 static_assert(std::is_trivially_destructible_v<T>,
328 "Our simple implementation does not support types that are "
329 "not trivially destructable.");
330 return mergeSort(policy, first, last, std::less<T>());
331 }
332};
333
334// stable_sort specialized with radix sort for integral types.
335// Typically faster than merge sort.
336template <typename Iterator, typename T>
337struct SortFunctor<
338 Iterator, T,
339 std::enable_if_t<
340 std::is_integral_v<T> &&
341 std::is_pointer_v<typename std::iterator_traits<Iterator>::pointer>>> {
342 void operator()(ExecutionPolicy policy, Iterator first, Iterator last) {
343 static_assert(
344 std::is_convertible_v<
345 typename std::iterator_traits<Iterator>::iterator_category,
346 std::random_access_iterator_tag>,
347 "You can only parallelize RandomAccessIterator.");
348 static_assert(std::is_trivially_destructible_v<T>,
349 "Our simple implementation does not support types that are "
350 "not trivially destructable.");
351#if (MANIFOLD_PAR == 1)
352 if (policy == ExecutionPolicy::Par) {
353 radix_sort(&*first, static_cast<size_t>(std::distance(first, last)));
354 return;
355 }
356#endif
357 stable_sort(policy, first, last, std::less<T>());
358 }
359};
360
361} // namespace details
362
363#endif
364
365// Applies the function `f` to each element in the range `[first, last)`
366template <typename Iter, typename F>
367void for_each(ExecutionPolicy policy, Iter first, Iter last, F f) {
368 static_assert(std::is_convertible_v<
369 typename std::iterator_traits<Iter>::iterator_category,
370 std::random_access_iterator_tag>,
371 "You can only parallelize RandomAccessIterator.");
372#if (MANIFOLD_PAR == 1)
373 if (policy == ExecutionPolicy::Par) {
374 tbb::parallel_for(tbb::blocked_range<Iter>(first, last),
375 [&f](const tbb::blocked_range<Iter> &range) {
376 for (Iter i = range.begin(); i != range.end(); i++)
377 f(*i);
378 });
379 return;
380 }
381#endif
382 std::for_each(first, last, f);
383}
384
385// Applies the function `f` to each element in the range `[first, last)`
386template <typename Iter, typename F>
387void for_each_n(ExecutionPolicy policy, Iter first, size_t n, F f) {
388 static_assert(std::is_convertible_v<
389 typename std::iterator_traits<Iter>::iterator_category,
390 std::random_access_iterator_tag>,
391 "You can only parallelize RandomAccessIterator.");
392 for_each(policy, first, first + n, f);
393}
394
395// Reduce the range `[first, last)` using a binary operation `f` with an initial
396// value `init`.
397//
398// The binary operation should be commutative and associative. Otherwise, the
399// result is non-deterministic.
400template <typename InputIter, typename BinaryOp,
401 typename T = typename std::iterator_traits<InputIter>::value_type>
402T reduce(ExecutionPolicy policy, InputIter first, InputIter last, T init,
403 BinaryOp f) {
404 static_assert(std::is_convertible_v<
405 typename std::iterator_traits<InputIter>::iterator_category,
406 std::random_access_iterator_tag>,
407 "You can only parallelize RandomAccessIterator.");
408#if (MANIFOLD_PAR == 1)
409 if (policy == ExecutionPolicy::Par) {
410 // should we use deterministic reduce here?
411 return tbb::parallel_reduce(
412 tbb::blocked_range<InputIter>(first, last, details::kSeqThreshold),
413 init,
414 [&f](const tbb::blocked_range<InputIter> &range, T value) {
415 return std::reduce(range.begin(), range.end(), value, f);
416 },
417 f);
418 }
419#endif
420 return std::reduce(first, last, init, f);
421}
422
423// Reduce the range `[first, last)` using a binary operation `f` with an initial
424// value `init`.
425//
426// The binary operation should be commutative and associative. Otherwise, the
427// result is non-deterministic.
428template <typename InputIter, typename BinaryOp,
429 typename T = typename std::iterator_traits<InputIter>::value_type>
430T reduce(InputIter first, InputIter last, T init, BinaryOp f) {
431 return reduce(autoPolicy(first, last, 1e5), first, last, init, f);
432}
433
434// Transform and reduce the range `[first, last)` by first applying a unary
435// function `g`, and then combining the results using a binary operation `f`
436// with an initial value `init`.
437//
438// The binary operation should be commutative and associative. Otherwise, the
439// result is non-deterministic.
440template <typename InputIter, typename BinaryOp, typename UnaryOp,
441 typename T = std::invoke_result_t<
442 UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
443T transform_reduce(ExecutionPolicy policy, InputIter first, InputIter last,
444 T init, BinaryOp f, UnaryOp g) {
445 return reduce(policy, TransformIterator(first, g), TransformIterator(last, g),
446 init, f);
447}
448
449// Transform and reduce the range `[first, last)` by first applying a unary
450// function `g`, and then combining the results using a binary operation `f`
451// with an initial value `init`.
452//
453// The binary operation should be commutative and associative. Otherwise, the
454// result is non-deterministic.
455template <typename InputIter, typename BinaryOp, typename UnaryOp,
456 typename T = std::invoke_result_t<
457 UnaryOp, typename std::iterator_traits<InputIter>::value_type>>
458T transform_reduce(InputIter first, InputIter last, T init, BinaryOp f,
459 UnaryOp g) {
460 return manifold::reduce(TransformIterator(first, g),
461 TransformIterator(last, g), init, f);
462}
463
464// Compute the inclusive prefix sum for the range `[first, last)`
465// using the summation operator, and store the result in the range
466// starting from `d_first`.
467//
468// The input range `[first, last)` and
469// the output range `[d_first, d_first + last - first)`
470// must be equal or non-overlapping.
471template <typename InputIter, typename OutputIter,
472 typename T = typename std::iterator_traits<InputIter>::value_type>
473void inclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
474 OutputIter d_first) {
475 static_assert(std::is_convertible_v<
476 typename std::iterator_traits<InputIter>::iterator_category,
477 std::random_access_iterator_tag>,
478 "You can only parallelize RandomAccessIterator.");
479 static_assert(
480 std::is_convertible_v<
481 typename std::iterator_traits<OutputIter>::iterator_category,
482 std::random_access_iterator_tag>,
483 "You can only parallelize RandomAccessIterator.");
484#if (MANIFOLD_PAR == 1)
485 if (policy == ExecutionPolicy::Par) {
486 tbb::parallel_scan(
487 tbb::blocked_range<size_t>(0, std::distance(first, last)),
488 static_cast<T>(0),
489 [&](const tbb::blocked_range<size_t> &range, T sum,
490 bool is_final_scan) {
491 T temp = sum;
492 for (size_t i = range.begin(); i < range.end(); ++i) {
493 temp = temp + first[i];
494 if (is_final_scan) d_first[i] = temp;
495 }
496 return temp;
497 },
498 std::plus<T>());
499 return;
500 }
501#endif
502 std::inclusive_scan(first, last, d_first);
503}
504
505// Compute the inclusive prefix sum for the range `[first, last)` using the
506// summation operator, and store the result in the range
507// starting from `d_first`.
508//
509// The input range `[first, last)` and
510// the output range `[d_first, d_first + last - first)`
511// must be equal or non-overlapping.
512template <typename InputIter, typename OutputIter,
513 typename T = typename std::iterator_traits<InputIter>::value_type>
514void inclusive_scan(InputIter first, InputIter last, OutputIter d_first) {
515 return inclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first);
516}
517
518// Compute the inclusive prefix sum for the range `[first, last)` using the
519// binary operator `f`, with initial value `init` and
520// identity element `identity`, and store the result in the range
521// starting from `d_first`.
522//
523// This is different from `exclusive_scan` in the sequential algorithm by
524// requiring an identity element. This is needed so that each block can be
525// scanned in parallel and combined later.
526//
527// The input range `[first, last)` and
528// the output range `[d_first, d_first + last - first)`
529// must be equal or non-overlapping.
530template <typename InputIter, typename OutputIter,
531 typename BinOp = decltype(std::plus<typename std::iterator_traits<
532 InputIter>::value_type>()),
533 typename T = typename std::iterator_traits<InputIter>::value_type>
534void exclusive_scan(ExecutionPolicy policy, InputIter first, InputIter last,
535 OutputIter d_first, T init = static_cast<T>(0),
536 BinOp f = std::plus<T>(), T identity = static_cast<T>(0)) {
537 static_assert(std::is_convertible_v<
538 typename std::iterator_traits<InputIter>::iterator_category,
539 std::random_access_iterator_tag>,
540 "You can only parallelize RandomAccessIterator.");
541 static_assert(
542 std::is_convertible_v<
543 typename std::iterator_traits<OutputIter>::iterator_category,
544 std::random_access_iterator_tag>,
545 "You can only parallelize RandomAccessIterator.");
546#if (MANIFOLD_PAR == 1)
547 if (policy == ExecutionPolicy::Par) {
548 details::ScanBody<T, InputIter, OutputIter, BinOp> body(init, identity, f,
549 first, d_first);
550 tbb::parallel_scan(
551 tbb::blocked_range<size_t>(0, std::distance(first, last)), body);
552 return;
553 }
554#endif
555 std::exclusive_scan(first, last, d_first, init, f);
556}
557
558// Compute the inclusive prefix sum for the range `[first, last)` using the
559// binary operator `f`, with initial value `init` and
560// identity element `identity`, and store the result in the range
561// starting from `d_first`.
562//
563// This is different from `exclusive_scan` in the sequential algorithm by
564// requiring an identity element. This is needed so that each block can be
565// scanned in parallel and combined later.
566//
567// The input range `[first, last)` and
568// the output range `[d_first, d_first + last - first)`
569// must be equal or non-overlapping.
570template <typename InputIter, typename OutputIter,
571 typename BinOp = decltype(std::plus<typename std::iterator_traits<
572 InputIter>::value_type>()),
573 typename T = typename std::iterator_traits<InputIter>::value_type>
574void exclusive_scan(InputIter first, InputIter last, OutputIter d_first,
575 T init = static_cast<T>(0), BinOp f = std::plus<T>(),
576 T identity = static_cast<T>(0)) {
577 exclusive_scan(autoPolicy(first, last, 1e5), first, last, d_first, init, f,
578 identity);
579}
580
581// Apply function `f` on the input range `[first, last)` and store the result in
582// the range starting from `d_first`.
583//
584// The input range `[first, last)` and
585// the output range `[d_first, d_first + last - first)`
586// must be equal or non-overlapping.
587template <typename InputIter, typename OutputIter, typename F>
588void transform(ExecutionPolicy policy, InputIter first, InputIter last,
589 OutputIter d_first, F f) {
590 static_assert(std::is_convertible_v<
591 typename std::iterator_traits<InputIter>::iterator_category,
592 std::random_access_iterator_tag>,
593 "You can only parallelize RandomAccessIterator.");
594 static_assert(
595 std::is_convertible_v<
596 typename std::iterator_traits<OutputIter>::iterator_category,
597 std::random_access_iterator_tag>,
598 "You can only parallelize RandomAccessIterator.");
599#if (MANIFOLD_PAR == 1)
600 if (policy == ExecutionPolicy::Par) {
601 tbb::parallel_for(tbb::blocked_range<size_t>(
602 0, static_cast<size_t>(std::distance(first, last))),
603 [&](const tbb::blocked_range<size_t> &range) {
604 std::transform(first + range.begin(),
605 first + range.end(),
606 d_first + range.begin(), f);
607 });
608 return;
609 }
610#endif
611 std::transform(first, last, d_first, f);
612}
613
614// Apply function `f` on the input range `[first, last)` and store the result in
615// the range starting from `d_first`.
616//
617// The input range `[first, last)` and
618// the output range `[d_first, d_first + last - first)`
619// must be equal or non-overlapping.
620template <typename InputIter, typename OutputIter, typename F>
621void transform(InputIter first, InputIter last, OutputIter d_first, F f) {
622 transform(autoPolicy(first, last, 1e5), first, last, d_first, f);
623}
624
625// Copy the input range `[first, last)` to the output range
626// starting from `d_first`.
627//
628// The input range `[first, last)` and
629// the output range `[d_first, d_first + last - first)`
630// must not overlap.
631template <typename InputIter, typename OutputIter>
632void copy(ExecutionPolicy policy, InputIter first, InputIter last,
633 OutputIter d_first) {
634 static_assert(std::is_convertible_v<
635 typename std::iterator_traits<InputIter>::iterator_category,
636 std::random_access_iterator_tag>,
637 "You can only parallelize RandomAccessIterator.");
638 static_assert(
639 std::is_convertible_v<
640 typename std::iterator_traits<OutputIter>::iterator_category,
641 std::random_access_iterator_tag>,
642 "You can only parallelize RandomAccessIterator.");
643#if (MANIFOLD_PAR == 1)
644 if (policy == ExecutionPolicy::Par) {
645 tbb::parallel_for(tbb::blocked_range<size_t>(
646 0, static_cast<size_t>(std::distance(first, last)),
647 details::kSeqThreshold),
648 [&](const tbb::blocked_range<size_t> &range) {
649 std::copy(first + range.begin(), first + range.end(),
650 d_first + range.begin());
651 });
652 return;
653 }
654#endif
655 std::copy(first, last, d_first);
656}
657
658// Copy the input range `[first, last)` to the output range
659// starting from `d_first`.
660//
661// The input range `[first, last)` and
662// the output range `[d_first, d_first + last - first)`
663// must not overlap.
664template <typename InputIter, typename OutputIter>
665void copy(InputIter first, InputIter last, OutputIter d_first) {
666 copy(autoPolicy(first, last, 1e6), first, last, d_first);
667}
668
669// Copy the input range `[first, first + n)` to the output range
670// starting from `d_first`.
671//
672// The input range `[first, first + n)` and
673// the output range `[d_first, d_first + n)`
674// must not overlap.
675template <typename InputIter, typename OutputIter>
676void copy_n(ExecutionPolicy policy, InputIter first, size_t n,
677 OutputIter d_first) {
678 copy(policy, first, first + n, d_first);
679}
680
681// Copy the input range `[first, first + n)` to the output range
682// starting from `d_first`.
683//
684// The input range `[first, first + n)` and
685// the output range `[d_first, d_first + n)`
686// must not overlap.
687template <typename InputIter, typename OutputIter>
688void copy_n(InputIter first, size_t n, OutputIter d_first) {
689 copy(autoPolicy(n, 1e6), first, first + n, d_first);
690}
691
692// Fill the range `[first, last)` with `value`.
693template <typename OutputIter, typename T>
694void fill(ExecutionPolicy policy, OutputIter first, OutputIter last, T value) {
695 static_assert(
696 std::is_convertible_v<
697 typename std::iterator_traits<OutputIter>::iterator_category,
698 std::random_access_iterator_tag>,
699 "You can only parallelize RandomAccessIterator.");
700#if (MANIFOLD_PAR == 1)
701 if (policy == ExecutionPolicy::Par) {
702 tbb::parallel_for(tbb::blocked_range<OutputIter>(first, last),
703 [&](const tbb::blocked_range<OutputIter> &range) {
704 std::fill(range.begin(), range.end(), value);
705 });
706 return;
707 }
708#endif
709 std::fill(first, last, value);
710}
711
712// Fill the range `[first, last)` with `value`.
713template <typename OutputIter, typename T>
714void fill(OutputIter first, OutputIter last, T value) {
715 fill(autoPolicy(first, last, 5e5), first, last, value);
716}
717
718// Count the number of elements in the input range `[first, last)` satisfying
719// predicate `pred`, i.e. `pred(x) == true`.
720template <typename InputIter, typename P>
721size_t count_if(ExecutionPolicy policy, InputIter first, InputIter last,
722 P pred) {
723#if (MANIFOLD_PAR == 1)
724 if (policy == ExecutionPolicy::Par) {
725 return reduce(policy, TransformIterator(first, pred),
726 TransformIterator(last, pred), 0, std::plus<size_t>());
727 }
728#endif
729 return std::count_if(first, last, pred);
730}
731
732// Count the number of elements in the input range `[first, last)` satisfying
733// predicate `pred`, i.e. `pred(x) == true`.
734template <typename InputIter, typename P>
735size_t count_if(InputIter first, InputIter last, P pred) {
736 return count_if(autoPolicy(first, last, 1e4), first, last, pred);
737}
738
739// Check if all elements in the input range `[first, last)` satisfy
740// predicate `pred`, i.e. `pred(x) == true`.
741template <typename InputIter, typename P>
742bool all_of(ExecutionPolicy policy, InputIter first, InputIter last, P pred) {
743 static_assert(std::is_convertible_v<
744 typename std::iterator_traits<InputIter>::iterator_category,
745 std::random_access_iterator_tag>,
746 "You can only parallelize RandomAccessIterator.");
747#if (MANIFOLD_PAR == 1)
748 if (policy == ExecutionPolicy::Par) {
749 // should we use deterministic reduce here?
750 return tbb::parallel_reduce(
751 tbb::blocked_range<InputIter>(first, last), true,
752 [&](const tbb::blocked_range<InputIter> &range, bool value) {
753 if (!value) return false;
754 for (InputIter i = range.begin(); i != range.end(); i++)
755 if (!pred(*i)) return false;
756 return true;
757 },
758 [](bool a, bool b) { return a && b; });
759 }
760#endif
761 return std::all_of(first, last, pred);
762}
763
764// Check if all elements in the input range `[first, last)` satisfy
765// predicate `pred`, i.e. `pred(x) == true`.
766template <typename InputIter, typename P>
767bool all_of(InputIter first, InputIter last, P pred) {
768 return all_of(autoPolicy(first, last, 1e5), first, last, pred);
769}
770
771// Copy values in the input range `[first, last)` to the output range
772// starting from `d_first` that satisfies the predicate `pred`,
773// i.e. `pred(x) == true`, and returns `d_first + n` where `n` is the number of
774// times the predicate is evaluated to true.
775//
776// This function is stable, meaning that the relative order of elements in the
777// output range remains unchanged.
778//
779// The input range `[first, last)` and
780// the output range `[d_first, d_first + last - first)`
781// must not overlap.
782template <typename InputIter, typename OutputIter, typename P>
783OutputIter copy_if(ExecutionPolicy policy, InputIter first, InputIter last,
784 OutputIter d_first, P pred) {
785 static_assert(std::is_convertible_v<
786 typename std::iterator_traits<InputIter>::iterator_category,
787 std::random_access_iterator_tag>,
788 "You can only parallelize RandomAccessIterator.");
789 static_assert(
790 std::is_convertible_v<
791 typename std::iterator_traits<OutputIter>::iterator_category,
792 std::random_access_iterator_tag>,
793 "You can only parallelize RandomAccessIterator.");
794#if (MANIFOLD_PAR == 1)
795 if (policy == ExecutionPolicy::Par) {
796 auto pred2 = [&](size_t i) { return pred(first[i]); };
797 details::CopyIfScanBody body(pred2, first, d_first);
798 tbb::parallel_scan(
799 tbb::blocked_range<size_t>(0, std::distance(first, last)), body);
800 return d_first + body.get_sum();
801 }
802#endif
803 return std::copy_if(first, last, d_first, pred);
804}
805
806// Copy values in the input range `[first, last)` to the output range
807// starting from `d_first` that satisfies the predicate `pred`, i.e. `pred(x) ==
808// true`, and returns `d_first + n` where `n` is the number of times the
809// predicate is evaluated to true.
810//
811// This function is stable, meaning that the relative order of elements in the
812// output range remains unchanged.
813//
814// The input range `[first, last)` and
815// the output range `[d_first, d_first + last - first)`
816// must not overlap.
817template <typename InputIter, typename OutputIter, typename P>
818OutputIter copy_if(InputIter first, InputIter last, OutputIter d_first,
819 P pred) {
820 return copy_if(autoPolicy(first, last, 1e5), first, last, d_first, pred);
821}
822
823// Remove values in the input range `[first, last)` that satisfies
824// the predicate `pred`, i.e. `pred(x) == true`, and returns `first + n`
825// where `n` is the number of times the predicate is evaluated to false.
826//
827// This function is stable, meaning that the relative order of elements that
828// remained are unchanged.
829//
830// Only trivially destructable types are supported.
831template <typename Iter, typename P,
832 typename T = typename std::iterator_traits<Iter>::value_type>
833Iter remove_if(ExecutionPolicy policy, Iter first, Iter last, P pred) {
834 static_assert(std::is_convertible_v<
835 typename std::iterator_traits<Iter>::iterator_category,
836 std::random_access_iterator_tag>,
837 "You can only parallelize RandomAccessIterator.");
838 static_assert(std::is_trivially_destructible_v<T>,
839 "Our simple implementation does not support types that are "
840 "not trivially destructable.");
841#if (MANIFOLD_PAR == 1)
842 if (policy == ExecutionPolicy::Par) {
843 T *tmp = new T[std::distance(first, last)];
844 auto back =
845 copy_if(policy, first, last, tmp, [&](T v) { return !pred(v); });
846 copy(policy, tmp, back, first);
847 auto d = std::distance(tmp, back);
848 delete[] tmp;
849 return first + d;
850 }
851#endif
852 return std::remove_if(first, last, pred);
853}
854
855// Remove values in the input range `[first, last)` that satisfies
856// the predicate `pred`, i.e. `pred(x) == true`, and
857// returns `first + n` where `n` is the number of times the predicate is
858// evaluated to false.
859//
860// This function is stable, meaning that the relative order of elements that
861// remained are unchanged.
862//
863// Only trivially destructable types are supported.
864template <typename Iter, typename P,
865 typename T = typename std::iterator_traits<Iter>::value_type>
866Iter remove_if(Iter first, Iter last, P pred) {
867 return remove_if(autoPolicy(first, last, 1e4), first, last, pred);
868}
869
870// Remove values in the input range `[first, last)` that are equal to `value`.
871// Returns `first + n` where `n` is the number of values
872// that are not equal to `value`.
873//
874// This function is stable, meaning that the relative order of elements that
875// remained are unchanged.
876//
877// Only trivially destructable types are supported.
878template <typename Iter,
879 typename T = typename std::iterator_traits<Iter>::value_type>
880Iter remove(ExecutionPolicy policy, Iter first, Iter last, T value) {
881 static_assert(std::is_convertible_v<
882 typename std::iterator_traits<Iter>::iterator_category,
883 std::random_access_iterator_tag>,
884 "You can only parallelize RandomAccessIterator.");
885 static_assert(std::is_trivially_destructible_v<T>,
886 "Our simple implementation does not support types that are "
887 "not trivially destructable.");
888#if (MANIFOLD_PAR == 1)
889 if (policy == ExecutionPolicy::Par) {
890 T *tmp = new T[std::distance(first, last)];
891 auto back =
892 copy_if(policy, first, last, tmp, [&](T v) { return v != value; });
893 copy(policy, tmp, back, first);
894 auto d = std::distance(tmp, back);
895 delete[] tmp;
896 return first + d;
897 }
898#endif
899 return std::remove(first, last, value);
900}
901
902// Remove values in the input range `[first, last)` that are equal to `value`.
903// Returns `first + n` where `n` is the number of values
904// that are not equal to `value`.
905//
906// This function is stable, meaning that the relative order of elements that
907// remained are unchanged.
908//
909// Only trivially destructable types are supported.
910template <typename Iter,
911 typename T = typename std::iterator_traits<Iter>::value_type>
912Iter remove(Iter first, Iter last, T value) {
913 return remove(autoPolicy(first, last, 1e4), first, last, value);
914}
915
916// For each group of consecutive elements in the range `[first, last)` with the
917// same value, unique removes all but the first element of the group. The return
918// value is an iterator `new_last` such that no two consecutive elements in the
919// range `[first, new_last)` are equal.
920//
921// This function is stable, meaning that the relative order of elements that
922// remained are unchanged.
923//
924// Only trivially destructable types are supported.
925template <typename Iter,
926 typename T = typename std::iterator_traits<Iter>::value_type>
927Iter unique(ExecutionPolicy policy, Iter first, Iter last) {
928 static_assert(std::is_convertible_v<
929 typename std::iterator_traits<Iter>::iterator_category,
930 std::random_access_iterator_tag>,
931 "You can only parallelize RandomAccessIterator.");
932 static_assert(std::is_trivially_destructible_v<T>,
933 "Our simple implementation does not support types that are "
934 "not trivially destructable.");
935#if (MANIFOLD_PAR == 1)
936 if (policy == ExecutionPolicy::Par && first != last) {
937 Iter newSrcStart = first;
938 // cap the maximum buffer size, proved to be beneficial for unique with huge
939 // array size
940 constexpr size_t MAX_BUFFER_SIZE = 1 << 16;
941 T *tmp = new T[std::min(MAX_BUFFER_SIZE,
942 static_cast<size_t>(std::distance(first, last)))];
943 auto pred = [&](size_t i) { return tmp[i] != tmp[i + 1]; };
944 do {
945 size_t length =
946 std::min(MAX_BUFFER_SIZE,
947 static_cast<size_t>(std::distance(newSrcStart, last)));
948 copy(policy, newSrcStart, newSrcStart + length, tmp);
949 *first = *newSrcStart;
950 // this is not a typo, the index i is offset by 1, so to compare an
951 // element with its predecessor we need to compare i and i + 1.
952 details::CopyIfScanBody body(pred, tmp + 1, first + 1);
953 tbb::parallel_scan(tbb::blocked_range<size_t>(0, length - 1), body);
954 first += body.get_sum() + 1;
955 newSrcStart += length;
956 } while (newSrcStart != last);
957 delete[] tmp;
958 return first;
959 }
960#endif
961 return std::unique(first, last);
962}
963
964// For each group of consecutive elements in the range `[first, last)` with the
965// same value, unique removes all but the first element of the group. The return
966// value is an iterator `new_last` such that no two consecutive elements in the
967// range `[first, new_last)` are equal.
968//
969// This function is stable, meaning that the relative order of elements that
970// remained are unchanged.
971//
972// Only trivially destructable types are supported.
973template <typename Iter,
974 typename T = typename std::iterator_traits<Iter>::value_type>
975Iter unique(Iter first, Iter last) {
976 return unique(autoPolicy(first, last, 1e4), first, last);
977}
978
979// Sort the input range `[first, last)` in ascending order.
980//
981// This function is stable, meaning that the relative order of elements that are
982// incomparable remains unchanged.
983//
984// Only trivially destructable types are supported.
985template <typename Iterator,
986 typename T = typename std::iterator_traits<Iterator>::value_type>
987void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last) {
988#if (MANIFOLD_PAR == 1)
989 details::SortFunctor<Iterator, T>()(policy, first, last);
990#else
991 std::stable_sort(first, last);
992#endif
993}
994
995// Sort the input range `[first, last)` in ascending order.
996//
997// This function is stable, meaning that the relative order of elements that are
998// incomparable remains unchanged.
999//
1000// Only trivially destructable types are supported.
1001template <typename Iterator,
1002 typename T = typename std::iterator_traits<Iterator>::value_type>
1003void stable_sort(Iterator first, Iterator last) {
1004 stable_sort(autoPolicy(first, last, 1e4), first, last);
1005}
1006
1007// Sort the input range `[first, last)` in ascending order using the comparison
1008// function `comp`.
1009//
1010// This function is stable, meaning that the relative order of elements that are
1011// incomparable remains unchanged.
1012//
1013// Only trivially destructable types are supported.
1014template <typename Iterator,
1015 typename T = typename std::iterator_traits<Iterator>::value_type,
1016 typename Comp = decltype(std::less<T>())>
1017void stable_sort(ExecutionPolicy policy, Iterator first, Iterator last,
1018 Comp comp) {
1019#if (MANIFOLD_PAR == 1)
1020 details::mergeSort(policy, first, last, comp);
1021#else
1022 std::stable_sort(first, last, comp);
1023#endif
1024}
1025
1026// Sort the input range `[first, last)` in ascending order using the comparison
1027// function `comp`.
1028//
1029// This function is stable, meaning that the relative order of elements that are
1030// incomparable remains unchanged.
1031//
1032// Only trivially destructable types are supported.
1033template <typename Iterator,
1034 typename T = typename std::iterator_traits<Iterator>::value_type,
1035 typename Comp = decltype(std::less<T>())>
1036void stable_sort(Iterator first, Iterator last, Comp comp) {
1037 stable_sort(autoPolicy(first, last, 1e4), first, last, comp);
1038}
1039
1040// `scatter` copies elements from a source range into an output array according
1041// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`
1042// is assigned to `outputFirst[mapFirst[i - first]]`. If the same index appears
1043// more than once in the range `[mapFirst, mapFirst + (last - first))`, the
1044// result is undefined.
1045//
1046// The map range, input range and the output range must not overlap.
1047template <typename InputIterator1, typename InputIterator2,
1048 typename OutputIterator>
1049void scatter(ExecutionPolicy policy, InputIterator1 first, InputIterator1 last,
1050 InputIterator2 mapFirst, OutputIterator outputFirst) {
1051 for_each(policy, countAt(0),
1052 countAt(static_cast<size_t>(std::distance(first, last))),
1053 [first, mapFirst, outputFirst](size_t i) {
1054 outputFirst[mapFirst[i]] = first[i];
1055 });
1056}
1057
1058// `scatter` copies elements from a source range into an output array according
1059// to a map. For each iterator `i` in the range `[first, last)`, the value `*i`
1060// is assigned to `outputFirst[mapFirst[i - first]]`. If the same index appears
1061// more than once in the range `[mapFirst, mapFirst + (last - first))`,
1062// the result is undefined.
1063//
1064// The map range, input range and the output range must not overlap.
1065template <typename InputIterator1, typename InputIterator2,
1066 typename OutputIterator>
1067void scatter(InputIterator1 first, InputIterator1 last, InputIterator2 mapFirst,
1068 OutputIterator outputFirst) {
1069 scatter(autoPolicy(first, last, 1e5), first, last, mapFirst, outputFirst);
1070}
1071
1072// `gather` copies elements from a source array into a destination range
1073// according to a map. For each input iterator `i`
1074// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`
1075// is assigned to `outputFirst[i - map_first]`.
1076//
1077// The map range, input range and the output range must not overlap.
1078template <typename InputIterator, typename RandomAccessIterator,
1079 typename OutputIterator>
1080void gather(ExecutionPolicy policy, InputIterator mapFirst,
1081 InputIterator mapLast, RandomAccessIterator inputFirst,
1082 OutputIterator outputFirst) {
1083 for_each(policy, countAt(0),
1084 countAt(static_cast<size_t>(std::distance(mapFirst, mapLast))),
1085 [mapFirst, inputFirst, outputFirst](size_t i) {
1086 outputFirst[i] = inputFirst[mapFirst[i]];
1087 });
1088}
1089
1090// `gather` copies elements from a source array into a destination range
1091// according to a map. For each input iterator `i`
1092// in the range `[mapFirst, mapLast)`, the value `inputFirst[*i]`
1093// is assigned to `outputFirst[i - map_first]`.
1094//
1095// The map range, input range and the output range must not overlap.
1096template <typename InputIterator, typename RandomAccessIterator,
1097 typename OutputIterator>
1098void gather(InputIterator mapFirst, InputIterator mapLast,
1099 RandomAccessIterator inputFirst, OutputIterator outputFirst) {
1100 gather(autoPolicy(std::distance(mapFirst, mapLast), 1e5), mapFirst, mapLast,
1101 inputFirst, outputFirst);
1102}
1103
1104// Write `[0, last - first)` to the range `[first, last)`.
1105template <typename Iterator>
1106void sequence(ExecutionPolicy policy, Iterator first, Iterator last) {
1107 for_each(policy, countAt(0),
1108 countAt(static_cast<size_t>(std::distance(first, last))),
1109 [first](size_t i) { first[i] = i; });
1110}
1111
1112// Write `[0, last - first)` to the range `[first, last)`.
1113template <typename Iterator>
1114void sequence(Iterator first, Iterator last) {
1115 sequence(autoPolicy(first, last, 1e5), first, last);
1116}
1117
1118} // namespace manifold
Definition common.h:22