DGtal  1.5.beta
exampleHyperRectDomainParallelScan.cpp
Go to the documentation of this file.
1 
32 // Inclusions
33 
34 #include <numeric>
35 #include <iterator>
36 #include <chrono>
37 #include <string>
38 #include <iostream>
39 #include <iomanip>
40 #include <omp.h>
41 
42 #include "DGtal/base/Common.h"
43 #include "DGtal/kernel/SpaceND.h"
44 #include "DGtal/kernel/PointVector.h"
45 #include "DGtal/kernel/domains/HyperRectDomain.h"
46 #include "DGtal/images/ImageContainerBySTLVector.h"
47 #include "DGtal/base/SimpleConstRange.h"
48 
50 using namespace DGtal;
51 
52 // Timer used in tic and toc
53 auto tic_timer = std::chrono::high_resolution_clock::now();
54 
55 // Starts timer
56 void tic()
57 {
58  tic_timer = std::chrono::high_resolution_clock::now();
59 }
60 
61 // Ends timer and return elapsed time
62 double toc()
63 {
64  const auto toc_timer = std::chrono::high_resolution_clock::now();
65  const std::chrono::duration<double> time_span = toc_timer - tic_timer;
66  return time_span.count();
67 }
68 
70 // Splits range in given parts count and returns the part of given idx
71 template <typename TIterator>
73 split_range(TIterator it_begin, TIterator it_end, std::size_t idx, std::size_t count)
74 {
75  auto range_size = std::distance(it_begin, it_end);
76  auto begin_shift = (range_size*idx) / count;
77  auto end_shift = (range_size*(idx+1)) / count;
78  return { it_begin + begin_shift, it_begin + end_shift };
79 }
80 
81 // Splits range in given parts count and returns the part of given idx
82 template <typename TIterable>
83 auto
84 split_range(TIterable & iterable, std::size_t idx, std::size_t count)
85  -> decltype(split_range(iterable.begin(), iterable.end(), idx, count))
86 {
87  return split_range(iterable.begin(), iterable.end(), idx, count);
88 }
90 
91 // Returns a kind of checksum of a given image (to avoid aggressive optimization)
92 template <typename Image>
93 typename Image::Value calc_image_checksum(Image const& image)
94 {
95  typename Image::Value sum = 0;
96 
97  #pragma omp parallel reduction(+:sum)
98  {
99  std::size_t thread_idx = omp_get_thread_num();
100  std::size_t thread_cnt = omp_get_max_threads();
101  auto const range = split_range(image, thread_idx, thread_cnt);
102  sum = std::accumulate(range.begin(), range.end(), sum);
103  }
104 
105  return sum;
106 }
107 
108 // Sum a function over a given domain
109 template <typename Domain, typename Function>
110 auto sum_fn_on_domain(Domain const& domain, Function const& fn)
111  -> decltype(fn(domain.lowerBound()))
112 {
114  auto sum = 0 * fn(domain.lowerBound()); // To initialize the sum depending on the function return type
115 
116  #pragma omp parallel reduction(+:sum)
117  {
118  // OpenMP context
119  std::size_t thread_idx = omp_get_thread_num();
120  std::size_t thread_cnt = omp_get_num_threads();
121 
122  for (auto const& pt : split_range(domain, thread_idx, thread_cnt))
123  sum += fn(pt);
124  }
126 
127  return sum;
128 }
129 
130 // Initialize an image with a given function, using getter and setter
131 template <typename Image, typename Function>
132 void init_image_getset(Image & image, Function const& fn)
133 {
135  #pragma omp parallel
136  {
137  std::size_t thread_idx = omp_get_thread_num();
138  std::size_t thread_cnt = omp_get_num_threads();
139 
140  for (auto const& pt : split_range(image.domain(), thread_idx, thread_cnt))
141  image.setValue(pt, fn(pt, image(pt)));
142  }
144 }
145 
146 // Initialize an image with a given function, using iterators
147 template <typename Image, typename Function>
148 void init_image_iter(Image & image, Function const& fn)
149 {
151  #pragma omp parallel
152  {
153  std::size_t thread_idx = omp_get_thread_num();
154  std::size_t thread_cnt = omp_get_num_threads();
155 
156  auto domain_it = split_range(image.domain(), thread_idx, thread_cnt).begin();
157  for (auto & v : split_range(image, thread_idx, thread_cnt))
158  {
159  v = fn(*domain_it, v);
160  ++domain_it;
161  }
162  }
164 }
165 
166 int main(int argc, char* argv[])
167 {
168  using Space = SpaceND<3>;
169  using Point = Space::Point;
171  using Value = double;
173 
174  if (argc < 2)
175  {
176  std::cerr << "Usage: " << argv[0] << " <domain_size>" << std::endl;
177  return 1;
178  }
179 
180  trace.info() << "Initialization..." << std::endl;
181  std::size_t domain_size = std::stoll(argv[1]);
182  Domain domain(Point::diagonal(0), Point::diagonal(domain_size-1));
183  Image image(domain);
184 
185  double ref_duration = 0;
186  std::size_t max_threads = omp_get_max_threads();
187  trace.info() << std::fixed << std::setprecision(6);
188 
190  // Choose here the function you want to use
191 
192  //auto const fn = [&domain] (Point const& pt) { return 25 * ( std::cos( (pt - domain.upperBound()).norm() ) + 1 ); }; // CPU intensive
193  auto const fn = [&domain] (Point const& pt) { return (pt - domain.upperBound()).norm(); }; // Mixed
194  //auto const fn = [] (Point const& pt) { return Value(pt[0]); }; // Memory bound
195 
196 
198  // Scanning a domain in parallel
199 
200  trace.info() << std::endl;
201  trace.info() << "Scanning a domain in parallel..." << std::endl;
202  for (std::size_t thread_cnt = 1; thread_cnt <= max_threads; ++thread_cnt)
203  {
204  omp_set_num_threads(thread_cnt);
205  Value sum = sum_fn_on_domain(domain, fn);
206  tic();
207  sum += sum_fn_on_domain(domain, fn);
208  const double duration = toc();
209 
210  if (thread_cnt == 1)
211  ref_duration = duration;
212 
213  trace.info() << "\tthreads: " << thread_cnt
214  << "\tduration: " << duration << "s"
215  << "\tspeed: " << 1e-6 * domain.size() / duration << "Mpt/s"
216  << "\tspeedup: " << ref_duration/duration
217  << "\tchecksum: " << sum
218  << std::endl;
219  }
220 
221 
223  // Initializing an image in parallel using getter and setter
224 
225  trace.info() << std::endl;
226  trace.info() << "Initializing an image in parallel using getter and setter..." << std::endl;
227  for (std::size_t thread_cnt = 1; thread_cnt <= max_threads; ++thread_cnt)
228  {
229  omp_set_num_threads(thread_cnt);
230  init_image_getset(image, [&fn] (Point const& pt, Value) { return fn(pt); });
231  tic();
232  init_image_getset(image, [&fn] (Point const& pt, Value v) { return v + fn(pt); });
233  const double duration = toc();
234 
235  if (thread_cnt == 1)
236  ref_duration = duration;
237 
238  trace.info() << "\tthreads: " << thread_cnt
239  << "\tduration: " << duration << "s"
240  << "\tspeed: " << 1e-6 * domain.size() / duration << "Mpt/s"
241  << "\tspeedup: " << ref_duration/duration
242  << "\tchecksum: " << calc_image_checksum(image)
243  << std::endl;
244  }
245 
246 
248  // Initializing an image in parallel using iterators
249 
250  trace.info() << std::endl;
251  trace.info() << "Initializing an image in parallel using iterators..." << std::endl;
252  for (std::size_t thread_cnt = 1; thread_cnt <= max_threads; ++thread_cnt)
253  {
254  omp_set_num_threads(thread_cnt);
255  init_image_iter(image, [&fn] (Point const& pt, Value) { return fn(pt); });
256  tic();
257  init_image_iter(image, [&fn] (Point const& pt, Value v) { return v + fn(pt); });
258  const double duration = toc();
259 
260  if (thread_cnt == 1)
261  ref_duration = duration;
262 
263  trace.info() << "\tthreads: " << thread_cnt
264  << "\tduration: " << duration << "s"
265  << "\tspeed: " << 1e-6 * domain.size() / duration << "Mpt/s"
266  << "\tspeedup: " << ref_duration/duration
267  << "\tchecksum: " << calc_image_checksum(image)
268  << std::endl;
269  }
270 
271  return 0;
272 }
const Point & lowerBound() const
const Point & upperBound() const
Aim: implements association bewteen points lying in a digital domain and values.
Definition: Image.h:70
Aim: model of CConstRange that adapts any range of elements bounded by two iterators [itb,...
std::ostream & info()
int main(int argc, char *argv[])
Image::Value calc_image_checksum(Image const &image)
[split_range]
void init_image_iter(Image &image, Function const &fn)
void init_image_getset(Image &image, Function const &fn)
SimpleConstRange< TIterator > split_range(TIterator it_begin, TIterator it_end, std::size_t idx, std::size_t count)
[split_range]
auto sum_fn_on_domain(Domain const &domain, Function const &fn) -> decltype(fn(domain.lowerBound()))
DGtal is the top-level namespace which contains all DGtal functions and types.
Trace trace
Definition: Common.h:153
MyPointD Point
Definition: testClone2.cpp:383
Domain domain
Image image(domain)