Sacado Package Browser (Single Doxygen Collection)  Version of the Day
Fad_KokkosAtomicTests.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Sacado Package
5 // Copyright (2006) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // This library is free software; you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as
12 // published by the Free Software Foundation; either version 2.1 of the
13 // License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23 // USA
24 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25 // (etphipp@sandia.gov).
26 //
27 // ***********************************************************************
28 // @HEADER
29 #include "Teuchos_TestingHelpers.hpp"
30 
31 #include "Sacado.hpp"
32 
/// Trait telling whether a type is a Sacado DFad.
///
/// This is the primary template: every type that is not matched by a
/// specialization of is_dfad reports `value == false`.
template <typename T>
struct is_dfad {
  static const bool value = false;
};
37 
38 template <typename T>
39 struct is_dfad< Sacado::Fad::Exp::DFad<T> > {
40  static const bool value = true;
41 };
42 
43 template <typename FadType1, typename FadType2>
44 bool checkFads(const FadType1& x, const FadType2& x2,
45  Teuchos::FancyOStream& out, double tol = 1.0e-15)
46 {
47  bool success = true;
48 
49  // Check sizes match
50  TEUCHOS_TEST_EQUALITY(x.size(), x2.size(), out, success);
51 
52  // Check values match
53  TEUCHOS_TEST_FLOATING_EQUALITY(x.val(), x2.val(), tol, out, success);
54 
55  // Check derivatives match
56  for (int i=0; i<x.size(); ++i)
57  TEUCHOS_TEST_FLOATING_EQUALITY(x.dx(i), x2.dx(i), tol, out, success);
58 
59  return success;
60 }
61 
/// Build a deterministic Fad value for position (row, col) of a test array.
///
/// The value is `(100 + num_rows/(row+1)) + (10 + num_cols/(col+1))`, and
/// derivative component `i` is that value plus `1 + fad_size/(i+1)`.  All
/// divisions are carried out in `fadtype::value_type`.
///
/// @tparam fadtype  Type constructible from `(size, value)` that exposes
///                  `value_type`, `val()` and `fastAccessDx(i)`.
/// @tparam ordinal  Integer type of the size and index arguments.
/// @param num_rows  Row count used in the row contribution.
/// @param num_cols  Column count used in the column contribution.
/// @param fad_size  Number of derivative components to create and fill.
/// @param row       Zero-based row index.
/// @param col       Zero-based column index.
/// @return The populated Fad object.
template <typename fadtype, typename ordinal>
inline
fadtype generate_fad( const ordinal num_rows,
                      const ordinal num_cols,
                      const ordinal fad_size,
                      const ordinal row,
                      const ordinal col )
{
  typedef typename fadtype::value_type scalar;
  fadtype x(fad_size, scalar(0.0));

  const scalar x_row = 100.0 + scalar(num_rows) / scalar(row+1);
  const scalar x_col =  10.0 + scalar(num_cols) / scalar(col+1);
  x.val() = x_row + x_col;
  for (ordinal i=0; i<fad_size; ++i) {
    const scalar x_fad = 1.0 + scalar(fad_size) / scalar(i+1);
    x.fastAccessDx(i) = x_row + x_col + x_fad;
  }
  return x;
}
82 
// Default for GLOBAL_FAD_SIZE; a definition supplied earlier (e.g. by the
// build) takes precedence over the value 5.
83 #ifndef GLOBAL_FAD_SIZE
84 #define GLOBAL_FAD_SIZE 5
85 #endif
// Number of entries testAtomic() puts into its rank-1 view.
86 const int global_num_rows = 11;
// NOTE(review): global_num_cols is not referenced by any code visible in this
// listing. testAtomic() also reads global_fad_size, whose definition is not
// visible here; presumably it is initialized from GLOBAL_FAD_SIZE — confirm.
87 const int global_num_cols = 7;
89 
/// Dispatch tag for the addition tests.
///
/// The tag type selects the matching AtomicKernel::operator() overload;
/// testAtomic() additionally uses init() to seed the accumulator and apply()
/// to compute the host-side reference result.
struct AddTag {
  /// Value the accumulator starts from.
  static double init() { return 0.0; }

  /// Reference operation: `a + b`, returned with the type of that expression.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a+b)
  {
    return a+b;
  }
};
/// Dispatch tag for the subtraction tests.
///
/// The tag type selects the matching AtomicKernel::operator() overload;
/// testAtomic() additionally uses init() to seed the accumulator and apply()
/// to compute the host-side reference result.
struct SubTag {
  /// Value the accumulator starts from.
  static double init() { return 0.0; }

  /// Reference operation: `a - b`, returned with the type of that expression.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a-b)
  {
    return a-b;
  }
};
/// Dispatch tag for the multiplication tests.
///
/// The tag type selects the matching AtomicKernel::operator() overload;
/// testAtomic() additionally uses init() to seed the accumulator and apply()
/// to compute the host-side reference result.
struct MulTag {
  /// Value the accumulator starts from.
  static double init() { return 1.0; }

  /// Reference operation: `a * b`, returned with the type of that expression.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a*b)
  {
    return a*b;
  }
};
/// Dispatch tag for the division tests.
///
/// The tag type selects the matching AtomicKernel::operator() overload;
/// testAtomic() additionally uses init() to seed the accumulator and apply()
/// to compute the host-side reference result.
struct DivTag {
  /// Value the accumulator starts from.
  static double init() { return 1.0; }

  /// Reference operation: `a / b`, returned with the type of that expression
  /// (so two integer operands divide as integers).
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(a/b)
  {
    return a/b;
  }
};
/// Dispatch tag for the maximum tests.
///
/// The tag type selects the matching AtomicKernel::operator() overload;
/// testAtomic() additionally uses init() to seed the accumulator and apply()
/// to compute the host-side reference result.
struct MaxTag {
  /// Value the accumulator starts from.
  static double init() { return 1.0; }

  /// Reference operation: `max(a, b)`.  The call is unqualified, so the
  /// overload is found through argument-dependent lookup on the operand types.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(max(a,b))
  {
    return max(a,b);
  }
};
/// Dispatch tag for the minimum tests.
///
/// The tag type selects the matching AtomicKernel::operator() overload;
/// testAtomic() additionally uses init() to seed the accumulator and apply()
/// to compute the host-side reference result.
struct MinTag {
  /// Value the accumulator starts from.
  ///
  /// NOTE(review): testAtomic() fills its view through generate_fad(), whose
  /// values are all above 100, so with a seed of 1.0 min() never selects an
  /// input and the expected result is just the seed — confirm this is intended.
  static double init() { return 1.0; }

  /// Reference operation: `min(a, b)`.  The call is unqualified, so the
  /// overload is found through argument-dependent lookup on the operand types.
  template <typename T1, typename T2>
  static auto apply(const T1& a, const T2& b) -> decltype(min(a,b))
  {
    return min(a,b);
  }
};
138 
// Functor that issues one Kokkos atomic update on the scalar view m_s per
// invocation.  The tag argument (AddTag, SubTag, MulTag, DivTag, MaxTag,
// MinTag) picks the operation; the OperFetch template flag picks between the
// atomic_<op>_fetch (true) and atomic_fetch_<op> (false) spellings.
//
// NOTE(review): in every tagged operator() below the operand `x` is used but
// its declaration is not visible in this listing (the embedded numbering skips
// a line right after each signature); presumably it is read from m_v(i) as a
// local_scalar_type — confirm against the original file.
139 // Kernel to test the tagged atomic operations
140 template <typename ViewType, typename ScalarViewType, bool OperFetch>
141 struct AtomicKernel {
142  typedef typename ViewType::execution_space execution_space;
143  typedef typename ViewType::size_type size_type;
144  typedef typename Kokkos::TeamPolicy< execution_space>::member_type team_handle;
145  typedef typename Kokkos::ThreadLocalScalarType<ViewType>::type local_scalar_type;
146  static const size_type stride = Kokkos::ViewScalarStride<ViewType>::stride;
147 
    // m_v: view whose first extent bounds the iteration; m_s: rank-0 view that
    // receives every atomic update.
148  const ViewType m_v;
149  const ScalarViewType m_s;
150 
    // Stores copies of both view handles.
151  AtomicKernel(const ViewType& v, const ScalarViewType& s) :
152  m_v(v), m_s(s) {};
153 
    // Addition: atomic_add_fetch or atomic_fetch_add on m_s().
154  KOKKOS_INLINE_FUNCTION
155  void operator() (AddTag tag, const size_type i) const {
157  if (OperFetch)
158  Kokkos::atomic_add_fetch(&(m_s()), x);
159  else
160  Kokkos::atomic_fetch_add(&(m_s()), x);
161  }
162 
    // Subtraction: atomic_sub_fetch or atomic_fetch_sub on m_s().
163  KOKKOS_INLINE_FUNCTION
164  void operator() (SubTag tag, const size_type i) const {
166  if (OperFetch)
167  Kokkos::atomic_sub_fetch(&(m_s()), x);
168  else
169  Kokkos::atomic_fetch_sub(&(m_s()), x);
170  }
171 
    // Multiplication: atomic_mul_fetch or atomic_fetch_mul on m_s().
172  KOKKOS_INLINE_FUNCTION
173  void operator() (MulTag tag, const size_type i) const {
175  if (OperFetch)
176  Kokkos::atomic_mul_fetch(&(m_s()), x);
177  else
178  Kokkos::atomic_fetch_mul(&(m_s()), x);
179  }
180 
    // Division: atomic_div_fetch or atomic_fetch_div on m_s().
181  KOKKOS_INLINE_FUNCTION
182  void operator() (DivTag tag, const size_type i) const {
184  if (OperFetch)
185  Kokkos::atomic_div_fetch(&(m_s()), x);
186  else
187  Kokkos::atomic_fetch_div(&(m_s()), x);
188  }
189 
    // Maximum: atomic_max_fetch or atomic_fetch_max on m_s().
190  KOKKOS_INLINE_FUNCTION
191  void operator() (MaxTag tag, const size_type i) const {
193  if (OperFetch)
194  Kokkos::atomic_max_fetch(&(m_s()), x);
195  else
196  Kokkos::atomic_fetch_max(&(m_s()), x);
197  }
198 
    // Minimum: atomic_min_fetch or atomic_fetch_min on m_s().
199  KOKKOS_INLINE_FUNCTION
200  void operator() (MinTag tag, const size_type i) const {
202  if (OperFetch)
203  Kokkos::atomic_min_fetch(&(m_s()), x);
204  else
205  Kokkos::atomic_fetch_min(&(m_s()), x);
206  }
207 
    // Team-policy entry point: flattens (league_rank, team_rank) into a row
    // index and forwards to the index overload for the same tag.  The bounds
    // check skips indices at or beyond m_v.extent(0), which occur when the
    // league is rounded up in apply().
208  template <typename Tag>
209  KOKKOS_INLINE_FUNCTION
210  void operator()( Tag tag, const team_handle& team ) const
211  {
212  const size_type i = team.league_rank()*team.team_size() + team.team_rank();
213  if (i < m_v.extent(0))
214  (*this)(tag, i);
215  }
216 
    // Runs the kernel for `tag` over the v.extent(0) rows of v, using either a
    // TeamPolicy (when use_team is true) or a RangePolicy over [0, nrow).
217  // Kernel launch
218  template <typename Tag>
219  static void apply(Tag tag, const ViewType& v, const ScalarViewType& s) {
220  const size_type nrow = v.extent(0);
221 
    // NOTE(review): the use_team initializers in the two CUDA branches are
    // incomplete in this listing (only the trailing `( stride > 1 )` term of
    // the first one is visible; the second has no visible initializer) —
    // restore them from the original file.
222 #if defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL)
223  const bool use_team =
226  ( stride > 1 );
227 #elif defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
228  const bool use_team =
232 #else
233  const bool use_team = false;
234 #endif
235 
236  if (use_team) {
    // 256 / stride threads per team; the league size is nrow / team_size
    // rounded up.  stride is passed as the third TeamPolicy argument
    // (the vector length in the Kokkos API — confirm).
237  const size_type team_size = 256 / stride;
238  Kokkos::TeamPolicy<execution_space, Tag> policy(
239  (nrow+team_size-1)/team_size, team_size, stride );
240  Kokkos::parallel_for( policy, AtomicKernel(v,s) );
241  }
242  else {
243  Kokkos::RangePolicy<execution_space, Tag> policy( 0, nrow );
244  Kokkos::parallel_for( policy, AtomicKernel(v,s) );
245  }
246  }
247 };
248 
// Test driver for one atomic operation.
//
// Fills a rank-1 view of FadType with generate_fad() values, seeds a rank-0
// view with FadType(fad_size, tag.init()), copies the rank-0 view back to the
// host, recomputes the expected result on the host by applying tag.apply()
// to the seed and each host entry in index order, and compares the two with
// checkFads().
//
// Template parameters: FadType, Layout and Device form the Kokkos::View types;
// OperFetch is not used by any statement visible in this listing.
// Parameters: tag provides init() and apply(); out receives checkFads()'
// diagnostics.  Returns the checkFads() result.
//
// NOTE(review): global_fad_size is read below but its definition is not
// visible in this listing.
249 template <typename FadType, typename Layout, typename Device, bool OperFetch,
250  typename TagType>
251 bool testAtomic(const TagType& tag, Teuchos::FancyOStream& out)
252 {
253  typedef Kokkos::View<FadType*,Layout,Device> ViewType;
254  typedef Kokkos::View<FadType,Layout,Device> ScalarViewType;
255  typedef typename ViewType::size_type size_type;
256  typedef typename ViewType::HostMirror host_view_type;
257  typedef typename ScalarViewType::HostMirror host_scalar_view_type;
258 
259  const size_type num_rows = global_num_rows;
260  const size_type fad_size = global_fad_size;
261 
    // Unless SACADO_DISABLE_FAD_VIEW_SPEC is defined, the view constructor is
    // given fad_size+1 as an additional extent.
262  // Create and fill view
263  ViewType v;
264 #if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
265  v = ViewType ("view", num_rows);
266 #else
267  v = ViewType ("view", num_rows, fad_size+1);
268 #endif
269  host_view_type h_v = Kokkos::create_mirror_view(v);
270  for (size_type i=0; i<num_rows; ++i)
271  h_v(i) =
272  generate_fad<FadType>(num_rows, size_type(1), fad_size, i, size_type(0));
273  Kokkos::deep_copy(v, h_v);
274 
    // s0 is the seed: a FadType built from fad_size and tag.init().
275  // Create scalar view
276  ScalarViewType s;
277  FadType s0 = FadType(fad_size,tag.init());
278 #if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
279  s = ScalarViewType ("scalar view");
280 #else
281  s = ScalarViewType ("scalar view", fad_size+1);
282 #endif
283  Kokkos::deep_copy( s, s0 );
284 
    // NOTE(review): no kernel launch is visible at this point in the listing
    // (the embedded numbering skips a line); presumably AtomicKernel::apply is
    // called here with tag, v and s — confirm against the original file.
285  // Run the atomic kernel for the requested tag
287 
288  // Copy to host
289  host_scalar_view_type hs = Kokkos::create_mirror_view(s);
290  Kokkos::deep_copy(hs, s);
291 
292  // Compute correct result
293  FadType b = s0;
294  for (size_type i=0; i<num_rows; ++i)
295  b = tag.apply(b, h_v(i));
296 
    // Uses checkFads()' default tolerance.
297  // Check
298  bool success = checkFads(b, hs(), out);
299 
300  return success;
301 }
302 
// Each body below calls testAtomic() with OperFetch == true and one tag type,
// and stores the result in `success`.
// NOTE(review): the line that opens each test (the embedded numbering skips
// one line before every argument list) is not visible in this listing, and
// neither `success` nor `out` is declared in the visible text; presumably both
// come from the Teuchos unit-test declaration macro — confirm.
303 // Test atomic_oper_fetch form
304 
306  Kokkos_View_Fad, AtomicAddFetch, FadType, Layout, Device )
307 {
308  success = testAtomic<FadType, Layout, Device, true>(AddTag(), out);
309 }
310 
312  Kokkos_View_Fad, AtomicSubFetch, FadType, Layout, Device )
313 {
314  success = testAtomic<FadType, Layout, Device, true>(SubTag(), out);
315 }
316 
318  Kokkos_View_Fad, AtomicMulFetch, FadType, Layout, Device )
319 {
320  success = testAtomic<FadType, Layout, Device, true>(MulTag(), out);
321 }
322 
324  Kokkos_View_Fad, AtomicDivFetch, FadType, Layout, Device )
325 {
326  success = testAtomic<FadType, Layout, Device, true>(DivTag(), out);
327 }
328 
330  Kokkos_View_Fad, AtomicMaxFetch, FadType, Layout, Device )
331 {
332  success = testAtomic<FadType, Layout, Device, true>(MaxTag(), out);
333 }
334 
336  Kokkos_View_Fad, AtomicMinFetch, FadType, Layout, Device )
337 {
338  success = testAtomic<FadType, Layout, Device, true>(MinTag(), out);
339 }
340 
// Each body below calls testAtomic() with OperFetch == false and one tag type,
// and stores the result in `success`.
// NOTE(review): the line that opens each test (the embedded numbering skips
// one line before every argument list) is not visible in this listing, and
// neither `success` nor `out` is declared in the visible text; presumably both
// come from the Teuchos unit-test declaration macro — confirm.
341 // Test atomic_fetch_oper form
342 
344  Kokkos_View_Fad, AtomicFetchAdd, FadType, Layout, Device )
345 {
346  success = testAtomic<FadType, Layout, Device, false>(AddTag(), out);
347 }
348 
350  Kokkos_View_Fad, AtomicFetchSub, FadType, Layout, Device )
351 {
352  success = testAtomic<FadType, Layout, Device, false>(SubTag(), out);
353 }
354 
356  Kokkos_View_Fad, AtomicFetchMul, FadType, Layout, Device )
357 {
358  success = testAtomic<FadType, Layout, Device, false>(MulTag(), out);
359 }
360 
362  Kokkos_View_Fad, AtomicFetchDiv, FadType, Layout, Device )
363 {
364  success = testAtomic<FadType, Layout, Device, false>(DivTag(), out);
365 }
366 
368  Kokkos_View_Fad, AtomicFetchMax, FadType, Layout, Device )
369 {
370  success = testAtomic<FadType, Layout, Device, false>(MaxTag(), out);
371 }
372 
374  Kokkos_View_Fad, AtomicFetchMin, FadType, Layout, Device )
375 {
376  success = testAtomic<FadType, Layout, Device, false>(MinTag(), out);
377 }
378 
// Expands to one TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT per test name in the
// Kokkos_View_Fad group (six *Fetch and six Fetch* names) for the given
// Fad type F, layout L and device D.
379 #define VIEW_FAD_TESTS_FLD( F, L, D ) \
380  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicAddFetch, F, L, D ) \
381  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicSubFetch, F, L, D ) \
382  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMulFetch, F, L, D ) \
383  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicDivFetch, F, L, D ) \
384  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMaxFetch, F, L, D ) \
385  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMinFetch, F, L, D ) \
386  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchAdd, F, L, D ) \
387  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchSub, F, L, D ) \
388  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMul, F, L, D ) \
389  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchDiv, F, L, D ) \
390  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMax, F, L, D ) \
391  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMin, F, L, D )
392 
// Layout names used by VIEW_FAD_TESTS_FD below.
// NOTE(review): LeftContiguous and RightContiguous are used by the macro but
// their declarations are not visible in this listing (the embedded numbering
// skips two lines after the using-declarations); presumably they name
// Kokkos::LayoutContiguous over LayoutLeft / LayoutRight — confirm.
393 using Kokkos::LayoutLeft;
394 using Kokkos::LayoutRight;
397 
// Expands VIEW_FAD_TESTS_FLD for Fad type F and device D once per layout:
// LayoutLeft, LayoutRight, LeftContiguous and RightContiguous.
398 #define VIEW_FAD_TESTS_FD( F, D ) \
399  VIEW_FAD_TESTS_FLD( F, LayoutLeft, D ) \
400  VIEW_FAD_TESTS_FLD( F, LayoutRight, D ) \
401  VIEW_FAD_TESTS_FLD( F, LeftContiguous, D ) \
402  VIEW_FAD_TESTS_FLD( F, RightContiguous, D )
403 
// VIEW_FAD_TESTS_D( D ) instantiates the tests for device D.  With
// SACADO_ENABLE_NEW_DESIGN it expands VIEW_FAD_TESTS_FD for SFadType and
// SLFadType, plus DFadType when SACADO_TEST_DFAD is set; otherwise it expands
// to nothing.
// NOTE(review): SFadType, SLFadType and DFadType are not declared in the
// visible text (the embedded numbering skips three lines after the first
// #if) — confirm their definitions in the original file.
404 // Full set of atomics only implemented for new design
405 #if SACADO_ENABLE_NEW_DESIGN
409 
410 #if SACADO_TEST_DFAD
411 #define VIEW_FAD_TESTS_D( D ) \
412  VIEW_FAD_TESTS_FD( SFadType, D ) \
413  VIEW_FAD_TESTS_FD( SLFadType, D ) \
414  VIEW_FAD_TESTS_FD( DFadType, D )
415 #else
416 #define VIEW_FAD_TESTS_D( D ) \
417  VIEW_FAD_TESTS_FD( SFadType, D ) \
418  VIEW_FAD_TESTS_FD( SLFadType, D )
419 #endif
420 
421 #else
422 
423 #define VIEW_FAD_TESTS_D( D ) /* */
424 
425 #endif
const int global_num_cols
static double init()
KOKKOS_INLINE_FUNCTION void operator()(AddTag tag, const size_type i) const
static auto apply(const T1 &a, const T2 &b) -> decltype(a/b)
Kokkos::LayoutContiguous< Kokkos::LayoutRight > RightContiguous
Kokkos::LayoutContiguous< Kokkos::LayoutLeft > LeftContiguous
Kokkos::ThreadLocalScalarType< ViewType >::type local_scalar_type
static const bool value
Sacado::Fad::DFad< double > FadType
static double init()
ViewType::size_type size_type
const int global_fad_size
AtomicKernel(const ViewType &v, const ScalarViewType &s)
Sacado::Fad::SFad< double, fad_dim > SFadType
static double init()
static const size_type stride
static auto apply(const T1 &a, const T2 &b) -> decltype(a-b)
const int global_num_rows
#define T2(r, f)
Definition: Sacado_rad.hpp:578
GeneralFad< DynamicStorage< T > > DFad
SimpleFad< ValueT > min(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Forward-mode AD class templated on the storage for the derivative array.
static auto apply(const T1 &a, const T2 &b) -> decltype(a+b)
static double init()
static double init()
#define T1(r, f)
Definition: Sacado_rad.hpp:603
Sacado::Fad::SLFad< double, fad_dim > SLFadType
Sacado::Fad::DFad< double > DFadType
KOKKOS_INLINE_FUNCTION void operator()(Tag tag, const team_handle &team) const
static double init()
bool checkFads(const FadType1 &x, const FadType2 &x2, Teuchos::FancyOStream &out, double tol=1.0e-15)
static auto apply(const T1 &a, const T2 &b) -> decltype(min(a, b))
static void apply(Tag tag, const ViewType &v, const ScalarViewType &s)
#define GLOBAL_FAD_SIZE
int value
Kokkos::TeamPolicy< execution_space >::member_type team_handle
const ScalarViewType m_s
const double tol
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
ViewType::execution_space execution_space
fadtype generate_fad(const ordinal num_rows, const ordinal num_cols, const ordinal fad_size, const ordinal row, const ordinal col)
TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(Kokkos_View_Fad, AtomicAddFetch, FadType, Layout, Device)
bool testAtomic(const TagType &tag, Teuchos::FancyOStream &out)
static auto apply(const T1 &a, const T2 &b) -> decltype(a *b)
static auto apply(const T1 &a, const T2 &b) -> decltype(max(a, b))