Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_ILUT_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_ILUT_DEF_HPP
44 #define IFPACK2_ILUT_DEF_HPP
45 
46 // disable clang warnings
47 #if defined (__clang__) && !defined (__INTEL_COMPILER)
48 #pragma clang system_header
49 #endif
50 
51 #include "Ifpack2_Heap.hpp"
52 #include "Ifpack2_LocalFilter.hpp"
53 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
54 #include "Ifpack2_Parameters.hpp"
55 #include "Tpetra_CrsMatrix.hpp"
56 #include "Teuchos_Time.hpp"
58 
59 namespace Ifpack2 {
60 
61  namespace {
62 
87  template<class ScalarType>
89  ilutDefaultDropTolerance () {
90  using Teuchos::as;
92  typedef typename STS::magnitudeType magnitude_type;
94 
95  // 1/2. Hopefully this can be represented in magnitude_type.
96  const magnitude_type oneHalf = STM::one() / (STM::one() + STM::one());
97 
98  // The min ensures that in case magnitude_type has very low
99  // precision, we'll at least get some value strictly less than
100  // one.
101  return std::min (as<magnitude_type> (1000) * STS::magnitude (STS::eps ()), oneHalf);
102  }
103 
104  // Full specialization for ScalarType = double.
105  // This specialization preserves ILUT's previous default behavior.
106  template<>
108  ilutDefaultDropTolerance<double> () {
109  return 1e-12;
110  }
111 
112  } // namespace (anonymous)
113 
114 
115 template <class MatrixType>
117  A_ (A),
118  Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
119  Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
120  RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
121  LevelOfFill_ (1),
122  DropTolerance_ (ilutDefaultDropTolerance<scalar_type> ()),
123  InitializeTime_ (0.0),
124  ComputeTime_ (0.0),
125  ApplyTime_ (0.0),
126  NumInitialize_ (0),
127  NumCompute_ (0),
128  NumApply_ (0),
129  IsInitialized_ (false),
130  IsComputed_ (false)
131 {
132  allocateSolvers();
133 }
134 
135 template <class MatrixType>
137 {}
138 
139 template<class MatrixType>
141 {
142  L_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
143  U_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> ());
144 }
145 
146 
147 template <class MatrixType>
149 {
150  using Teuchos::as;
153 
154  // Default values of the various parameters.
155  int fillLevel = 1;
156  magnitude_type absThresh = STM::zero ();
157  magnitude_type relThresh = STM::one ();
158  magnitude_type relaxValue = STM::zero ();
159  magnitude_type dropTol = ilutDefaultDropTolerance<scalar_type> ();
160 
161  bool gotFillLevel = false;
162  try {
163  // Try getting the fill level as an int.
164  fillLevel = params.get<int> ("fact: ilut level-of-fill");
165  gotFillLevel = true;
166  }
167  catch (InvalidParameterName&) {
168  // We didn't really get it, but this tells us to stop looking.
169  gotFillLevel = true;
170  }
171  catch (InvalidParameterType&) {
172  // The name is right, but the type is wrong; try different types.
173  // We don't have to check InvalidParameterName again, since we
174  // checked it above, and it has nothing to do with the type.
175  }
176 
177  if (! gotFillLevel) {
178  // Try magnitude_type, for backwards compatibility.
179  try {
180  fillLevel = as<int> (params.get<magnitude_type> ("fact: ilut level-of-fill"));
181  }
182  catch (InvalidParameterType&) {}
183  }
184  if (! gotFillLevel) {
185  // Try double, for backwards compatibility.
186  try {
187  fillLevel = as<int> (params.get<double> ("fact: ilut level-of-fill"));
188  }
189  catch (InvalidParameterType&) {}
190  }
191  // If none of the above attempts succeed, accept the default value.
192 
194  fillLevel <= 0, std::runtime_error,
195  "Ifpack2::ILUT: The \"fact: ilut level-of-fill\" parameter must be "
196  "strictly greater than zero, but you specified a value of " << fillLevel
197  << ". Remember that for ILUT, the fill level p means something different "
198  "than it does for ILU(k). ILU(0) produces factors with the same sparsity "
199  "structure as the input matrix A; ILUT with p = 0 always produces a "
200  "diagonal matrix, and is thus probably not what you want.");
201 
202  try {
203  absThresh = params.get<magnitude_type> ("fact: absolute threshold");
204  }
205  catch (InvalidParameterType&) {
206  // Try double, for backwards compatibility.
207  // The cast from double to magnitude_type must succeed.
208  absThresh = as<magnitude_type> (params.get<double> ("fact: absolute threshold"));
209  }
210  catch (InvalidParameterName&) {
211  // Accept the default value.
212  }
213 
214  try {
215  relThresh = params.get<magnitude_type> ("fact: relative threshold");
216  }
217  catch (InvalidParameterType&) {
218  // Try double, for backwards compatibility.
219  // The cast from double to magnitude_type must succeed.
220  relThresh = as<magnitude_type> (params.get<double> ("fact: relative threshold"));
221  }
222  catch (InvalidParameterName&) {
223  // Accept the default value.
224  }
225 
226  try {
227  relaxValue = params.get<magnitude_type> ("fact: relax value");
228  }
229  catch (InvalidParameterType&) {
230  // Try double, for backwards compatibility.
231  // The cast from double to magnitude_type must succeed.
232  relaxValue = as<magnitude_type> (params.get<double> ("fact: relax value"));
233  }
234  catch (InvalidParameterName&) {
235  // Accept the default value.
236  }
237 
238  try {
239  dropTol = params.get<magnitude_type> ("fact: drop tolerance");
240  }
241  catch (InvalidParameterType&) {
242  // Try double, for backwards compatibility.
243  // The cast from double to magnitude_type must succeed.
244  dropTol = as<magnitude_type> (params.get<double> ("fact: drop tolerance"));
245  }
246  catch (InvalidParameterName&) {
247  // Accept the default value.
248  }
249 
250  // Forward to trisolvers.
251  L_solver_->setParameters(params);
252  U_solver_->setParameters(params);
253 
254  // "Commit" the values only after validating all of them. This
255  // ensures that there are no side effects if this routine throws an
256  // exception.
257 
258  // mfh 28 Nov 2012: The previous code would not assign Athresh_,
259  // Rthresh_, RelaxValue_, or DropTolerance_ if the read-in value was
260  // -1. I don't know if keeping this behavior is correct, but I'll
261  // keep it just so as not to change previous behavior.
262 
263  LevelOfFill_ = fillLevel;
264  if (absThresh != -STM::one ()) {
265  Athresh_ = absThresh;
266  }
267  if (relThresh != -STM::one ()) {
268  Rthresh_ = relThresh;
269  }
270  if (relaxValue != -STM::one ()) {
271  RelaxValue_ = relaxValue;
272  }
273  if (dropTol != -STM::one ()) {
274  DropTolerance_ = dropTol;
275  }
276 }
277 
278 
279 template <class MatrixType>
283  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getComm: "
284  "The matrix is null. Please call setMatrix() with a nonnull input "
285  "before calling this method.");
286  return A_->getComm ();
287 }
288 
289 
290 template <class MatrixType>
293  return A_;
294 }
295 
296 
297 template <class MatrixType>
300 {
302  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getDomainMap: "
303  "The matrix is null. Please call setMatrix() with a nonnull input "
304  "before calling this method.");
305  return A_->getDomainMap ();
306 }
307 
308 
309 template <class MatrixType>
312 {
314  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getRangeMap: "
315  "The matrix is null. Please call setMatrix() with a nonnull input "
316  "before calling this method.");
317  return A_->getRangeMap ();
318 }
319 
320 
321 template <class MatrixType>
323  return true;
324 }
325 
326 
327 template <class MatrixType>
329  return NumInitialize_;
330 }
331 
332 
333 template <class MatrixType>
335  return NumCompute_;
336 }
337 
338 
339 template <class MatrixType>
341  return NumApply_;
342 }
343 
344 
345 template <class MatrixType>
347  return InitializeTime_;
348 }
349 
350 
351 template<class MatrixType>
353  return ComputeTime_;
354 }
355 
356 
357 template<class MatrixType>
359  return ApplyTime_;
360 }
361 
362 
363 template<class MatrixType>
366  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getNodeSmootherComplexity: "
367  "The input matrix A is null. Please call setMatrix() with a nonnull "
368  "input matrix, then call compute(), before calling this method.");
369  // ILUT methods cost roughly one apply + the nnz in the upper+lower triangles
370  return A_->getNodeNumEntries() + getNodeNumEntries();
371 }
372 
373 
374 template<class MatrixType>
376  return L_->getGlobalNumEntries () + U_->getGlobalNumEntries ();
377 }
378 
379 
380 template<class MatrixType>
382  return L_->getNodeNumEntries () + U_->getNodeNumEntries ();
383 }
384 
385 
386 template<class MatrixType>
388 {
389  if (A.getRawPtr () != A_.getRawPtr ()) {
390  // Check in serial or one-process mode if the matrix is square.
392  ! A.is_null () && A->getComm ()->getSize () == 1 &&
393  A->getNodeNumRows () != A->getNodeNumCols (),
394  std::runtime_error, "Ifpack2::ILUT::setMatrix: If A's communicator only "
395  "contains one process, then A must be square. Instead, you provided a "
396  "matrix A with " << A->getNodeNumRows () << " rows and "
397  << A->getNodeNumCols () << " columns.");
398 
399  // It's legal for A to be null; in that case, you may not call
400  // initialize() until calling setMatrix() with a nonnull input.
401  // Regardless, setting the matrix invalidates any previous
402  // factorization.
403  IsInitialized_ = false;
404  IsComputed_ = false;
405  A_local_ = Teuchos::null;
406 
407  // The sparse triangular solvers get a triangular factor as their
408  // input matrix. The triangular factors L_ and U_ are getting
409  // reset, so we reset the solvers' matrices to null. Do that
410  // before setting L_ and U_ to null, so that latter step actually
411  // frees the factors.
412  if (! L_solver_.is_null ()) {
413  L_solver_->setMatrix (Teuchos::null);
414  }
415  if (! U_solver_.is_null ()) {
416  U_solver_->setMatrix (Teuchos::null);
417  }
418 
419  L_ = Teuchos::null;
420  U_ = Teuchos::null;
421  A_ = A;
422  }
423 }
424 
425 
426 template<class MatrixType>
428 {
429  Teuchos::Time timer ("ILUT::initialize");
430  {
431  Teuchos::TimeMonitor timeMon (timer);
432 
433  // Check that the matrix is nonnull.
435  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::initialize: "
436  "The matrix to precondition is null. Please call setMatrix() with a "
437  "nonnull input before calling this method.");
438 
439  // Clear any previous computations.
440  IsInitialized_ = false;
441  IsComputed_ = false;
442  A_local_ = Teuchos::null;
443  L_ = Teuchos::null;
444  U_ = Teuchos::null;
445 
446  A_local_ = makeLocalFilter (A_); // Compute the local filter.
447 
448  IsInitialized_ = true;
449  ++NumInitialize_;
450  }
451  InitializeTime_ += timer.totalElapsedTime ();
452 }
453 
454 
455 template<typename ScalarType>
457 scalar_mag (const ScalarType& s)
458 {
460 }
461 
462 
463 template<class MatrixType>
465 {
466  using Teuchos::Array;
467  using Teuchos::ArrayRCP;
468  using Teuchos::ArrayView;
469  using Teuchos::as;
470  using Teuchos::rcp;
471  using Teuchos::reduceAll;
472 
473  //--------------------------------------------------------------------------
474  // Ifpack2::ILUT is a translation of the Aztec ILUT implementation. The Aztec
475  // ILUT implementation was written by Ray Tuminaro.
476  //
477  // This isn't an exact translation of the Aztec ILUT algorithm, for the
478  // following reasons:
479  // 1. Minor differences result from the fact that Aztec factors a MSR format
480  // matrix in place, while the code below factors an input CrsMatrix which
481  // remains untouched and stores the resulting factors in separate L and U
482  // CrsMatrix objects.
483  // Also, the Aztec code begins by shifting the matrix pointers back
484  // by one, and the pointer contents back by one, and then using 1-based
485  // Fortran-style indexing in the algorithm. This Ifpack2 code uses C-style
486  // 0-based indexing throughout.
487  // 2. Aztec stores the inverse of the diagonal of U. This Ifpack2 code
488  // stores the non-inverted diagonal in U.
489  // The triangular solves (in Ifpack2::ILUT::apply()) are performed by
490  // calling the Tpetra::CrsMatrix::solve method on the L and U objects, and
491  // this requires U to contain the non-inverted diagonal.
492  //
493  // ABW.
494  //--------------------------------------------------------------------------
495 
496  // Don't count initialization in the compute() time.
497  if (! isInitialized ()) {
498  initialize ();
499  }
500 
501  Teuchos::Time timer ("ILUT::compute");
502  { // Timer scope for timing compute()
503  Teuchos::TimeMonitor timeMon (timer, true);
504  const scalar_type zero = STS::zero ();
505  const scalar_type one = STS::one ();
506 
507  const local_ordinal_type myNumRows = A_local_->getNodeNumRows ();
508  L_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0));
509  U_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0));
510 
511  // CGB: note, this caching approach may not be necessary anymore
512  // We will store ArrayView objects that are views of the rows of U, so that
513  // we don't have to repeatedly retrieve the view for each row. These will
514  // be populated row by row as the factorization proceeds.
515  Array<ArrayView<const local_ordinal_type> > Uindices (myNumRows);
516  Array<ArrayView<const scalar_type> > Ucoefs (myNumRows);
517 
518  // If this macro is defined, files containing the L and U factors
519  // will be written. DON'T CHECK IN THE CODE WITH THIS MACRO ENABLED!!!
520  // #define IFPACK2_WRITE_FACTORS
521 #ifdef IFPACK2_WRITE_FACTORS
522  std::ofstream ofsL("L.tif.mtx", std::ios::out);
523  std::ofstream ofsU("U.tif.mtx", std::ios::out);
524 #endif
525 
526  // The code here uses double for fill calculations, even though
527  // the fill level is actually an integer. The point is to avoid
528  // rounding and overflow for integer calculations. If int is <=
529  // 32 bits, it can never overflow double, so this cast is always
530  // OK as long as int has <= 32 bits.
531 
532  // Calculate how much fill will be allowed in addition to the
533  // space that corresponds to the input matrix entries.
534  double local_nnz = static_cast<double> (A_local_->getNodeNumEntries ());
535  double fill;
536  {
537  const double fillLevel = as<double> (getLevelOfFill ());
538  fill = ((fillLevel - 1) * local_nnz) / (2 * myNumRows);
539  }
540 
541  // std::ceil gives the smallest integer larger than the argument.
542  // this may give a slightly different result than Aztec's fill value in
543  // some cases.
544  double fill_ceil=std::ceil(fill);
545 
546  // Similarly to Aztec, we will allow the same amount of fill for each
547  // row, half in L and half in U.
548  size_type fillL = static_cast<size_type>(fill_ceil);
549  size_type fillU = static_cast<size_type>(fill_ceil);
550 
551  Array<scalar_type> InvDiagU (myNumRows, zero);
552 
553  Array<local_ordinal_type> tmp_idx;
554  Array<scalar_type> tmpv;
555 
556  enum { UNUSED, ORIG, FILL };
557  local_ordinal_type max_col = myNumRows;
558 
559  Array<int> pattern(max_col, UNUSED);
560  Array<scalar_type> cur_row(max_col, zero);
561  Array<magnitude_type> unorm(max_col);
562  magnitude_type rownorm;
563  Array<local_ordinal_type> L_cols_heap;
564  Array<local_ordinal_type> U_cols;
565  Array<local_ordinal_type> L_vals_heap;
566  Array<local_ordinal_type> U_vals_heap;
567 
568  // A comparison object which will be used to create 'heaps' of indices
569  // that are ordered according to the corresponding values in the
570  // 'cur_row' array.
571  greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row);
572 
573  // =================== //
574  // start factorization //
575  // =================== //
576 
577  ArrayRCP<local_ordinal_type> ColIndicesARCP;
578  ArrayRCP<scalar_type> ColValuesARCP;
579  if (! A_local_->supportsRowViews ()) {
580  const size_t maxnz = A_local_->getNodeMaxNumRowEntries ();
581  ColIndicesARCP.resize (maxnz);
582  ColValuesARCP.resize (maxnz);
583  }
584 
585  for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
586  ArrayView<const local_ordinal_type> ColIndicesA;
587  ArrayView<const scalar_type> ColValuesA;
588  size_t RowNnz;
589 
590  if (A_local_->supportsRowViews ()) {
591  A_local_->getLocalRowView (row_i, ColIndicesA, ColValuesA);
592  RowNnz = ColIndicesA.size ();
593  }
594  else {
595  A_local_->getLocalRowCopy (row_i, ColIndicesARCP (), ColValuesARCP (), RowNnz);
596  ColIndicesA = ColIndicesARCP (0, RowNnz);
597  ColValuesA = ColValuesARCP (0, RowNnz);
598  }
599 
600  // Always include the diagonal in the U factor. The value should get
601  // set in the next loop below.
602  U_cols.push_back(row_i);
603  cur_row[row_i] = zero;
604  pattern[row_i] = ORIG;
605 
606  size_type L_cols_heaplen = 0;
607  rownorm = STM::zero ();
608  for (size_t i = 0; i < RowNnz; ++i) {
609  if (ColIndicesA[i] < myNumRows) {
610  if (ColIndicesA[i] < row_i) {
611  add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
612  }
613  else if (ColIndicesA[i] > row_i) {
614  U_cols.push_back(ColIndicesA[i]);
615  }
616 
617  cur_row[ColIndicesA[i]] = ColValuesA[i];
618  pattern[ColIndicesA[i]] = ORIG;
619  rownorm += scalar_mag(ColValuesA[i]);
620  }
621  }
622 
623  // Alter the diagonal according to the absolute-threshold and
624  // relative-threshold values. If not set, those values default
625  // to zero and one respectively.
626  const magnitude_type rthresh = getRelativeThreshold();
627  const scalar_type& v = cur_row[row_i];
628  cur_row[row_i] = as<scalar_type> (getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh*v;
629 
630  size_type orig_U_len = U_cols.size();
631  RowNnz = L_cols_heap.size() + orig_U_len;
632  rownorm = getDropTolerance() * rownorm/RowNnz;
633 
634  // The following while loop corresponds to the 'L30' goto's in Aztec.
635  size_type L_vals_heaplen = 0;
636  while (L_cols_heaplen > 0) {
637  local_ordinal_type row_k = L_cols_heap.front();
638 
639  scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
640  cur_row[row_k] = multiplier;
641  magnitude_type mag_mult = scalar_mag(multiplier);
642  if (mag_mult*unorm[row_k] < rownorm) {
643  pattern[row_k] = UNUSED;
644  rm_heap_root(L_cols_heap, L_cols_heaplen);
645  continue;
646  }
647  if (pattern[row_k] != ORIG) {
648  if (L_vals_heaplen < fillL) {
649  add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
650  }
651  else if (L_vals_heaplen==0 ||
652  mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
653  pattern[row_k] = UNUSED;
654  rm_heap_root(L_cols_heap, L_cols_heaplen);
655  continue;
656  }
657  else {
658  pattern[L_vals_heap.front()] = UNUSED;
659  rm_heap_root(L_vals_heap, L_vals_heaplen, vals_comp);
660  add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
661  }
662  }
663 
664  /* Reduce current row */
665 
666  ArrayView<const local_ordinal_type>& ColIndicesU = Uindices[row_k];
667  ArrayView<const scalar_type>& ColValuesU = Ucoefs[row_k];
668  size_type ColNnzU = ColIndicesU.size();
669 
670  for(size_type j=0; j<ColNnzU; ++j) {
671  if (ColIndicesU[j] > row_k) {
672  scalar_type tmp = multiplier * ColValuesU[j];
673  local_ordinal_type col_j = ColIndicesU[j];
674  if (pattern[col_j] != UNUSED) {
675  cur_row[col_j] -= tmp;
676  }
677  else if (scalar_mag(tmp) > rownorm) {
678  cur_row[col_j] = -tmp;
679  pattern[col_j] = FILL;
680  if (col_j > row_i) {
681  U_cols.push_back(col_j);
682  }
683  else {
684  add_to_heap(col_j, L_cols_heap, L_cols_heaplen);
685  }
686  }
687  }
688  }
689 
690  rm_heap_root(L_cols_heap, L_cols_heaplen);
691  }//end of while(L_cols_heaplen) loop
692 
693 
694  // Put indices and values for L into arrays and then into the L_ matrix.
695 
696  // first, the original entries from the L section of A:
697  for (size_type i = 0; i < ColIndicesA.size (); ++i) {
698  if (ColIndicesA[i] < row_i) {
699  tmp_idx.push_back(ColIndicesA[i]);
700  tmpv.push_back(cur_row[ColIndicesA[i]]);
701  pattern[ColIndicesA[i]] = UNUSED;
702  }
703  }
704 
705  // next, the L entries resulting from fill:
706  for (size_type j = 0; j < L_vals_heaplen; ++j) {
707  tmp_idx.push_back(L_vals_heap[j]);
708  tmpv.push_back(cur_row[L_vals_heap[j]]);
709  pattern[L_vals_heap[j]] = UNUSED;
710  }
711 
712  // L has a one on the diagonal, but we don't explicitly store
713  // it. If we don't store it, then the kernel which performs the
714  // triangular solve can assume a unit diagonal, take a short-cut
715  // and perform faster.
716 
717  L_->insertLocalValues (row_i, tmp_idx (), tmpv ());
718 #ifdef IFPACK2_WRITE_FACTORS
719  for (size_type ii = 0; ii < tmp_idx.size (); ++ii) {
720  ofsL << row_i << " " << tmp_idx[ii] << " " << tmpv[ii] << std::endl;
721  }
722 #endif
723 
724  tmp_idx.clear();
725  tmpv.clear();
726 
727  // Pick out the diagonal element, store its reciprocal.
728  if (cur_row[row_i] == zero) {
729  std::cerr << "Ifpack2::ILUT::Compute: zero pivot encountered! Replacing with rownorm and continuing...(You may need to set the parameter 'fact: absolute threshold'.)" << std::endl;
730  cur_row[row_i] = rownorm;
731  }
732  InvDiagU[row_i] = one / cur_row[row_i];
733 
734  // Non-inverted diagonal is stored for U:
735  tmp_idx.push_back(row_i);
736  tmpv.push_back(cur_row[row_i]);
737  unorm[row_i] = scalar_mag(cur_row[row_i]);
738  pattern[row_i] = UNUSED;
739 
740  // Now put indices and values for U into arrays and then into the U_ matrix.
741  // The first entry in U_cols is the diagonal, which we just handled, so we'll
742  // start our loop at j=1.
743 
744  size_type U_vals_heaplen = 0;
745  for(size_type j=1; j<U_cols.size(); ++j) {
746  local_ordinal_type col = U_cols[j];
747  if (pattern[col] != ORIG) {
748  if (U_vals_heaplen < fillU) {
749  add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
750  }
751  else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) >
752  scalar_mag(cur_row[U_vals_heap.front()])) {
753  rm_heap_root(U_vals_heap, U_vals_heaplen, vals_comp);
754  add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
755  }
756  }
757  else {
758  tmp_idx.push_back(col);
759  tmpv.push_back(cur_row[col]);
760  unorm[row_i] += scalar_mag(cur_row[col]);
761  }
762  pattern[col] = UNUSED;
763  }
764 
765  for(size_type j=0; j<U_vals_heaplen; ++j) {
766  tmp_idx.push_back(U_vals_heap[j]);
767  tmpv.push_back(cur_row[U_vals_heap[j]]);
768  unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
769  }
770 
771  unorm[row_i] /= (orig_U_len + U_vals_heaplen);
772 
773  U_->insertLocalValues(row_i, tmp_idx(), tmpv() );
774 #ifdef IFPACK2_WRITE_FACTORS
775  for(int ii=0; ii<tmp_idx.size(); ++ii) {
776  ofsU <<row_i<< " " <<tmp_idx[ii]<< " " <<tmpv[ii]<< std::endl;
777  }
778 #endif
779  tmp_idx.clear();
780  tmpv.clear();
781 
782  U_->getLocalRowView(row_i, Uindices[row_i], Ucoefs[row_i] );
783 
784  L_cols_heap.clear();
785  U_cols.clear();
786  L_vals_heap.clear();
787  U_vals_heap.clear();
788  } // end of for(row_i) loop
789 
790  // FIXME (mfh 03 Apr 2013) Do we need to supply a domain and range Map?
791  L_->fillComplete();
792  U_->fillComplete();
793 
794  L_solver_->setMatrix(L_);
795  L_solver_->initialize ();
796  L_solver_->compute ();
797 
798  U_solver_->setMatrix(U_);
799  U_solver_->initialize ();
800  U_solver_->compute ();
801  }
802  ComputeTime_ += timer.totalElapsedTime ();
803  IsComputed_ = true;
804  ++NumCompute_;
805 }
806 
807 
808 template <class MatrixType>
810 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
811  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
812  Teuchos::ETransp mode,
813  scalar_type alpha,
814  scalar_type beta) const
815 {
816  using Teuchos::RCP;
817  using Teuchos::rcp;
818  using Teuchos::rcpFromRef;
820 
822  ! isComputed (), std::runtime_error,
823  "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete "
824  "factorization, before calling apply().");
825 
827  X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
828  "Ifpack2::ILUT::apply: X and Y must have the same number of columns. "
829  "X has " << X.getNumVectors () << " columns, but Y has "
830  << Y.getNumVectors () << " columns.");
831 
832  const scalar_type one = STS::one ();
833  const scalar_type zero = STS::zero ();
834 
835  Teuchos::Time timer ("ILUT::apply");
836  { // Start timing
837  Teuchos::TimeMonitor timeMon (timer, true);
838 
839  if (alpha == one && beta == zero) {
840  if (mode == Teuchos::NO_TRANS) { // Solve L (U Y) = X for Y.
841  // Start by solving L Y = X for Y.
842  L_solver_->apply (X, Y, mode);
843 
844  // Solve U Y = Y.
845  U_solver_->apply (Y, Y, mode);
846  }
847  else { // Solve U^P (L^P Y)) = X for Y (where P is * or T).
848 
849  // Start by solving U^P Y = X for Y.
850  U_solver_->apply (X, Y, mode);
851 
852  // Solve L^P Y = Y.
853  L_solver_->apply (Y, Y, mode);
854  }
855  }
856  else { // alpha != 1 or beta != 0
857  if (alpha == zero) {
858  // The special case for beta == 0 ensures that if Y contains Inf
859  // or NaN values, we replace them with 0 (following BLAS
860  // convention), rather than multiplying them by 0 to get NaN.
861  if (beta == zero) {
862  Y.putScalar (zero);
863  } else {
864  Y.scale (beta);
865  }
866  } else { // alpha != zero
867  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
868  apply (X, Y_tmp, mode);
869  Y.update (alpha, Y_tmp, beta);
870  }
871  }
872  }//end timing
873 
874  ++NumApply_;
875  ApplyTime_ += timer.totalElapsedTime ();
876 }
877 
878 
879 template <class MatrixType>
880 std::string ILUT<MatrixType>::description () const
881 {
882  std::ostringstream os;
883 
884  // Output is a valid YAML dictionary in flow style. If you don't
885  // like everything on a single line, you should call describe()
886  // instead.
887  os << "\"Ifpack2::ILUT\": {";
888  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
889  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
890 
891  os << "Level-of-fill: " << getLevelOfFill() << ", "
892  << "absolute threshold: " << getAbsoluteThreshold() << ", "
893  << "relative threshold: " << getRelativeThreshold() << ", "
894  << "relaxation value: " << getRelaxValue() << ", ";
895 
896  if (A_.is_null ()) {
897  os << "Matrix: null";
898  }
899  else {
900  os << "Global matrix dimensions: ["
901  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
902  << ", Global nnz: " << A_->getGlobalNumEntries();
903  }
904 
905  os << "}";
906  return os.str ();
907 }
908 
909 
910 template <class MatrixType>
911 void
914  const Teuchos::EVerbosityLevel verbLevel) const
915 {
916  using Teuchos::Comm;
917  using Teuchos::OSTab;
918  using Teuchos::RCP;
920  using std::endl;
921  using Teuchos::VERB_DEFAULT;
922  using Teuchos::VERB_NONE;
923  using Teuchos::VERB_LOW;
924  using Teuchos::VERB_MEDIUM;
925  using Teuchos::VERB_HIGH;
926  using Teuchos::VERB_EXTREME;
927 
928  const Teuchos::EVerbosityLevel vl =
929  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
930  OSTab tab0 (out);
931 
932  if (vl > VERB_NONE) {
933  out << "\"Ifpack2::ILUT\":" << endl;
934  OSTab tab1 (out);
935  out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
936  if (this->getObjectLabel () != "") {
937  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
938  }
939  out << "Initialized: " << (isInitialized () ? "true" : "false")
940  << endl
941  << "Computed: " << (isComputed () ? "true" : "false")
942  << endl
943  << "Level of fill: " << getLevelOfFill () << endl
944  << "Absolute threshold: " << getAbsoluteThreshold () << endl
945  << "Relative threshold: " << getRelativeThreshold () << endl
946  << "Relax value: " << getRelaxValue () << endl;
947 
948  if (isComputed () && vl >= VERB_HIGH) {
949  const double fillFraction =
950  (double) getGlobalNumEntries () / (double) A_->getGlobalNumEntries ();
951  const double nnzToRows =
952  (double) getGlobalNumEntries () / (double) U_->getGlobalNumRows ();
953 
954  out << "Dimensions of L: [" << L_->getGlobalNumRows () << ", "
955  << L_->getGlobalNumRows () << "]" << endl
956  << "Dimensions of U: [" << U_->getGlobalNumRows () << ", "
957  << U_->getGlobalNumRows () << "]" << endl
958  << "Number of nonzeros in factors: " << getGlobalNumEntries () << endl
959  << "Fill fraction of factors over A: " << fillFraction << endl
960  << "Ratio of nonzeros to rows: " << nnzToRows << endl;
961  }
962 
963  out << "Number of initialize calls: " << getNumInitialize () << endl
964  << "Number of compute calls: " << getNumCompute () << endl
965  << "Number of apply calls: " << getNumApply () << endl
966  << "Total time in seconds for initialize: " << getInitializeTime () << endl
967  << "Total time in seconds for compute: " << getComputeTime () << endl
968  << "Total time in seconds for apply: " << getApplyTime () << endl;
969 
970  out << "Local matrix:" << endl;
971  A_local_->describe (out, vl);
972  }
973 }
974 
975 template <class MatrixType>
978 {
979  if (A->getComm ()->getSize () > 1) {
980  return Teuchos::rcp (new LocalFilter<row_matrix_type> (A));
981  } else {
982  return A;
983  }
984 }
985 
986 }//namespace Ifpack2
987 
988 
989 // FIXME (mfh 16 Sep 2014) We should really only use RowMatrix here!
990 // There's no need to instantiate for CrsMatrix too. All Ifpack2
991 // preconditioners can and should do dynamic casts if they need a type
992 // more specific than RowMatrix.
993 
994 #define IFPACK2_ILUT_INSTANT(S,LO,GO,N) \
995  template class Ifpack2::ILUT< Tpetra::RowMatrix<S, LO, GO, N> >;
996 
997 #endif /* IFPACK2_ILUT_DEF_HPP */
int getNumCompute() const
Returns the number of calls to Compute().
Definition: Ifpack2_ILUT_def.hpp:334
ILUT(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_ILUT_def.hpp:116
VERB_DEFAULT
virtual ~ILUT()
Destructor.
Definition: Ifpack2_ILUT_def.hpp:136
basic_OSTab< char > OSTab
global_size_t getGlobalNumEntries() const
Returns the number of nonzero entries in the global graph.
Definition: Ifpack2_ILUT_def.hpp:375
bool hasTransposeApply() const
Whether this object&#39;s apply() method can apply the transpose (or conjugate transpose, if applicable).
Definition: Ifpack2_ILUT_def.hpp:322
VERB_LOW
size_t getNodeNumEntries() const
Returns the number of nonzero entries in the local graph.
Definition: Ifpack2_ILUT_def.hpp:381
VERB_NONE
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_ILUT_decl.hpp:118
void initialize()
Clear any previously computed factors.
Definition: Ifpack2_ILUT_def.hpp:427
VERB_EXTREME
int getNumApply() const
Returns the number of calls to apply().
Definition: Ifpack2_ILUT_def.hpp:340
int getNumInitialize() const
Returns the number of calls to Initialize().
Definition: Ifpack2_ILUT_def.hpp:328
ILUT (incomplete LU factorization with threshold) of a Tpetra sparse matrix.
Definition: Ifpack2_ILUT_decl.hpp:91
bool is_null() const
VERB_MEDIUM
Teuchos::RCP< const map_type > getRangeMap() const
Tpetra::Map representing the range of this operator.
Definition: Ifpack2_ILUT_def.hpp:311
void rm_heap_root(Teuchos::Array< Ordinal > &heap, SizeType &heap_len)
Definition: Ifpack2_Heap.hpp:92
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
VERB_HIGH
double getInitializeTime() const
Returns the time spent in Initialize().
Definition: Ifpack2_ILUT_def.hpp:346
LinearOp zero(const VectorSpace &vs)
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
Teuchos::RCP< const map_type > getDomainMap() const
Tpetra::Map representing the domain of this operator.
Definition: Ifpack2_ILUT_def.hpp:299
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_ILUT_decl.hpp:106
void compute()
Compute factors L and U using the specified diagonal perturbation thresholds and relaxation parameter...
Definition: Ifpack2_ILUT_def.hpp:464
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_ILUT_def.hpp:387
double totalElapsedTime(bool readCurrentTime=false) const
static magnitudeType magnitude(T a)
Teuchos::RCP< const row_matrix_type > getMatrix() const
Returns a reference to the matrix to be preconditioned.
Definition: Ifpack2_ILUT_def.hpp:292
std::string description() const
Return a simple one-line description of this object.
Definition: Ifpack2_ILUT_def.hpp:880
void push_back(const value_type &x)
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
Definition: Ifpack2_ILUT_def.hpp:913
double getComputeTime() const
Returns the time spent in Compute().
Definition: Ifpack2_ILUT_def.hpp:352
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Type of the Tpetra::CrsMatrix specialization that this class uses for the L and U factors...
Definition: Ifpack2_ILUT_decl.hpp:126
TypeTo as(const TypeFrom &t)
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the input matrix&#39;s communicator.
Definition: Ifpack2_ILUT_def.hpp:281
T * getRawPtr() const
void add_to_heap(const Ordinal &idx, Teuchos::Array< Ordinal > &heap, SizeType &heap_len)
Definition: Ifpack2_Heap.hpp:70
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_ILUT_def.hpp:364
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the ILUT preconditioner to X, resulting in Y.
Definition: Ifpack2_ILUT_def.hpp:810
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
double getApplyTime() const
Returns the time spent in apply().
Definition: Ifpack2_ILUT_def.hpp:358
void setParameters(const Teuchos::ParameterList &params)
Set preconditioner parameters.
Definition: Ifpack2_ILUT_def.hpp:148
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_ILUT_decl.hpp:109