Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_LocalSparseTriangularSolver_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
44 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
45 
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_Core.hpp"
48 #include "Teuchos_StandardParameterEntryValidators.hpp"
49 #include "Tpetra_Details_determineLocalTriangularStructure.hpp"
50 
51 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
52 # include "shylu_hts.hpp"
53 #endif
54 
55 namespace Ifpack2 {
56 
57 namespace Details {
58 struct TrisolverType {
59  enum Enum {
60  Internal,
61  HTS
62  };
63 
64  static void loadPLTypeOption (Teuchos::Array<std::string>& type_strs, Teuchos::Array<Enum>& type_enums) {
65  type_strs.resize(2);
66  type_strs[0] = "Internal";
67  type_strs[1] = "HTS";
68  type_enums.resize(2);
69  type_enums[0] = Internal;
70  type_enums[1] = HTS;
71  }
72 };
73 }
74 
75 template<class MatrixType>
76 class LocalSparseTriangularSolver<MatrixType>::HtsImpl {
77 public:
78  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> crs_matrix_type;
79 
80  void reset () {
81 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
82  Timpl_ = Teuchos::null;
83  levelset_block_size_ = 1;
84 #endif
85  }
86 
87  void setParameters (const Teuchos::ParameterList& pl) {
88  (void)pl;
89 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
90  const char* block_size_s = "trisolver: block size";
91  if (pl.isParameter(block_size_s)) {
92  TEUCHOS_TEST_FOR_EXCEPT_MSG( ! pl.isType<int>(block_size_s),
93  "The parameter \"" << block_size_s << "\" must be of type int.");
94  levelset_block_size_ = pl.get<int>(block_size_s);
95  }
96  if (levelset_block_size_ < 1)
97  levelset_block_size_ = 1;
98 #endif
99  }
100 
101  // HTS has the phases symbolic+numeric, numeric, and apply. Hence the first
102  // call to compute() will trigger the symbolic+numeric phase, and subsequent
103  // calls (with the same Timpl_) will trigger the numeric phase. In the call to
104  // initialize(), essentially nothing happens.
105  void initialize (const crs_matrix_type& /* unused */) {
106 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
107  reset();
108  transpose_ = conjugate_ = false;
109 #endif
110  }
111 
112  void compute (const crs_matrix_type& T_in, const Teuchos::RCP<Teuchos::FancyOStream>& out) {
113  (void)T_in;
114  (void)out;
115 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
116  using Teuchos::ArrayRCP;
117 
121  T_in.getAllValues(rowptr, colidx, val);
122 
123  Teuchos::RCP<HtsCrsMatrix> T_hts = Teuchos::rcpWithDealloc(
124  HTST::make_CrsMatrix(rowptr.size() - 1,
125  rowptr.getRawPtr(), colidx.getRawPtr(), val.getRawPtr(),
126  transpose_, conjugate_),
127  HtsCrsMatrixDeleter());
128 
129  if (Teuchos::nonnull(Timpl_)) {
130  // Reuse the nonzero pattern.
131  HTST::reprocess_numeric(Timpl_.get(), T_hts.get());
132  } else {
133  // Build from scratch.
134  if (T_in.getCrsGraph().is_null()) {
135  if (Teuchos::nonnull(out))
136  *out << "HTS compute failed because T_in.getCrsGraph().is_null().\n";
137  return;
138  }
139  if ( ! T_in.getCrsGraph()->isSorted()) {
140  if (Teuchos::nonnull(out))
141  *out << "HTS compute failed because ! T_in.getCrsGraph().isSorted().\n";
142  return;
143  }
144  if ( ! T_in.isStorageOptimized()) {
145  if (Teuchos::nonnull(out))
146  *out << "HTS compute failed because ! T_in.isStorageOptimized().\n";
147  return;
148  }
149 
150  typename HTST::PreprocessArgs args;
151  args.T = T_hts.get();
152  args.max_nrhs = 1;
153 #ifdef _OPENMP
154  args.nthreads = omp_get_max_threads();
155 #else
156  args.nthreads = 1;
157 #endif
158  args.save_for_reprocess = true;
159  typename HTST::Options opts;
160  opts.levelset_block_size = levelset_block_size_;
161  args.options = &opts;
162 
163  try {
164  Timpl_ = Teuchos::rcpWithDealloc(HTST::preprocess(args), TImplDeleter());
165  } catch (const std::exception& e) {
166  if (Teuchos::nonnull(out))
167  *out << "HTS preprocess threw: " << e.what() << "\n";
168  }
169  }
170 #endif
171  }
172 
173  // HTS may not be able to handle a matrix, so query whether compute()
174  // succeeded.
175  bool isComputed () {
176 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
177  return Teuchos::nonnull(Timpl_);
178 #else
179  return false;
180 #endif
181  }
182 
183  // Y := beta * Y + alpha * (M * X)
184  void localApply (const MV& X, MV& Y,
185  const Teuchos::ETransp /* mode */,
186  const scalar_type& alpha, const scalar_type& beta) const {
187  (void)X;
188  (void)Y;
189  (void)alpha;
190  (void)beta;
191 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
192  const auto& X_view = X.template getLocalView<Kokkos::HostSpace>();
193  const auto& Y_view = Y.template getLocalView<Kokkos::HostSpace>();
194  // Only does something if #rhs > current capacity.
195  HTST::reset_max_nrhs(Timpl_.get(), X_view.extent(1));
196  // Switch alpha and beta because of HTS's opposite convention.
197  HTST::solve_omp(Timpl_.get(),
198  // For std/Kokkos::complex.
199  reinterpret_cast<const scalar_type*>(X_view.data()),
200  X_view.extent(1),
201  // For std/Kokkos::complex.
202  reinterpret_cast<scalar_type*>(Y_view.data()),
203  beta, alpha);
204 #endif
205  }
206 
207 private:
208 #ifdef HAVE_IFPACK2_SHYLU_NODEHTS
209  typedef ::Experimental::HTS<local_ordinal_type, size_t, scalar_type> HTST;
210  typedef typename HTST::Impl TImpl;
211  typedef typename HTST::CrsMatrix HtsCrsMatrix;
212 
213  struct TImplDeleter {
214  void free (TImpl* impl) {
215  HTST::delete_Impl(impl);
216  }
217  };
218 
219  struct HtsCrsMatrixDeleter {
220  void free (HtsCrsMatrix* T) {
221  HTST::delete_CrsMatrix(T);
222  }
223  };
224 
225  Teuchos::RCP<TImpl> Timpl_;
226  bool transpose_, conjugate_;
227  int levelset_block_size_;
228 #endif
229 };
230 
231 template<class MatrixType>
234  A_ (A)
235 {
236  initializeState();
237  typedef typename Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
239  if (! A.is_null ()) {
241  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
243  (A_crs.is_null (), std::invalid_argument,
244  "Ifpack2::LocalSparseTriangularSolver constructor: "
245  "The input matrix A is not a Tpetra::CrsMatrix.");
246  A_crs_ = A_crs;
247  }
248 }
249 
250 template<class MatrixType>
254  A_ (A),
255  out_ (out)
256 {
257  initializeState();
258  if (! out_.is_null ()) {
259  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
260  << std::endl;
261  }
262  typedef typename Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
264  if (! A.is_null ()) {
266  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
268  (A_crs.is_null (), std::invalid_argument,
269  "Ifpack2::LocalSparseTriangularSolver constructor: "
270  "The input matrix A is not a Tpetra::CrsMatrix.");
271  A_crs_ = A_crs;
272  }
273 }
274 
275 template<class MatrixType>
278 {
279  initializeState();
280 }
281 
282 template<class MatrixType>
285  out_ (out)
286 {
287  initializeState();
288  if (! out_.is_null ()) {
289  *out_ << ">>> DEBUG Ifpack2::LocalSparseTriangularSolver constructor"
290  << std::endl;
291  }
292 }
293 
294 template<class MatrixType>
296 {
297  isInitialized_ = false;
298  isComputed_ = false;
299  reverseStorage_ = false;
300  isInternallyChanged_ = false;
301  numInitialize_ = 0;
302  numCompute_ = 0;
303  numApply_ = 0;
304  initializeTime_ = 0.0;
305  computeTime_ = 0.0;
306  applyTime_ = 0.0;
307  uplo_ = "N";
308  diag_ = "N";
309 }
310 
311 template<class MatrixType>
314 {}
315 
316 template<class MatrixType>
317 void
320 {
321  using Teuchos::RCP;
323  using Teuchos::Array;
324 
325  Details::TrisolverType::Enum trisolverType = Details::TrisolverType::Internal;
326  do {
327  static const char typeName[] = "trisolver: type";
328 
329  if ( ! pl.isType<std::string>(typeName)) break;
330 
331  // Map std::string <-> TrisolverType::Enum.
332  Array<std::string> trisolverTypeStrs;
333  Array<Details::TrisolverType::Enum> trisolverTypeEnums;
334  Details::TrisolverType::loadPLTypeOption (trisolverTypeStrs, trisolverTypeEnums);
336  s2i(trisolverTypeStrs (), trisolverTypeEnums (), typeName, false);
337 
338  trisolverType = s2i.getIntegralValue(pl.get<std::string>(typeName));
339  } while (0);
340 
341  if (trisolverType == Details::TrisolverType::HTS) {
342  htsImpl_ = Teuchos::rcp (new HtsImpl ());
343  htsImpl_->setParameters (pl);
344  }
345 
346  if (pl.isParameter("trisolver: reverse U"))
347  reverseStorage_ = pl.get<bool>("trisolver: reverse U");
348 
350  (reverseStorage_ && trisolverType == Details::TrisolverType::HTS,
351  std::logic_error, "Ifpack2::LocalSparseTriangularSolver::setParameters: "
352  "You are not allowed to enable both HTS and the \"trisolver: reverse U\" "
353  "options. See GitHub issue #2647.");
354 }
355 
356 template<class MatrixType>
357 void
360 {
361  using crs_matrix_type = Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
363  using local_matrix_type = typename crs_matrix_type::local_matrix_type;
364 
365  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::initialize: ";
366  if (! out_.is_null ()) {
367  *out_ << ">>> DEBUG " << prefix << std::endl;
368  }
369 
371  (A_.is_null (), std::runtime_error, prefix << "You must call "
372  "setMatrix() with a nonnull input matrix before you may call "
373  "initialize() or compute().");
374  if (A_crs_.is_null ()) {
375  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
377  (A_crs.get () == nullptr, std::invalid_argument,
378  prefix << "The input matrix A is not a Tpetra::CrsMatrix.");
379  A_crs_ = A_crs;
380  }
381  auto G = A_crs_->getGraph ();
383  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
384  "but A_crs_'s RowGraph G is null. "
385  "Please report this bug to the Ifpack2 developers.");
386  // At this point, the graph MUST be fillComplete. The "initialize"
387  // (symbolic) part of setup only depends on the graph structure, so
388  // the matrix itself need not be fill complete.
390  (! G->isFillComplete (), std::runtime_error, "If you call this method, "
391  "the matrix's graph must be fill complete. It is not.");
392 
393  // FIXME (mfh 01,02 Jun 2018) isUpperTriangular has been DEPRECATED.
394  // See GitHub Issue #2630. I'm using isUpperTriangularImpl ONLY to
395  // avoid deprecated warnings. Users may NOT call this method.
396  //
397  // FIXME (mfh 02 Jun 2018) Move the
398  // determineLocalTriangularStructure call above this test, so we can
399  // use that result, rather than the deprecated method.
400  if (reverseStorage_ && A_crs_->isUpperTriangularImpl() && htsImpl_.is_null()) {
401  // Reverse the storage for an upper triangular matrix
402  auto Alocal = A_crs_->getLocalMatrix();
403  auto ptr = Alocal.graph.row_map;
404  auto ind = Alocal.graph.entries;
405  auto val = Alocal.values;
406 
407  auto numRows = Alocal.numRows();
408  auto numCols = Alocal.numCols();
409  auto numNnz = Alocal.nnz();
410 
411  typename decltype(ptr)::non_const_type newptr ("ptr", ptr.extent (0));
412  typename decltype(ind)::non_const_type newind ("ind", ind.extent (0));
413  decltype(val) newval ("val", val.extent (0));
414 
415  // FIXME: The code below assumes UVM
416  crs_matrix_type::execution_space::fence();
417  newptr(0) = 0;
418  for (local_ordinal_type row = 0, rowStart = 0; row < numRows; ++row) {
419  auto A_r = Alocal.row(numRows-1 - row);
420 
421  auto numEnt = A_r.length;
422  for (local_ordinal_type k = 0; k < numEnt; ++k) {
423  newind(rowStart + k) = numCols-1 - A_r.colidx(numEnt-1 - k);
424  newval(rowStart + k) = A_r.value (numEnt-1 - k);
425  }
426  rowStart += numEnt;
427  newptr(row+1) = rowStart;
428  }
429  crs_matrix_type::execution_space::fence();
430 
431  // Reverse maps
432  using map_type = typename crs_matrix_type::map_type;
433  Teuchos::RCP<map_type> newRowMap, newColMap;
434  {
435  // Reverse row map
436  auto rowMap = A_->getRowMap();
437  auto numElems = rowMap->getNodeNumElements();
438  auto rowElems = rowMap->getNodeElementList();
439 
440  Teuchos::Array<global_ordinal_type> newRowElems(rowElems.size());
441  for (size_t i = 0; i < numElems; i++)
442  newRowElems[i] = rowElems[numElems-1 - i];
443 
444  newRowMap = Teuchos::rcp(new map_type(rowMap->getGlobalNumElements(), newRowElems, rowMap->getIndexBase(), rowMap->getComm()));
445  }
446  {
447  // Reverse column map
448  auto colMap = A_->getColMap();
449  auto numElems = colMap->getNodeNumElements();
450  auto colElems = colMap->getNodeElementList();
451 
452  Teuchos::Array<global_ordinal_type> newColElems(colElems.size());
453  for (size_t i = 0; i < numElems; i++)
454  newColElems[i] = colElems[numElems-1 - i];
455 
456  newColMap = Teuchos::rcp(new map_type(colMap->getGlobalNumElements(), newColElems, colMap->getIndexBase(), colMap->getComm()));
457  }
458 
459  // Construct new matrix
460  local_matrix_type newLocalMatrix("Upermuted", numRows, numCols, numNnz, newval, newptr, newind);
461 
462  A_crs_ = Teuchos::rcp(new crs_matrix_type(newLocalMatrix, newRowMap, newColMap, A_crs_->getDomainMap(), A_crs_->getRangeMap()));
463 
464  isInternallyChanged_ = true;
465  }
466 
467  if (Teuchos::nonnull (htsImpl_))
468  {
469  htsImpl_->initialize (*A_crs_);
470  isInternallyChanged_ = true;
471  }
472 
473  auto lclMatrix = A_crs_->getLocalMatrix ();
474  auto lclRowMap = A_crs_->getRowMap ()->getLocalMap ();
475  auto lclColMap = A_crs_->getColMap ()->getLocalMap ();
476  using Tpetra::Details::determineLocalTriangularStructure;
477  // mfh 30 Apr 2018: See GitHub Issue #2658 for why this is false.
478  constexpr bool ignoreMapsForTriangularStructure = true;
479  auto result =
480  determineLocalTriangularStructure (lclMatrix.graph, lclRowMap, lclColMap,
481  ignoreMapsForTriangularStructure);
482  using LO = local_ordinal_type;
483  const LO lclNumRows = lclRowMap.getNodeNumElements ();
484  const LO lclNumCols = lclColMap.getNodeNumElements ();
485  // NOTE (mfh 30 Apr 2018) Original test for implicit unit diagonal was
486  //
487  // (A_crs_->getNodeNumDiags () < A_crs_->getNodeNumRows ()) ? "U" : "N";
488  //
489  // I don't agree with this test -- it's not an implicitly stored
490  // unit diagonal if there are SOME entries -- but I'm leaving it for
491  // backwards compatibility.
492  this->diag_ = (result.diagCount < lclNumRows) ? "U" : "N";
493  this->uplo_ = result.couldBeLowerTriangular ? "L" :
494  (result.couldBeUpperTriangular ? "U" : "N");
495 
496  isInitialized_ = true;
497  ++numInitialize_;
498 }
499 
500 template<class MatrixType>
501 void
504 {
505  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::compute: ";
506  if (! out_.is_null ()) {
507  *out_ << ">>> DEBUG " << prefix << std::endl;
508  }
509 
511  (A_.is_null (), std::runtime_error, prefix << "You must call "
512  "setMatrix() with a nonnull input matrix before you may call "
513  "initialize() or compute().");
515  (A_crs_.is_null (), std::logic_error, prefix << "A_ is nonnull, but "
516  "A_crs_ is null. Please report this bug to the Ifpack2 developers.");
517  // At this point, the matrix MUST be fillComplete.
519  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
520  "method, the matrix must be fill complete. It is not.");
521 
522  if (! isInitialized_) {
523  initialize ();
524  }
526  (! isInitialized_, std::logic_error, prefix << "initialize() should have "
527  "been called by this point, but isInitialized_ is false. "
528  "Please report this bug to the Ifpack2 developers.");
529 
530  if (Teuchos::nonnull (htsImpl_))
531  htsImpl_->compute (*A_crs_, out_);
532 
533  isComputed_ = true;
534  ++numCompute_;
535 }
536 
537 template<class MatrixType>
539 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
541  Tpetra::MultiVector<scalar_type, local_ordinal_type,
543  Teuchos::ETransp mode,
544  scalar_type alpha,
545  scalar_type beta) const
546 {
547  using Teuchos::RCP;
548  using Teuchos::rcp;
549  using Teuchos::rcpFromRef;
550  typedef scalar_type ST;
551  typedef Teuchos::ScalarTraits<ST> STS;
552  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::apply: ";
553  if (! out_.is_null ()) {
554  *out_ << ">>> DEBUG " << prefix;
555  if (A_crs_.is_null ()) {
556  *out_ << "A_crs_ is null!" << std::endl;
557  }
558  else {
560  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
561  const std::string uplo = this->uplo_;
562  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
563  (mode == Teuchos::TRANS ? "T" : "N");
564  const std::string diag = this->diag_;
565  *out_ << "uplo=\"" << uplo
566  << "\", trans=\"" << trans
567  << "\", diag=\"" << diag << "\"" << std::endl;
568  }
569  }
570 
572  (! isComputed (), std::runtime_error, prefix << "If compute() has not yet "
573  "been called, or if you have changed the matrix via setMatrix(), you must "
574  "call compute() before you may call this method.");
575  // If isComputed() is true, it's impossible for the matrix to be
576  // null, or for it not to be a Tpetra::CrsMatrix.
578  (A_.is_null (), std::logic_error, prefix << "A_ is null. "
579  "Please report this bug to the Ifpack2 developers.");
581  (A_crs_.is_null (), std::logic_error, prefix << "A_crs_ is null. "
582  "Please report this bug to the Ifpack2 developers.");
583  // However, it _is_ possible that the user called resumeFill() on
584  // the matrix, after calling compute(). This is NOT allowed.
586  (! A_crs_->isFillComplete (), std::runtime_error, "If you call this "
587  "method, the matrix must be fill complete. It is not. This means that "
588  " you must have called resumeFill() on the matrix before calling apply(). "
589  "This is NOT allowed. Note that this class may use the matrix's data in "
590  "place without copying it. Thus, you cannot change the matrix and expect "
591  "the solver to stay the same. If you have changed the matrix, first call "
592  "fillComplete() on it, then call compute() on this object, before you call"
593  " apply(). You do NOT need to call setMatrix, as long as the matrix "
594  "itself (that is, its address in memory) is the same.");
595 
596  auto G = A_crs_->getGraph ();
598  (G.is_null (), std::logic_error, prefix << "A_ and A_crs_ are nonnull, "
599  "but A_crs_'s RowGraph G is null. "
600  "Please report this bug to the Ifpack2 developers.");
601  auto importer = G->getImporter ();
602  auto exporter = G->getExporter ();
603 
604  if (! importer.is_null ()) {
605  if (X_colMap_.is_null () || X_colMap_->getNumVectors () != X.getNumVectors ()) {
606  X_colMap_ = rcp (new MV (importer->getTargetMap (), X.getNumVectors ()));
607  }
608  else {
609  X_colMap_->putScalar (STS::zero ());
610  }
611  // See discussion of Github Issue #672 for why the Import needs to
612  // use the ZERO CombineMode. The case where the Export is
613  // nontrivial is likely never exercised.
614  X_colMap_->doImport (X, *importer, Tpetra::ZERO);
615  }
616  RCP<const MV> X_cur = importer.is_null () ? rcpFromRef (X) :
617  Teuchos::rcp_const_cast<const MV> (X_colMap_);
618 
619  if (! exporter.is_null ()) {
620  if (Y_rowMap_.is_null () || Y_rowMap_->getNumVectors () != Y.getNumVectors ()) {
621  Y_rowMap_ = rcp (new MV (exporter->getSourceMap (), Y.getNumVectors ()));
622  }
623  else {
624  Y_rowMap_->putScalar (STS::zero ());
625  }
626  Y_rowMap_->doExport (Y, *importer, Tpetra::ADD);
627  }
628  RCP<MV> Y_cur = exporter.is_null () ? rcpFromRef (Y) : Y_rowMap_;
629 
630  localApply (*X_cur, *Y_cur, mode, alpha, beta);
631 
632  if (! exporter.is_null ()) {
633  Y.putScalar (STS::zero ());
634  Y.doExport (*Y_cur, *exporter, Tpetra::ADD);
635  }
636 
637  ++numApply_;
638 }
639 
640 template<class MatrixType>
641 void
643 localTriangularSolve (const MV& Y,
644  MV& X,
645  const Teuchos::ETransp mode) const
646 {
647  using Teuchos::CONJ_TRANS;
648  using Teuchos::NO_TRANS;
649  using Teuchos::TRANS;
650  typedef Kokkos::HostSpace host_memory_space;
651  using device_type = typename MV::device_type;
652  using dev_memory_space = typename device_type::memory_space;
653  const char tfecfFuncName[] = "localTriangularSolve: ";
654 
656  (! A_crs_->isFillComplete (), std::runtime_error,
657  "The matrix is not fill complete.");
659  (! X.isConstantStride () || ! Y.isConstantStride (), std::invalid_argument,
660  "X and Y must be constant stride.");
662  ( A_crs_->getNodeNumRows() > 0 && this->uplo_ == "N", std::runtime_error,
663  "The matrix is neither upper triangular or lower triangular. "
664  "You may only call this method if the matrix is triangular. "
665  "Remember that this is a local (per MPI process) property, and that "
666  "Tpetra only knows how to do a local (per process) triangular solve.");
669  (STS::isComplex && mode == TRANS, std::logic_error, "This method does "
670  "not currently support non-conjugated transposed solve (mode == "
671  "Teuchos::TRANS) for complex scalar types.");
672 
673  // FIXME (mfh 19 May 2016) This makes some Ifpack2 tests fail.
674  //
675  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
676  // (Y.template need_sync<device_type> () && !
677  // Y.template need_sync<Kokkos::HostSpace> (), std::runtime_error,
678  // "Y must be sync'd to device memory before you may call this method.");
679 
680  const std::string uplo = this->uplo_;
681  const std::string trans = (mode == Teuchos::CONJ_TRANS) ? "C" :
682  (mode == Teuchos::TRANS ? "T" : "N");
683  const std::string diag = this->diag_;
684  auto A_lcl = this->A_crs_->getLocalMatrix ();
685 
686  // NOTE (mfh 20 Aug 2017): KokkosSparse::trsv currently is a
687  // sequential, host-only code. See
688  // https://github.com/kokkos/kokkos-kernels/issues/48. This
689  // means that we need to sync to host, then sync back to device
690  // when done.
691  X.template sync<host_memory_space> ();
692  const_cast<MV&> (Y).template sync<host_memory_space> ();
693  X.template modify<host_memory_space> (); // we will write to X
694 
695  if (X.isConstantStride () && Y.isConstantStride ()) {
696  auto X_lcl = X.template getLocalView<host_memory_space> ();
697  auto Y_lcl = Y.template getLocalView<host_memory_space> ();
698  KokkosSparse::trsv (uplo.c_str (), trans.c_str (), diag.c_str (),
699  A_lcl, Y_lcl, X_lcl);
700  }
701  else {
702  const size_t numVecs =
703  std::min (X.getNumVectors (), Y.getNumVectors ());
704  for (size_t j = 0; j < numVecs; ++j) {
705  auto X_j = X.getVector (j);
706  auto Y_j = X.getVector (j);
707  auto X_lcl = X_j->template getLocalView<host_memory_space> ();
708  auto Y_lcl = Y_j->template getLocalView<host_memory_space> ();
709  KokkosSparse::trsv (uplo.c_str (), trans.c_str (),
710  diag.c_str (), A_lcl, Y_lcl, X_lcl);
711  }
712  }
713 
714  X.template sync<dev_memory_space> ();
715  const_cast<MV&> (Y).template sync<dev_memory_space> ();
716 }
717 
718 template<class MatrixType>
719 void
720 LocalSparseTriangularSolver<MatrixType>::
721 localApply (const MV& X,
722  MV& Y,
723  const Teuchos::ETransp mode,
724  const scalar_type& alpha,
725  const scalar_type& beta) const
726 {
727  if (mode == Teuchos::NO_TRANS && Teuchos::nonnull (htsImpl_) &&
728  htsImpl_->isComputed ()) {
729  htsImpl_->localApply (X, Y, mode, alpha, beta);
730  return;
731  }
732 
733  using Teuchos::RCP;
734  typedef scalar_type ST;
735  typedef Teuchos::ScalarTraits<ST> STS;
736 
737  if (beta == STS::zero ()) {
738  if (alpha == STS::zero ()) {
739  Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
740  }
741  else { // alpha != 0
742  this->localTriangularSolve (X, Y, mode);
743  if (alpha != STS::one ()) {
744  Y.scale (alpha);
745  }
746  }
747  }
748  else { // beta != 0
749  if (alpha == STS::zero ()) {
750  Y.scale (beta); // Y := beta * Y
751  }
752  else { // alpha != 0
753  MV Y_tmp (Y, Teuchos::Copy);
754  this->localTriangularSolve (X, Y_tmp, mode); // Y_tmp := M * X
755  Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
756  }
757  }
758 }
759 
760 
761 template <class MatrixType>
762 int
765  return numInitialize_;
766 }
767 
768 template <class MatrixType>
769 int
771 getNumCompute () const {
772  return numCompute_;
773 }
774 
775 template <class MatrixType>
776 int
778 getNumApply () const {
779  return numApply_;
780 }
781 
782 template <class MatrixType>
783 double
786  return initializeTime_;
787 }
788 
789 template<class MatrixType>
790 double
792 getComputeTime () const {
793  return computeTime_;
794 }
795 
796 template<class MatrixType>
797 double
799 getApplyTime() const {
800  return applyTime_;
801 }
802 
803 template <class MatrixType>
804 std::string
806 description () const
807 {
808  std::ostringstream os;
809 
810  // Output is a valid YAML dictionary in flow style. If you don't
811  // like everything on a single line, you should call describe()
812  // instead.
813  os << "\"Ifpack2::LocalSparseTriangularSolver\": {";
814  if (this->getObjectLabel () != "") {
815  os << "Label: \"" << this->getObjectLabel () << "\", ";
816  }
817  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
818  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
819 
820  if (A_.is_null ()) {
821  os << "Matrix: null";
822  }
823  else {
824  os << "Matrix: not null"
825  << ", Global matrix dimensions: ["
826  << A_->getGlobalNumRows () << ", "
827  << A_->getGlobalNumCols () << "]";
828  }
829 
830  if (Teuchos::nonnull (htsImpl_))
831  os << ", HTS computed: " << (htsImpl_->isComputed () ? "true" : "false");
832 
833  os << "}";
834  return os.str ();
835 }
836 
837 template <class MatrixType>
840  const Teuchos::EVerbosityLevel verbLevel) const
841 {
842  using std::endl;
843  // Default verbosity level is VERB_LOW, which prints only on Process
844  // 0 of the matrix's communicator.
845  const Teuchos::EVerbosityLevel vl
846  = (verbLevel == Teuchos::VERB_DEFAULT) ? Teuchos::VERB_LOW : verbLevel;
847 
848  if (vl != Teuchos::VERB_NONE) {
849  // Print only on Process 0 in the matrix's communicator. If the
850  // matrix is null, though, we have to get the communicator from
851  // somewhere, so we ask Tpetra for its default communicator. If
852  // MPI is enabled, this wraps MPI_COMM_WORLD or a clone thereof.
853  auto comm = A_.is_null () ?
854  Tpetra::getDefaultComm () :
855  A_->getComm ();
856 
857  // Users aren't supposed to do anything with the matrix on
858  // processes where its communicator is null.
859  if (! comm.is_null () && comm->getRank () == 0) {
860  // By convention, describe() should always begin with a tab.
861  Teuchos::OSTab tab0 (out);
862  // Output is in YAML format. We have to escape the class name,
863  // because it has a colon.
864  out << "\"Ifpack2::LocalSparseTriangularSolver\":" << endl;
865  Teuchos::OSTab tab1 (out);
866  out << "Scalar: " << Teuchos::TypeNameTraits<scalar_type>::name () << endl
867  << "LocalOrdinal: " << Teuchos::TypeNameTraits<local_ordinal_type>::name () << endl
868  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<global_ordinal_type>::name () << endl
869  << "Node: " << Teuchos::TypeNameTraits<node_type>::name () << endl;
870  }
871  }
872 }
873 
874 template <class MatrixType>
877 getDomainMap () const
878 {
880  (A_.is_null (), std::runtime_error,
881  "Ifpack2::LocalSparseTriangularSolver::getDomainMap: "
882  "The matrix is null. Please call setMatrix() with a nonnull input "
883  "before calling this method.");
884  return A_->getDomainMap ();
885 }
886 
887 template <class MatrixType>
890 getRangeMap () const
891 {
893  (A_.is_null (), std::runtime_error,
894  "Ifpack2::LocalSparseTriangularSolver::getRangeMap: "
895  "The matrix is null. Please call setMatrix() with a nonnull input "
896  "before calling this method.");
897  return A_->getRangeMap ();
898 }
899 
900 template<class MatrixType>
903 {
904  const char prefix[] = "Ifpack2::LocalSparseTriangularSolver::setMatrix: ";
905 
906  // If the pointer didn't change, do nothing. This is reasonable
907  // because users are supposed to call this method with the same
908  // object over all participating processes, and pointer identity
909  // implies object identity.
910  if (A.getRawPtr () != A_.getRawPtr () || isInternallyChanged_) {
911  // Check in serial or one-process mode if the matrix is square.
913  (! A.is_null () && A->getComm ()->getSize () == 1 &&
914  A->getNodeNumRows () != A->getNodeNumCols (),
915  std::runtime_error, prefix << "If A's communicator only contains one "
916  "process, then A must be square. Instead, you provided a matrix A with "
917  << A->getNodeNumRows () << " rows and " << A->getNodeNumCols ()
918  << " columns.");
919 
920  // It's legal for A to be null; in that case, you may not call
921  // initialize() until calling setMatrix() with a nonnull input.
922  // Regardless, setting the matrix invalidates the preconditioner.
923  isInitialized_ = false;
924  isComputed_ = false;
925 
926  typedef typename Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
928  if (A.is_null ()) {
929  A_crs_ = Teuchos::null;
930  A_ = Teuchos::null;
931  }
932  else { // A is not null
934  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A);
936  (A_crs.is_null (), std::invalid_argument, prefix <<
937  "The input matrix A is not a Tpetra::CrsMatrix.");
938  A_crs_ = A_crs;
939  A_ = A;
940  }
941 
942  if (Teuchos::nonnull (htsImpl_))
943  htsImpl_->reset ();
944  } // pointers are not the same
945 }
946 
947 } // namespace Ifpack2
948 
949 #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_INSTANT(S,LO,GO,N) \
950  template class Ifpack2::LocalSparseTriangularSolver< Tpetra::RowMatrix<S, LO, GO, N> >;
951 
952 #endif // IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, and put the result in Y.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:539
int getNumInitialize() const
Return the number of calls to initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:764
VERB_DEFAULT
Teuchos::RCP< const map_type > getDomainMap() const
The domain of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:877
size_type size() const
VERB_LOW
VERB_NONE
Teuchos::RCP< const map_type > getRangeMap() const
The range of this operator.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:890
int getNumCompute() const
Return the number of calls to compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:771
double getApplyTime() const
Return the time spent in apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:799
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(throw_exception_test, Exception, msg)
void compute()
"Numeric" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:503
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with given verbosity to the given output stream.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:839
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
MatrixType::global_ordinal_type global_ordinal_type
Type of the global indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:93
T * get() const
bool is_null() const
Ifpack2 implementation details.
MatrixType::node_type node_type
Node type of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:95
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:209
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
double getComputeTime() const
Return the time spent in compute().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:792
int getNumApply() const
Return the number of calls to apply().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:778
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:77
void initialize()
"Symbolic" phase of setup
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:359
bool isType(const std::string &name) const
Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Specialization of Tpetra::Map used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:100
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Set this preconditioner&#39;s matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:902
void resize(size_type new_size, const value_type &x=value_type())
double getInitializeTime() const
Return the time spent in initialize().
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:785
IntegralType getIntegralValue(const std::string &str, const std::string &paramName="", const std::string &sublistName="") const
bool nonnull(const boost::shared_ptr< T > &p)
MatrixType::local_ordinal_type local_ordinal_type
Type of the local indices of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:91
T * getRawPtr() const
bool isParameter(const std::string &name) const
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Specialization of Tpetra::CrsMatrix used by this class.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:106
T * getRawPtr() const
void setParameters(const Teuchos::ParameterList &params)
Set this object&#39;s parameters.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:319
MatrixType::scalar_type scalar_type
Type of the entries of the input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:89
std::string description() const
A one-line description of this object.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:806
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
virtual ~LocalSparseTriangularSolver()
Destructor (virtual for memory safety).
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:313
LocalSparseTriangularSolver()
Constructor that takes no input matrix.
Definition: Ifpack2_LocalSparseTriangularSolver_def.hpp:277
static std::string name()
std::string typeName(const T &t)