Skip to content

Commit

Permalink
Implement hierarchical unpacking
Browse files Browse the repository at this point in the history
  • Loading branch information
tjfulle committed May 27, 2020
1 parent 1455ce4 commit 3e69851
Show file tree
Hide file tree
Showing 7 changed files with 794 additions and 145 deletions.
24 changes: 9 additions & 15 deletions packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4747,9 +4747,7 @@ namespace Tpetra {
// Read parameters from the input ParameterList.
//
{
Details::ProfilingRegion region(
"Tpetra::CrsMatrix::fillCompete",
"ParameterList");
Details::ProfilingRegion region_fc("Tpetra::CrsMatrix::fillCompete", "ParameterList");

// If true, the caller promises that no process did nonlocal
// changes since the last call to fillComplete.
Expand Down Expand Up @@ -4798,9 +4796,7 @@ namespace Tpetra {
}
}
if (this->isStaticGraph ()) {
Details::ProfilingRegion region(
"Tpetra::CrsMatrix::fillCompete",
"isStaticGraph");
Details::ProfilingRegion region_isg("Tpetra::CrsMatrix::fillCompete", "isStaticGraph");
// FIXME (mfh 14 Nov 2016) In order to fix #843, I enable the
// checks below only in debug mode. It would be nicer to do a
// local check, then propagate the error state in a deferred
Expand Down Expand Up @@ -4850,9 +4846,7 @@ namespace Tpetra {
this->fillLocalMatrix (params);
}
else {
Details::ProfilingRegion region(
"Tpetra::CrsMatrix::fillCompete",
"isNotStaticGraph");
Details::ProfilingRegion region_insg("Tpetra::CrsMatrix::fillCompete", "isNotStaticGraph");
// Set the graph's domain and range Maps. This will clear the
// Import if the domain Map has changed (is a different
// pointer), and the Export if the range Map has changed (is a
Expand Down Expand Up @@ -4906,9 +4900,9 @@ namespace Tpetra {
}

{
Details::ProfilingRegion region(
"Tpetra::CrsMatrix::fillCompete",
"callComputeGlobalConstamnts");
Details::ProfilingRegion region_ccgc(
"Tpetra::CrsMatrix::fillCompete", "callComputeGlobalConstamnts"
);
const bool callComputeGlobalConstants = params.get () == nullptr ||
params->get ("compute global constants", true);
if (callComputeGlobalConstants) {
Expand All @@ -4920,9 +4914,9 @@ namespace Tpetra {

this->fillComplete_ = true; // Now we're fill complete!
{
Details::ProfilingRegion region(
"Tpetra::CrsMatrix::fillCompete",
"checkInternalState");
Details::ProfilingRegion region_cis(
"Tpetra::CrsMatrix::fillCompete", "checkInternalState"
);
this->checkInternalState ();
}
}
Expand Down
42 changes: 36 additions & 6 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,16 @@ namespace { // (anonymous)
else {
// This could throw invalid_argument or out_of_range.
// Go ahead and let it do so.
const long long val = std::stoll(stringToUpper(varVal));
TEUCHOS_TEST_FOR_EXCEPTION
(val < static_cast<long long>(0), std::out_of_range,
prefix << "Environment variable \""
<< environmentVariableName << "\" is supposed to be a size, "
"but it has a negative integer value " << val << ".");
long long val = std::stoll(stringToUpper(varVal));
if (val < static_cast<long long>(0)) {
// If negative - user has requested threshold be lifted
return std::numeric_limits<size_t>::max();
}
// TEUCHOS_TEST_FOR_EXCEPTION
// (val < static_cast<long long>(0), std::out_of_range,
// prefix << "Environment variable \""
// << environmentVariableName << "\" is supposed to be a size, "
// "but it has a negative integer value " << val << ".");
if (sizeof(long long) > sizeof(size_t)) {
// It's hard to test this code, but I want to try writing it
// at least, in case we ever have to run on 32-bit machines or
Expand Down Expand Up @@ -284,6 +288,10 @@ namespace { // (anonymous)
#endif // TPETRA_ASSUME_CUDA_AWARE_MPI
}

/// Compile-time default for the hierarchical-unpack setting; this is the
/// value Behavior::hierarchicalUnpack() passes along as its default.
constexpr bool
hierarchicalUnpackDefault ()
{
  return true;
}

} // namespace (anonymous)

bool Behavior::debug ()
Expand Down Expand Up @@ -459,6 +467,28 @@ bool Behavior::timing (const char name[])
envVarName,
defaultValue);
}

/// Enable timing programmatically by clearing the
/// BehaviorDetails::timingDisabled_ flag.
///
/// NOTE(review): how Behavior::timing() consults this flag is not visible
/// here -- confirm against its implementation.
void Behavior::enable_timing() {
BehaviorDetails::timingDisabled_ = false;
}

/// Disable timing programmatically by setting the
/// BehaviorDetails::timingDisabled_ flag.
///
/// NOTE(review): how Behavior::timing() consults this flag is not visible
/// here -- confirm against its implementation.
void Behavior::disable_timing() {
BehaviorDetails::timingDisabled_ = true;
}

/// Report whether hierarchical unpacking should be used.
///
/// The result is produced by idempotentlyGetEnvironmentVariableAsBool, which
/// receives the environment variable name TPETRA_HIERARCHICAL_UNPACK, the
/// compile-time default from hierarchicalUnpackDefault(), and two
/// function-local statics that persist between calls.
bool Behavior::hierarchicalUnpack ()
{
  constexpr bool fallback = hierarchicalUnpackDefault();
  constexpr char variableName[] = "TPETRA_HIERARCHICAL_UNPACK";

  // State that survives across calls. Presumably this lets the helper
  // consult the environment only on first use -- NOTE(review): confirm
  // against the helper's definition.
  static bool alreadyInitialized = false;
  static bool cachedValue = fallback;

  return idempotentlyGetEnvironmentVariableAsBool (cachedValue,
                                                   alreadyInitialized,
                                                   variableName,
                                                   fallback);
}

} // namespace Details
} // namespace Tpetra

11 changes: 11 additions & 0 deletions packages/tpetra/core/src/Tpetra_Details_Behavior.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,12 @@ class Behavior {
/// "CrsGraph::insertLocalIndices".
static bool timing (const char name[]);

/// \brief Disable timing programmatically.
///
/// The implementation sets BehaviorDetails::timingDisabled_ to true.
static void disable_timing();

/// \brief Enable timing programmatically.
///
/// The implementation sets BehaviorDetails::timingDisabled_ to false.
static void enable_timing();

/// \brief Whether to assume that MPI is CUDA aware.
///
/// An MPI implementation is "CUDA aware" if it can accept CUDA
Expand Down Expand Up @@ -214,6 +220,7 @@ class Behavior {
/// separate question.
static size_t longRowMinNumEntries ();

<<<<<<< HEAD
/// \brief the threshold for transitioning from device to host
///
/// If the number of elements in the multivector does not exceed this
Expand All @@ -222,6 +229,10 @@ class Behavior {
/// By default this is 10000, but may be altered by the environment
/// variable TPETRA_VECTOR_DEVICE_THRESHOLD
static size_t multivectorKernelLocationThreshold ();
=======
/// \brief Unpack rows of a matrix using hierarchical unpacking
static bool hierarchicalUnpack ();
>>>>>>> Implement hierarhial unpacking

/// \brief Use Teuchos::Timer in Tpetra::ProfilingRegion
///
Expand Down
17 changes: 8 additions & 9 deletions packages/tpetra/core/src/Tpetra_Details_packCrsMatrix_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,15 +935,14 @@ packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,

template<typename ST, typename LO, typename GO, typename NT>
void
packCrsMatrixNew (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
Kokkos::DualView<char*,
typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
const Kokkos::DualView<size_t*,
typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
const Kokkos::DualView<const LO*,
typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
size_t& constantNumPackets,
Distributor& distor)
packCrsMatrixNew(
const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
size_t& constantNumPackets,
Distributor& distor
)
{
using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
Expand Down
Loading

0 comments on commit 3e69851

Please sign in to comment.