Added a function to VTKUtilities to avoid empty ranks after redistributeByKdTree (#2569)

* added a function that tries to avoid empty ranks after the k-d tree redistribution
* fixed unused-variable warnings from a previous PR
francoishamon authored Jul 21, 2023
1 parent f4b32f1 commit 18e7bde
Showing 2 changed files with 121 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/coreComponents/common/LifoStorageHost.hpp
@@ -153,7 +153,11 @@ class LifoStorageHost : public LifoStorageCommon< T, INDEX_TYPE >
* @param maxNumberOfBuffers Maximum number of buffers to store in the LIFO storage
* @return The maximum number of buffers to allocate to fit in the given percentage of the available memory.
*/
-  static int computeNumberOfBufferOnDevice( int percent, size_t bufferSize, int maxNumberOfBuffers ) { return 0; }
+  static int computeNumberOfBufferOnDevice( int percent, size_t bufferSize, int maxNumberOfBuffers )
+  {
+    GEOS_UNUSED_VAR( percent, bufferSize, maxNumberOfBuffers );
+    return 0;
+  }

private:

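The LifoStorageHost change above replaces the one-line stub with a body that passes its parameters to GEOS_UNUSED_VAR, the project's helper for silencing unused-parameter warnings in this host-only code path (the "fixed unused variables" part of the commit message). As a rough sketch of the idea, not the actual GEOS definition, a variadic no-op such as the hypothetical unusedVar below has the same effect:

```cpp
#include <cstddef>

// Illustrative stand-in for a GEOS_UNUSED_VAR-style helper (not the GEOS macro):
// a variadic function template with an empty body, so forwarding the parameters
// to it counts as a "use" and suppresses -Wunused-parameter without generating code.
template< typename ... ARGS >
constexpr void unusedVar( ARGS const & ... ) noexcept
{}

// A stub in the same spirit as the patched computeNumberOfBufferOnDevice:
// on a host-only build there is no device memory, so the answer is always 0.
static int computeNumberOfBuffersStub( int percent, std::size_t bufferSize, int maxNumberOfBuffers )
{
  unusedVar( percent, bufferSize, maxNumberOfBuffers );
  return 0;
}

int main()
{
  return computeNumberOfBuffersStub( 80, 1024, 16 );
}
```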
116 changes: 116 additions & 0 deletions src/coreComponents/mesh/generators/VTKUtilities.cpp
@@ -640,6 +640,114 @@ vtkSmartPointer< vtkDataSet > manageGlobalIds( vtkSmartPointer< vtkDataSet > mes
  return output;
}

/**
 * @brief This function tries to make sure that no MPI rank is empty
 *
 * @param[in] mesh a vtk grid
 * @param[in] comm the MPI communicator
 * @return the redistributed vtk grid
 */
vtkSmartPointer< vtkDataSet >
ensureNoEmptyRank( vtkDataSet & mesh,
                   MPI_Comm const comm )
{
  GEOS_MARK_FUNCTION;

  // step 1: figure out who is a donor and who is a recipient
  localIndex const numElems = LvArray::integerConversion< localIndex >( mesh.GetNumberOfCells() );
  integer const numProcs = MpiWrapper::commSize( comm );

  array1d< localIndex > elemCounts( numProcs );
  MpiWrapper::allGather( numElems, elemCounts, comm );

  SortedArray< integer > recipientRanks;
  array1d< integer > donorRanks;
  recipientRanks.reserve( numProcs );
  donorRanks.reserve( numProcs );

  for( integer iRank = 0; iRank < numProcs; ++iRank )
  {
    if( elemCounts[iRank] == 0 )
    {
      recipientRanks.insert( iRank );
    }
    else if( elemCounts[iRank] > 1 ) // need at least two elems to be a donor
    {
      donorRanks.emplace_back( iRank );
    }
  }

  // step 2: at this point, we need to determine the donors and which cells they donate

  // First we sort the donors in increasing order of the number of elems they contain
  std::stable_sort( donorRanks.begin(), donorRanks.end(),
                    [&elemCounts] ( auto i1, auto i2 )
                    { return elemCounts[i1] < elemCounts[i2]; } );

  // Then, if my position is "i" in the donorRanks array, I will send half of my elems to the i-th recipient
  integer const myRank = MpiWrapper::commRank();
  auto const myPosition =
    LvArray::sortedArrayManipulation::find( donorRanks.begin(), donorRanks.size(), myRank );
  bool const isDonor = myPosition != donorRanks.size();

  // step 3: if my rank was selected to donate cells, let's proceed
  // we need to make a distinction between two configurations

  array1d< localIndex > newParts( numElems );
  newParts.setValues< parallelHostPolicy >( myRank );

  // step 3.1: donorRanks.size() >= recipientRanks.size()
  // we use a strategy that preserves load balancing
  if( isDonor && donorRanks.size() >= recipientRanks.size() )
  {
    if( myPosition < recipientRanks.size() )
    {
      integer const recipientRank = recipientRanks[myPosition];
      for( localIndex iElem = numElems/2; iElem < numElems; ++iElem )
      {
        newParts[iElem] = recipientRank; // I donate half of my cells
      }
    }
  }
  // step 3.2: donorRanks.size() < recipientRanks.size()
  // this is the unhappy path: we don't care anymore about load balancing at this stage
  // we just want the simulation to run and count on ParMetis/PTScotch to restore load balancing
  else if( isDonor && donorRanks.size() < recipientRanks.size() )
  {
    localIndex firstRecipientPosition = 0;
    for( integer iRank = 0; iRank < myPosition; ++iRank )
    {
      firstRecipientPosition += elemCounts[iRank] - 1;
    }
    if( firstRecipientPosition < recipientRanks.size() )
    {
      bool const isLastDonor = myPosition == donorRanks.size() - 1;
      localIndex const lastRecipientPosition = firstRecipientPosition + numElems - 1;
      GEOS_THROW_IF( isLastDonor && ( lastRecipientPosition < recipientRanks.size() ),
                     "The current implementation is unable to guarantee that all ranks have at least one element",
                     std::runtime_error );

      for( localIndex iElem = 1; iElem < numElems; ++iElem ) // I only keep my first element
      {
        // this is the brute force approach
        // each donor donates all its elems except the first one
        localIndex const recipientPosition = firstRecipientPosition + iElem - 1;
        if( recipientPosition < recipientRanks.size() )
        {
          newParts[iElem] = recipientRanks[recipientPosition];
        }
      }
    }
  }

  GEOS_LOG_RANK_0_IF( donorRanks.size() < recipientRanks.size(),
                      "\nWarning! We strongly encourage the use of partitionRefinement > 5 for this number of MPI ranks \n" );

  vtkSmartPointer< vtkPartitionedDataSet > const splitMesh = splitMeshByPartition( mesh, numProcs, newParts.toViewConst() );
  return vtk::redistribute( *splitMesh, MPI_COMM_GEOSX );
}
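To see the donor/recipient bookkeeping of ensureNoEmptyRank in isolation, the following self-contained sketch (plain C++ with std containers only, no MPI, LvArray or VTK) mirrors steps 1, 2 and 3.1 above: given the gathered per-rank cell counts and a rank id, it returns the target rank for each of that rank's cells, the same role newParts plays in the function. The name computeNewParts and the std::vector types are illustrative choices, not GEOS API:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Illustrative sketch of the donor/recipient assignment used by ensureNoEmptyRank,
// restricted to the "happy path" (step 3.1) where there are at least as many donor
// ranks as empty ranks. elemCounts[r] is the cell count gathered from rank r;
// the result gives, for each local cell of myRank, the rank it should move to.
std::vector< int > computeNewParts( std::vector< int > const & elemCounts, int myRank )
{
  int const numProcs = static_cast< int >( elemCounts.size() );
  int const numElems = elemCounts[myRank];

  // step 1: empty ranks are recipients, ranks with at least two cells are donors
  std::vector< int > recipientRanks, donorRanks;
  for( int r = 0; r < numProcs; ++r )
  {
    if( elemCounts[r] == 0 ) { recipientRanks.push_back( r ); }
    else if( elemCounts[r] > 1 ) { donorRanks.push_back( r ); }
  }

  // step 2: sort donors by cell count, then locate this rank in the sorted list
  std::stable_sort( donorRanks.begin(), donorRanks.end(),
                    [&elemCounts]( int i1, int i2 ) { return elemCounts[i1] < elemCounts[i2]; } );
  auto const it = std::find( donorRanks.begin(), donorRanks.end(), myRank );
  std::size_t const myPosition = static_cast< std::size_t >( it - donorRanks.begin() );

  // every cell initially stays on its current rank
  std::vector< int > newParts( numElems, myRank );

  // step 3.1: the i-th donor sends the second half of its cells to the i-th recipient
  if( it != donorRanks.end() &&
      donorRanks.size() >= recipientRanks.size() &&
      myPosition < recipientRanks.size() )
  {
    for( int e = numElems / 2; e < numElems; ++e )
    {
      newParts[e] = recipientRanks[myPosition];
    }
  }
  return newParts;
}

int main()
{
  // 4 ranks after redistributeByKdTree: ranks 1 and 3 ended up empty
  std::vector< int > const counts = { 6, 0, 4, 0 };
  for( int rank = 0; rank < 4; ++rank )
  {
    std::printf( "rank %d assigns its cells to ranks:", rank );
    for( int p : computeNewParts( counts, rank ) ) { std::printf( " %d", p ); }
    std::printf( "\n" );
  }
  return 0;
}
```

In the real function these targets are handed to splitMeshByPartition and vtk::redistribute so the cells actually move; the harder case with more empty ranks than donors (step 3.2) follows the same pattern but spreads all of a donor's cells except the first across consecutive recipient positions.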


vtkSmartPointer< vtkDataSet >
redistributeMesh( vtkSmartPointer< vtkDataSet > loadedMesh,
                  MPI_Comm const comm,
@@ -660,6 +768,14 @@ redistributeMesh( vtkSmartPointer< vtkDataSet > loadedMesh,
    mesh = redistributeByKdTree( *mesh );
  }

  // Check if any rank is left without a cell after the redistribution
  // If this is the case, we need a fix, otherwise the next redistribution will fail
  // We expect this function to only be called in some pathological cases
  if( MpiWrapper::min( mesh->GetNumberOfCells(), comm ) == 0 )
  {
    mesh = ensureNoEmptyRank( *mesh, comm );
  }

  // Redistribute the mesh again using higher-quality graph partitioner
  if( partitionRefinement > 0 )
  {
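The fix-up is triggered by the MpiWrapper::min check added to redistributeMesh above: if the smallest per-rank cell count over the communicator is zero, at least one rank came out of redistributeByKdTree empty. Below is a minimal standalone illustration of that detection, under the assumption that the wrapper amounts to an MPI_MIN all-reduce (the raw MPI spelling and the fake cell counts are illustrative, not GEOS code):

```cpp
#include <mpi.h>
#include <cstdio>

int main( int argc, char * argv[] )
{
  MPI_Init( &argc, &argv );

  int rank = 0;
  MPI_Comm_rank( MPI_COMM_WORLD, &rank );

  // Stand-in for mesh->GetNumberOfCells(): pretend rank 0 received no cells
  // from the k-d tree redistribution while every other rank received a few.
  long long const localCellCount = ( rank == 0 ) ? 0 : 10 + rank;

  // Global minimum of the per-rank cell counts: a result of zero means
  // at least one rank is empty and the fix-up redistribution is needed.
  long long minCellCount = 0;
  MPI_Allreduce( &localCellCount, &minCellCount, 1, MPI_LONG_LONG, MPI_MIN, MPI_COMM_WORLD );

  if( minCellCount == 0 && rank == 0 )
  {
    std::printf( "At least one rank is empty: a fix-up redistribution is needed.\n" );
  }

  MPI_Finalize();
  return 0;
}
```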
