diff --git a/src/input/NetCDFMesh.h b/src/input/NetCDFMesh.h index 9f487e7..934b6fb 100644 --- a/src/input/NetCDFMesh.h +++ b/src/input/NetCDFMesh.h @@ -55,10 +55,10 @@ class NetCDFMesh : public MeshInput { checkNcError(nc_inq_dimlen(ncFile, ncDimPart, &nPartitions)); // Local partitions - unsigned int nMaxLocalPart = (nPartitions + nProcs - 1) / nProcs; - unsigned int nLocalPart = nMaxLocalPart; - if (nPartitions < (rank + 1) * nMaxLocalPart) { - nLocalPart = std::max(0, static_cast(nPartitions - rank * nMaxLocalPart)); + const std::size_t nMaxLocalPart = (nPartitions + nProcs - 1) / nProcs; + std::size_t nLocalPart = nMaxLocalPart; + if (nPartitions < (rank + 1) * nMaxLocalPart && nPartitions >= rank * nMaxLocalPart) { + nLocalPart = static_cast(nPartitions - rank * nMaxLocalPart); } MPI_Comm commIO; @@ -73,8 +73,8 @@ class NetCDFMesh : public MeshInput { PCU_Switch_Comm(commIO); - unsigned int nElements = 0; - unsigned int nVertices = 0; + std::size_t nElements = 0; + std::size_t nVertices = 0; std::vector elements; std::vector vertices; std::vector boundaries; @@ -115,8 +115,10 @@ class NetCDFMesh : public MeshInput { // Read elements logInfo(rank) << "Reading netCDF file"; - for (unsigned int i = 0; i < nMaxLocalPart; i++) { - unsigned int j = i % nLocalPart; + for (std::size_t i = 0; i < nMaxLocalPart; i++) { + std::size_t j = i % nLocalPart; + + // for now, each partition stays limited to about 2^31 maximum elements size_t start[3] = {j + rank * nMaxLocalPart, 0, 0}; @@ -152,26 +154,26 @@ class NetCDFMesh : public MeshInput { checkNcError(nc_close(ncFile)); - for (unsigned int i = 0; i < nLocalPart; i++) { + for (std::size_t i = 0; i < nLocalPart; i++) { nElements += partitions[i].nElements(); nVertices += partitions[i].nVertices(); } // Copy to the buffer - std::vector elementsLocal(nElements * 4); + std::vector elementsLocal(nElements * 4); elements.resize(nElements * 4); vertices.resize(nVertices * 3); boundaries.resize(nElements * 4); groups.resize(nElements); - unsigned int elementOffset = 0; - unsigned int vertexOffset = 0; - for (unsigned int i = 0; i < nLocalPart; i++) { + std::size_t elementOffset = 0; + std::size_t vertexOffset = 0; + for (std::size_t i = 0; i < nLocalPart; i++) { #ifdef _OPENMP -#pragma omp parallel +#pragma omp parallel schedule(static) #endif - for (unsigned int j = 0; j < partitions[i].nElements() * 4; j++) { + for (std::size_t j = 0; j < partitions[i].nElements() * 4; j++) { elementsLocal[elementOffset * 4 + j] = partitions[i].elements()[j] + vertexOffset; } @@ -190,7 +192,7 @@ class NetCDFMesh : public MeshInput { logInfo(rank) << "Running vertex filter"; ParallelVertexFilter filter(commIO); - filter.filter(nVertices, vertices.data()); + filter.filter(nVertices, vertices); // Create filtered vertex list @@ -202,7 +204,7 @@ class NetCDFMesh : public MeshInput { #ifdef _OPENMP #pragma omp parallel #endif - for (unsigned int i = 0; i < nElements * 4; i++) { + for (std::size_t i = 0; i < nElements * 4; i++) { elements[i] = filter.globalIds()[elementsLocal[i]]; } } @@ -220,12 +222,12 @@ class NetCDFMesh : public MeshInput { // Set boundaries apf::MeshTag* boundaryTag = m_mesh->createIntTag("boundary condition", 1); apf::MeshIterator* it = m_mesh->begin(3); - unsigned int i = 0; + std::size_t i = 0; while (apf::MeshEntity* element = m_mesh->iterate(it)) { apf::Adjacent adjacent; m_mesh->getAdjacent(element, 2, adjacent); - for (unsigned int j = 0; j < 4; j++) { + for (int j = 0; j < 4; j++) { if (!boundaries[i * 4 + j]) continue; diff --git 
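The NetCDFMesh hunks above move the per-rank partition bookkeeping to std::size_t and guard the remainder computation, since subtracting into an unsigned type would wrap around for ranks beyond the last partition. A minimal sketch of that ceil-divide distribution, using an illustrative helper name that is not part of PUMGen:

```cpp
// Sketch only: shows the ceil-divide chunking used in the hunk above and why the added
// guard matters for unsigned arithmetic (nPartitions - rank * nMaxLocalPart would wrap
// to a huge value for trailing ranks without it).
#include <algorithm>
#include <cstddef>

struct LocalRange {
  std::size_t offset; // first partition handled by this rank
  std::size_t count;  // number of partitions handled by this rank
};

inline LocalRange localPartitions(std::size_t nPartitions, int nProcs, int rank) {
  const std::size_t nMaxLocalPart = (nPartitions + nProcs - 1) / nProcs; // ceil division
  const std::size_t begin = std::min(nPartitions, rank * nMaxLocalPart);
  const std::size_t end = std::min(nPartitions, (rank + 1) * nMaxLocalPart);
  return LocalRange{begin, end - begin}; // end >= begin by construction, no underflow
}
```

Clamping both ends of the range gives trailing ranks a count of zero instead of a wrapped-around value, which is what the added `>=` check in the hunk achieves.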
a/src/input/NetCDFPartition.h b/src/input/NetCDFPartition.h index 57e76ef..71953d1 100644 --- a/src/input/NetCDFPartition.h +++ b/src/input/NetCDFPartition.h @@ -20,8 +20,8 @@ */ class Partition { private: - unsigned int m_nElements; - unsigned int m_nVertices; + std::size_t m_nElements; + std::size_t m_nVertices; int* m_elements; double* m_vertices; @@ -40,7 +40,7 @@ class Partition { delete[] m_groups; } - void setElemSize(unsigned int nElements) { + void setElemSize(std::size_t nElements) { if (m_nElements != 0) return; @@ -53,7 +53,7 @@ class Partition { memset(m_groups, 0, nElements * sizeof(int)); } - void setVrtxSize(unsigned int nVertices) { + void setVrtxSize(std::size_t nVertices) { if (m_nVertices != 0) return; @@ -72,9 +72,9 @@ class Partition { } } - unsigned int nElements() const { return m_nElements; } + std::size_t nElements() const { return m_nElements; } - unsigned int nVertices() const { return m_nVertices; } + std::size_t nVertices() const { return m_nVertices; } int* elements() { return m_elements; } diff --git a/src/input/ParallelVertexFilter.h b/src/input/ParallelVertexFilter.h index 870b271..96b1136 100644 --- a/src/input/ParallelVertexFilter.h +++ b/src/input/ParallelVertexFilter.h @@ -19,13 +19,17 @@ #include #include +#include #include #include #include +#include #include #include "utils/logger.h" +#include "third_party/MPITraits.h" + /** * Filters duplicate vertices in parallel */ @@ -36,12 +40,12 @@ class ParallelVertexFilter { */ class IndexedVertexComparator { private: - const double* m_vertices; + const std::vector& m_vertices; public: - IndexedVertexComparator(const double* vertices) : m_vertices(vertices) {} + IndexedVertexComparator(const std::vector& vertices) : m_vertices(vertices) {} - bool operator()(unsigned int i, unsigned int j) { + bool operator()(std::size_t i, std::size_t j) { i *= 3; j *= 3; @@ -63,17 +67,16 @@ class ParallelVertexFilter { int m_numProcs; /** Global id after filtering */ - unsigned long* m_globalIds; + std::vector m_globalIds; /** Number of local vertices after filtering */ - unsigned int m_numLocalVertices; + std::size_t m_numLocalVertices; /** Local vertices after filtering */ - double* m_localVertices; + std::vector m_localVertices; public: - ParallelVertexFilter(MPI_Comm comm = MPI_COMM_WORLD) - : m_comm(comm), m_globalIds(0L), m_numLocalVertices(0), m_localVertices(0L) { + ParallelVertexFilter(MPI_Comm comm = MPI_COMM_WORLD) : m_comm(comm), m_numLocalVertices(0) { MPI_Comm_rank(comm, &m_rank); MPI_Comm_size(comm, &m_numProcs); @@ -83,26 +86,23 @@ class ParallelVertexFilter { } } - virtual ~ParallelVertexFilter() { - delete[] m_globalIds; - delete[] m_localVertices; - } + virtual ~ParallelVertexFilter() {} /** * @param vertices Vertices that should be filtered, must have the size * numVertices * 3 */ - void filter(unsigned int numVertices, const double* vertices) { + void filter(std::size_t numVertices, const std::vector& vertices) { // Chop the last 4 bits to avoid numerical errors - double* roundVertices = new double[numVertices * 3]; - removeRoundError(vertices, numVertices * 3, roundVertices); + std::vector roundVertices(numVertices * 3); + removeRoundError(vertices, roundVertices); // Create indices and sort them locally - unsigned int* sortIndices = new unsigned int[numVertices]; - createSortedIndices(roundVertices, numVertices, sortIndices); + std::vector sortIndices(numVertices); + createSortedIndices(roundVertices, sortIndices); // Select BUCKETS_PER_RANK-1 splitter elements - double localSplitters[BUCKETS_PER_RANK 
- 1]; + std::array localSplitters; #if 0 // Use omp only if we create a larger amount of buckets #ifdef _OPENMP #pragma omp parallel for schedule(static) @@ -118,20 +118,22 @@ class ParallelVertexFilter { } // Collect all splitter elements on rank 0 - double* allSplitters = 0L; + std::vector allSplitters; - if (m_rank == 0) - allSplitters = new double[m_numProcs * (BUCKETS_PER_RANK - 1)]; + if (m_rank == 0) { + allSplitters.resize(m_numProcs * (BUCKETS_PER_RANK - 1)); + } - MPI_Gather(localSplitters, BUCKETS_PER_RANK - 1, MPI_DOUBLE, allSplitters, BUCKETS_PER_RANK - 1, - MPI_DOUBLE, 0, m_comm); + MPI_Gather(localSplitters.data(), BUCKETS_PER_RANK - 1, MPI_DOUBLE, allSplitters.data(), + BUCKETS_PER_RANK - 1, MPI_DOUBLE, 0, m_comm); // Sort splitter elements - if (m_rank == 0) - std::sort(allSplitters, allSplitters + (m_numProcs * (BUCKETS_PER_RANK - 1))); + if (m_rank == 0) { + std::sort(allSplitters.begin(), allSplitters.end()); + } // Distribute splitter to all processes - double* splitters = new double[m_numProcs - 1]; + std::vector splitters(m_numProcs - 1); if (m_rank == 0) { #ifdef _OPENMP @@ -145,157 +147,141 @@ class ParallelVertexFilter { } } - MPI_Bcast(splitters, m_numProcs - 1, MPI_DOUBLE, 0, m_comm); - - delete[] allSplitters; + MPI_Bcast(splitters.data(), m_numProcs - 1, MPI_DOUBLE, 0, m_comm); // Determine the bucket for each vertex - unsigned int* bucket = new unsigned int[numVertices]; + std::vector bucket(numVertices); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (unsigned int i = 0; i < numVertices; i++) { - double* ub = std::upper_bound(splitters, splitters + m_numProcs - 1, roundVertices[i * 3]); + for (std::size_t i = 0; i < numVertices; i++) { + auto ub = std::upper_bound(splitters.begin(), splitters.end(), roundVertices[i * 3]); - bucket[i] = ub - splitters; + bucket[i] = std::distance(splitters.begin(), ub); } - delete[] roundVertices; - delete[] splitters; - // Determine the (local and total) bucket size - int* bucketSize = new int[m_numProcs]; - memset(bucketSize, 0, sizeof(int) * m_numProcs); - for (unsigned int i = 0; i < numVertices; i++) - bucketSize[bucket[i]]++; - - delete[] bucket; + std::vector bucketSize(m_numProcs); + for (std::size_t i = 0; i < numVertices; i++) { + ++bucketSize[bucket[i]]; + } // Tell all processes what we are going to send them - int* recvSize = new int[m_numProcs]; + std::vector recvSize(m_numProcs); - MPI_Alltoall(bucketSize, 1, MPI_INT, recvSize, 1, MPI_INT, m_comm); + MPI_Alltoall(bucketSize.data(), 1, MPI_INT, recvSize.data(), 1, MPI_INT, m_comm); - unsigned int numSortVertices = 0; + std::size_t numSortVertices = 0; #ifdef _OPENMP #pragma omp parallel for schedule(static) reduction(+ : numSortVertices) #endif - for (int i = 0; i < m_numProcs; i++) + for (int i = 0; i < m_numProcs; i++) { numSortVertices += recvSize[i]; + } // Create sorted send buffer - double* sendVertices = new double[3 * numVertices]; + std::vector sendVertices(3 * numVertices); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (unsigned int i = 0; i < numVertices; i++) { - memcpy(&sendVertices[i * 3], &vertices[sortIndices[i] * 3], sizeof(double) * 3); + for (std::size_t i = 0; i < numVertices; i++) { + std::copy_n(vertices.begin() + sortIndices[i] * 3, 3, sendVertices.begin() + i * 3); } // Allocate buffer for the vertices and exchange them - double* sortVertices = new double[3 * numSortVertices]; + std::vector sortVertices(3 * numSortVertices); - int* sDispls = new int[m_numProcs]; - int* rDispls = new 
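The filter hunks above assign each vertex to a destination rank by binary search over the splitter values broadcast from rank 0. A short sketch of that bucketing step, with an illustrative helper that is not part of the class:

```cpp
// Sketch: given the numProcs-1 sorted splitters, each vertex goes to the rank whose value
// range contains its first coordinate (after rounding), exactly as std::upper_bound is
// used in the hunk above.
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <vector>

inline std::vector<int> assignBuckets(const std::vector<double>& xCoords,
                                      const std::vector<double>& splitters) {
  std::vector<int> bucket(xCoords.size());
  for (std::size_t i = 0; i < xCoords.size(); ++i) {
    auto ub = std::upper_bound(splitters.begin(), splitters.end(), xCoords[i]);
    bucket[i] = static_cast<int>(std::distance(splitters.begin(), ub)); // rank 0..numProcs-1
  }
  return bucket;
}
```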
int[m_numProcs]; + std::vector sDispls(m_numProcs); + std::vector rDispls(m_numProcs); sDispls[0] = 0; rDispls[0] = 0; for (int i = 1; i < m_numProcs; i++) { sDispls[i] = sDispls[i - 1] + bucketSize[i - 1]; rDispls[i] = rDispls[i - 1] + recvSize[i - 1]; } - MPI_Alltoallv(sendVertices, bucketSize, sDispls, vertexType, sortVertices, recvSize, rDispls, - vertexType, m_comm); - - delete[] sendVertices; + MPI_Alltoallv(sendVertices.data(), bucketSize.data(), sDispls.data(), vertexType, + sortVertices.data(), recvSize.data(), rDispls.data(), vertexType, m_comm); // Chop the last 4 bits to avoid numerical errors - roundVertices = new double[numSortVertices * 3]; - removeRoundError(sortVertices, numSortVertices * 3, roundVertices); + roundVertices.resize(numSortVertices * 3); + removeRoundError(sortVertices, roundVertices); // Create indices and sort them (such that the vertices are sorted) - unsigned int* sortSortIndices = new unsigned int[numSortVertices]; - createSortedIndices(roundVertices, numSortVertices, sortSortIndices); - - delete[] roundVertices; + std::vector sortSortIndices(numSortVertices); + createSortedIndices(roundVertices, sortSortIndices); // Initialize the global ids we send back to the other processors - unsigned long* gids = new unsigned long[numSortVertices]; + std::vector gids(numSortVertices); if (numSortVertices > 0) { gids[sortSortIndices[0]] = 0; - for (unsigned int i = 1; i < numSortVertices; i++) { + for (std::size_t i = 1; i < numSortVertices; i++) { if (equals(&sortVertices[sortSortIndices[i - 1] * 3], - &sortVertices[sortSortIndices[i] * 3])) + &sortVertices[sortSortIndices[i] * 3])) { gids[sortSortIndices[i]] = gids[sortSortIndices[i - 1]]; - else + } else { gids[sortSortIndices[i]] = gids[sortSortIndices[i - 1]] + 1; + } } } // Create the local vertices list - if (numSortVertices > 0) + if (numSortVertices > 0) { m_numLocalVertices = gids[sortSortIndices[numSortVertices - 1]] + 1; - else + } else { m_numLocalVertices = 0; - delete[] m_localVertices; - m_localVertices = new double[m_numLocalVertices * 3]; - for (unsigned int i = 0; i < numSortVertices; i++) - memcpy(&m_localVertices[gids[i] * 3], &sortVertices[i * 3], sizeof(double) * 3); + } - delete[] sortVertices; + m_localVertices.resize(m_numLocalVertices * 3); + for (std::size_t i = 0; i < numSortVertices; i++) { + std::copy_n(sortVertices.begin() + i * 3, 3, m_localVertices.begin() + gids[i] * 3); + } // Get the vertices offset - unsigned int offset = m_numLocalVertices; - MPI_Scan(MPI_IN_PLACE, &offset, 1, MPI_UNSIGNED, MPI_SUM, m_comm); + std::size_t offset = m_numLocalVertices; + MPI_Scan(MPI_IN_PLACE, &offset, 1, tndm::mpi_type_t(), MPI_SUM, m_comm); offset -= m_numLocalVertices; // Add offset to the global ids #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (unsigned int i = 0; i < numSortVertices; i++) + for (std::size_t i = 0; i < numSortVertices; i++) { gids[i] += offset; + } // Send result back - unsigned long* globalIds = new unsigned long[numVertices]; - MPI_Alltoallv(gids, recvSize, rDispls, MPI_UNSIGNED_LONG, globalIds, bucketSize, sDispls, - MPI_UNSIGNED_LONG, m_comm); - - delete[] bucketSize; - delete[] recvSize; - delete[] sDispls; - delete[] rDispls; - delete[] gids; + std::vector globalIds(numVertices); + MPI_Alltoallv(gids.data(), recvSize.data(), rDispls.data(), tndm::mpi_type_t(), + globalIds.data(), bucketSize.data(), sDispls.data(), + tndm::mpi_type_t(), m_comm); // Assign the global ids to the correct vertices - delete[] m_globalIds; - m_globalIds = new unsigned 
long[numVertices]; + m_globalIds.resize(numVertices); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (unsigned int i = 0; i < numVertices; i++) + for (std::size_t i = 0; i < numVertices; i++) { m_globalIds[sortIndices[i]] = globalIds[i]; - - delete[] sortIndices; - delete[] globalIds; + } } /** * @return The list of the global identifiers after filtering */ - const unsigned long* globalIds() const { return m_globalIds; } + const std::vector& globalIds() const { return m_globalIds; } /** * @return Number of vertices this process is responsible for after filtering */ - unsigned int numLocalVertices() const { return m_numLocalVertices; } + std::size_t numLocalVertices() const { return m_numLocalVertices; } /** * @return The list of vertices this process is responsible for after * filtering */ - const double* localVertices() const { return m_localVertices; } + const std::vector& localVertices() const { return m_localVertices; } private: /** @@ -329,29 +315,34 @@ class ParallelVertexFilter { * @param[out] roundValues The list of rounded values * (the caller is responsible for allocating the memory) */ - static void removeRoundError(const double* values, unsigned int count, double* roundValues) { + static void removeRoundError(const std::vector& values, + std::vector& roundValues) { + assert(values.size() == roundValues.size()); + #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (unsigned int i = 0; i < count; i++) + for (std::size_t i = 0; i < roundValues.size(); i++) { roundValues[i] = removeRoundError(values[i]); + } } /** * Creates the list of sorted indices for the vertices. * The caller is responsible for allocating the memory. */ - static void createSortedIndices(const double* vertices, unsigned int numVertices, - unsigned int* sortedIndices) { + static void createSortedIndices(const std::vector& vertices, + std::vector& sortedIndices) { #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (unsigned int i = 0; i < numVertices; i++) + for (std::size_t i = 0; i < sortedIndices.size(); i++) { sortedIndices[i] = i; + } IndexedVertexComparator comparator(vertices); - std::sort(sortedIndices, sortedIndices + numVertices, comparator); + std::sort(sortedIndices.begin(), sortedIndices.end(), comparator); } /** @@ -366,7 +357,7 @@ class ParallelVertexFilter { static MPI_Datatype vertexType; /** The total buckets we create is BUCKETS_PER_RANK * numProcs */ - const static int BUCKETS_PER_RANK = 8; + constexpr static int BUCKETS_PER_RANK = 8; }; #endif // PARALLEL_VERTEX_FILTER_H diff --git a/src/input/SerialMeshFile.h b/src/input/SerialMeshFile.h index 25b8c45..5377314 100644 --- a/src/input/SerialMeshFile.h +++ b/src/input/SerialMeshFile.h @@ -71,10 +71,10 @@ template class SerialMeshFile : public MeshInput { void open(const char* meshFile) { m_meshReader.open(meshFile); - unsigned int nVertices = m_meshReader.nVertices(); - unsigned int nElements = m_meshReader.nElements(); - unsigned int nLocalVertices = (nVertices + m_nProcs - 1) / m_nProcs; - unsigned int nLocalElements = (nElements + m_nProcs - 1) / m_nProcs; + const std::size_t nVertices = m_meshReader.nVertices(); + const std::size_t nElements = m_meshReader.nElements(); + std::size_t nLocalVertices = (nVertices + m_nProcs - 1) / m_nProcs; + std::size_t nLocalElements = (nElements + m_nProcs - 1) / m_nProcs; if (m_rank == m_nProcs - 1) { nLocalVertices = nVertices - (m_nProcs - 1) * nLocalVertices; nLocalElements = nElements - (m_nProcs - 1) * nLocalElements; @@ -89,7 +89,7 @@ 
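Throughout this patch, hard-coded MPI datatypes such as MPI_UNSIGNED and MPI_UNSIGNED_LONG are replaced by tndm::mpi_type_t from third_party/MPITraits.h, presumably instantiated with the element type of the buffer, so the MPI datatype always follows the C++ type. The sketch below shows the general idea only; the actual header may be implemented differently:

```cpp
// Illustrative type-to-MPI-datatype trait (not the real MPITraits.h): switching a counter
// from unsigned int to std::size_t can no longer leave a stale MPI_UNSIGNED in the call.
#include <mpi.h>
#include <cstddef>

template <typename T> struct MpiType;
template <> struct MpiType<int> { static MPI_Datatype get() { return MPI_INT; } };
template <> struct MpiType<unsigned int> { static MPI_Datatype get() { return MPI_UNSIGNED; } };
template <> struct MpiType<unsigned long> { static MPI_Datatype get() { return MPI_UNSIGNED_LONG; } };
template <> struct MpiType<unsigned long long> {
  static MPI_Datatype get() { return MPI_UNSIGNED_LONG_LONG; }
};
template <> struct MpiType<double> { static MPI_Datatype get() { return MPI_DOUBLE; } };

// Usage mirroring the vertex-offset scan in the hunk above:
//   std::size_t offset = numLocalVertices;
//   MPI_Scan(MPI_IN_PLACE, &offset, 1, MpiType<std::size_t>::get(), MPI_SUM, comm);
//   offset -= numLocalVertices;   // inclusive scan -> exclusive offset
```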
template class SerialMeshFile : public MeshInput { { std::vector elements; { - std::vector elementsInt(nLocalElements * 4); + std::vector elementsInt(nLocalElements * 4); m_meshReader.readElements(elementsInt.data()); elements = std::vector(elementsInt.begin(), elementsInt.end()); } @@ -114,12 +114,12 @@ template class SerialMeshFile : public MeshInput { std::vector boundaries(nLocalElements * 4); m_meshReader.readBoundaries(boundaries.data()); apf::MeshIterator* it = m_mesh->begin(3); - unsigned int i = 0; + std::size_t i = 0; while (apf::MeshEntity* element = m_mesh->iterate(it)) { apf::Adjacent adjacent; m_mesh->getAdjacent(element, 2, adjacent); - for (unsigned int j = 0; j < 4; j++) { + for (int j = 0; j < 4; j++) { if (!boundaries[i * 4 + j]) { continue; } @@ -138,7 +138,7 @@ template class SerialMeshFile : public MeshInput { std::vector groups(nLocalElements); m_meshReader.readGroups(groups.data()); apf::MeshIterator* it = m_mesh->begin(3); - unsigned int i = 0; + std::size_t i = 0; while (apf::MeshEntity* element = m_mesh->iterate(it)) { m_mesh->setIntTag(element, groupTag, &groups[i]); i++; diff --git a/src/meshreader/FidapReader.h b/src/meshreader/FidapReader.h index f071ff2..cbc39cc 100644 --- a/src/meshreader/FidapReader.h +++ b/src/meshreader/FidapReader.h @@ -14,6 +14,7 @@ #define FIDAP_READER_H #include +#include #include #include #include @@ -25,7 +26,7 @@ struct FidapBoundaryFace { /** The vertices of the face */ - int vertices[3]; + std::array vertices; /** The type of the boundary */ int type; }; @@ -34,16 +35,16 @@ struct FidapBoundaryFace { struct FaceVertex { FaceVertex() {} - FaceVertex(int v1, int v2, int v3) { + FaceVertex(std::size_t v1, std::size_t v2, std::size_t v3) { vertices[0] = v1; vertices[1] = v2; vertices[2] = v3; - std::sort(vertices, vertices + 3); + std::sort(vertices.begin(), vertices.end()); } - FaceVertex(const int v[3]) { - memcpy(vertices, v, sizeof(vertices)); - std::sort(vertices, vertices + 3); + FaceVertex(const std::array& v) { + std::copy(v.begin(), v.end(), vertices.begin()); + std::sort(vertices.begin(), vertices.end()); } bool operator<(const FaceVertex& other) const { @@ -53,7 +54,7 @@ struct FaceVertex { vertices[2] < other.vertices[2]); } - int vertices[3]; + std::array vertices; }; /** Defines a face by element and face side */ @@ -63,12 +64,12 @@ struct FaceElement { side = -1; } - FaceElement(int e, int s) { + FaceElement(std::size_t e, int s) { element = e; side = s; } - int element; + std::size_t element; int side; }; @@ -109,7 +110,7 @@ class FidapReader : public MeshReader { getline(m_mesh, line); // Date getline(m_mesh, line); // Empty line - unsigned int nElements, nZones, t, dimensions; + std::size_t nElements, nZones, t, dimensions; m_mesh >> m_vertices.nLines; m_mesh >> nElements; m_mesh >> nZones; @@ -147,10 +148,10 @@ class FidapReader : public MeshReader { if (line.find(ELEMENT_GROUPS) == std::string::npos) logError() << "Invalid Fidap format: Element groups expected, found" << line; - for (unsigned int i = 0; i < nZones; i++) { + for (std::size_t i = 0; i < nZones; i++) { FileSection sec; - // Group + // Group (unused) m_mesh >> name; if (name.find(ZONE_GROUP) == std::string::npos) logError() << "Invalid Fidap format: Expected group, found" << name; @@ -167,13 +168,13 @@ class FidapReader : public MeshReader { logError() << "Invalid Fidap format: Expected nodes, found" << name; unsigned int nodeType; m_mesh >> nodeType; - // Geometry + // Geometry (unused) m_mesh >> name; if (name.find(ZONE_GEOMETRY) == 
std::string::npos) logError() << "Invalid Fidap format: Expected geometry, found" << name; unsigned int geoType; m_mesh >> geoType; - // Type + // Type (unused) m_mesh >> name; if (name.find(ZONE_TYPE) == std::string::npos) logError() << "Invalid Fidap format: Expected type, found" << name; @@ -223,35 +224,33 @@ class FidapReader : public MeshReader { } } - unsigned int nVertices() const { return m_vertices.nLines; } + std::size_t nVertices() const { return m_vertices.nLines; } - unsigned int nElements() const { - unsigned int count = 0; - for (std::vector::const_iterator i = m_elements.begin(); i != m_elements.end(); - i++) { - count += i->nLines; + std::size_t nElements() const { + std::size_t count = 0; + for (const auto& element : m_elements) { + count += element.nLines; } return count; } - unsigned int nBoundaries() const { - unsigned int count = 0; - for (std::vector::const_iterator i = m_boundaries.begin(); - i != m_boundaries.end(); i++) { - count += i->nLines; + std::size_t nBoundaries() const { + std::size_t count = 0; + for (const auto& boundary : m_boundaries) { + count += boundary.nLines; } return count; } - void readVertices(unsigned int start, unsigned int count, double* vertices) { + void readVertices(std::size_t start, std::size_t count, double* vertices) { m_mesh.clear(); m_mesh.seekg(m_vertices.seekPosition + start * m_vertices.lineSize + m_vertices.lineSize - 3 * COORDINATE_SIZE - 1); - for (unsigned int i = 0; i < count; i++) { + for (std::size_t i = 0; i < count; i++) { for (int j = 0; j < 3; j++) m_mesh >> vertices[i * 3 + j]; @@ -259,7 +258,7 @@ class FidapReader : public MeshReader { } } - void readElements(unsigned int start, unsigned int count, int* elements) { + void readElements(std::size_t start, std::size_t count, std::size_t* elements) { m_mesh.clear(); // Get the boundary, were we should start reading @@ -271,7 +270,7 @@ class FidapReader : public MeshReader { m_mesh.seekg(section->seekPosition + start * section->lineSize + section->lineSize - 4 * VERTEX_ID_SIZE - 1); - for (unsigned int i = 0; i < count; i++) { + for (std::size_t i = 0; i < count; i++) { if (start >= section->nLines) { // Are we starting a new section in this iteration? start = 0; @@ -280,7 +279,7 @@ class FidapReader : public MeshReader { m_mesh.seekg(section->seekPosition + section->lineSize - 4 * VERTEX_ID_SIZE - 1); } - for (unsigned int j = 0; j < 4; j++) { + for (int j = 0; j < 4; j++) { m_mesh >> elements[i * 4 + j]; elements[i * 4 + j]--; } @@ -295,14 +294,14 @@ class FidapReader : public MeshReader { * Read group ids from start to start+count. The group ids are sorted * in the same order as the elements. */ - void readGroups(unsigned int start, unsigned int count, int* groups) { + void readGroups(std::size_t start, std::size_t count, int* groups) { // Get the boundary, were we should start reading std::vector::const_iterator section; for (section = m_elements.begin(); section != m_elements.end() && section->nLines < start; section++) start -= section->nLines; - for (unsigned int i = 0; i < count; i++) { + for (std::size_t i = 0; i < count; i++) { if (start >= section->nLines) { // Are we starting a new section in this iteration? 
start = 0; @@ -320,7 +319,7 @@ class FidapReader : public MeshReader { */ void readGroups(int* groups) { readGroups(0, nElements(), groups); } - void readBoundaries(unsigned int start, unsigned int count, FidapBoundaryFace* boundaries) { + void readBoundaries(std::size_t start, std::size_t count, FidapBoundaryFace* boundaries) { m_mesh.clear(); // Get the boundary, were we should start reading @@ -332,7 +331,7 @@ class FidapReader : public MeshReader { m_mesh.seekg(section->seekPosition + start * section->lineSize + section->lineSize - 3 * VERTEX_ID_SIZE - 1); - for (unsigned int i = 0; i < count; i++) { + for (std::size_t i = 0; i < count; i++) { if (start >= section->nLines) { // Are we starting a new section in this iteration? start = 0; @@ -341,7 +340,7 @@ class FidapReader : public MeshReader { m_mesh.seekg(section->seekPosition + section->lineSize - 3 * VERTEX_ID_SIZE - 1); } - for (unsigned int j = 0; j < 3; j++) { + for (int j = 0; j < 3; j++) { m_mesh >> boundaries[i].vertices[j]; boundaries[i].vertices[j]--; } @@ -354,7 +353,7 @@ class FidapReader : public MeshReader { } } - // TODO void readBoundaries(int* boundaries) {} + // TODO void readBoundaries(std::size_t* boundaries) {} public: /** @@ -364,9 +363,9 @@ class FidapReader : public MeshReader { * @param n Number of elements * @param[out] map The map */ - static void createFaceMap(const int* elements, unsigned int n, + static void createFaceMap(const std::size_t* elements, std::size_t n, std::map& map) { - for (unsigned int i = 0; i < n; i++) { + for (std::size_t i = 0; i < n; i++) { FaceVertex v1(elements[i * 4], elements[i * 4 + 1], elements[i * 4 + 2]); FaceElement e1(i, 0); map[v1] = e1; diff --git a/src/meshreader/GMSHBuilder.h b/src/meshreader/GMSHBuilder.h index 02e0322..3c547ac 100644 --- a/src/meshreader/GMSHBuilder.h +++ b/src/meshreader/GMSHBuilder.h @@ -10,15 +10,27 @@ namespace puml { template struct GMSHSimplexType {}; -template <> struct GMSHSimplexType<0u> { static constexpr long type = 15; }; // 15 -> point -template <> struct GMSHSimplexType<1u> { static constexpr long type = 1; }; // 1 -> line -template <> struct GMSHSimplexType<2u> { static constexpr long type = 2; }; // 2 -> triangle -template <> struct GMSHSimplexType<3u> { static constexpr long type = 4; }; // 4 -> tetrahedron + +// clang format seems to mess up the following lines from time to time, hence we disable it +// clang-format off +template <> struct GMSHSimplexType<0u> { + static constexpr long type = 15; +}; // 15 -> point +template <> struct GMSHSimplexType<1u> { + static constexpr long type = 1; +}; // 1 -> line +template <> struct GMSHSimplexType<2u> { + static constexpr long type = 2; +}; // 2 -> triangle +template <> struct GMSHSimplexType<3u> { + static constexpr long type = 4; +}; // 4 -> tetrahedron +// clang-format on template class GMSHBuilder : public tndm::GMSHMeshBuilder { public: using vertex_t = std::array; - using element_t = std::array; + using element_t = std::array; using facet_t = std::array; std::vector vertices; diff --git a/src/meshreader/GambitReader.h b/src/meshreader/GambitReader.h index e919fd0..8410087 100644 --- a/src/meshreader/GambitReader.h +++ b/src/meshreader/GambitReader.h @@ -30,7 +30,7 @@ namespace puml { */ struct ElementGroup { /** The element */ - unsigned int element; + std::size_t element; /** The group for this element */ int group; }; @@ -40,7 +40,7 @@ struct ElementGroup { */ struct GambitBoundaryFace { /** The element the face belongs to */ - unsigned int element; + std::size_t element; /** The 
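FidapReader::createFaceMap above keys tetrahedron faces by their sorted vertex triple (FaceVertex), so a boundary face read later can be mapped back to its (element, side) pair regardless of vertex order. A self-contained sketch of the idea; the local face numbering below is an assumption for illustration and not necessarily the Fidap convention:

```cpp
// Sketch of the face-lookup idea behind createFaceMap (names shortened, face ordering assumed).
#include <algorithm>
#include <array>
#include <cstddef>
#include <map>

struct FaceKey {
  std::array<std::size_t, 3> v;
  FaceKey(std::size_t a, std::size_t b, std::size_t c) : v{a, b, c} {
    std::sort(v.begin(), v.end()); // order-independent key: same face -> same key
  }
  bool operator<(const FaceKey& o) const { return v < o.v; }
};

struct FaceRef { std::size_t element; int side; };

// Build a map from each of the 4 faces of every tetrahedron to (element, side).
inline std::map<FaceKey, FaceRef> buildFaceMap(const std::size_t* elems, std::size_t n) {
  static constexpr int F[4][3] = {{1, 2, 3}, {0, 2, 3}, {0, 1, 3}, {0, 1, 2}}; // face k omits vertex k (assumed)
  std::map<FaceKey, FaceRef> map;
  for (std::size_t i = 0; i < n; ++i) {
    for (int s = 0; s < 4; ++s) {
      map.emplace(FaceKey(elems[i * 4 + F[s][0]], elems[i * 4 + F[s][1]], elems[i * 4 + F[s][2]]),
                  FaceRef{i, s});
    }
  }
  return map;
}
```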
face of the element */ unsigned int face; /** The type of the boundary */ @@ -107,7 +107,7 @@ class GambitReader : public MeshReader { getline(m_mesh, line); // Skip problem size names - unsigned int nGroups, nBoundaries, dimensions; + std::size_t nGroups, nBoundaries, dimensions; m_mesh >> m_vertices.nLines; m_mesh >> m_elements.nLines; m_mesh >> nGroups; @@ -166,7 +166,7 @@ class GambitReader : public MeshReader { // Groups m_groups.resize(nGroups); - for (unsigned int i = 0; i < nGroups; i++) { + for (std::size_t i = 0; i < nGroups; i++) { getline(m_mesh, line); if (line.find(ELEMENT_GROUP) == std::string::npos) logError() << "Invalid Gambit format: Group expected, found" << line; @@ -213,7 +213,7 @@ class GambitReader : public MeshReader { // Boundaries m_boundaries.resize(nBoundaries); - for (unsigned int i = 0; i < nBoundaries; i++) { + for (std::size_t i = 0; i < nBoundaries; i++) { getline(m_mesh, line); if (line.find(BOUNDARY_CONDITIONS) == std::string::npos) logError() << "Invalid Gambit format: Boundaries expected, found" << line; @@ -236,7 +236,7 @@ class GambitReader : public MeshReader { // Get element size std::istringstream ssE(line); - unsigned int element, type, face; + std::size_t element, type, face; ssE >> element; m_boundaries[i].elementSize = ssE.tellg(); ssE >> type; @@ -278,35 +278,34 @@ class GambitReader : public MeshReader { } } - unsigned int nVertices() const { return m_vertices.nLines; } + std::size_t nVertices() const { return m_vertices.nLines; } - unsigned int nElements() const { return m_elements.nLines; } + std::size_t nElements() const { return m_elements.nLines; } - unsigned int nBoundaries() const { - unsigned int count = 0; - for (std::vector::const_iterator i = m_boundaries.begin(); - i != m_boundaries.end(); i++) { - count += i->nLines; + std::size_t nBoundaries() const { + std::size_t count = 0; + for (const auto& boundary : m_boundaries) { + count += boundary.nLines; } return count; } /** - * @copydoc MeshReader::readVertices(unsigned int, unsigned int, double*) + * @copydoc MeshReader::readVertices(size_t, size_t, double*) * * @todo Only 3 dimensional meshes are supported */ - void readVertices(unsigned int start, unsigned int count, double* vertices) { + void readVertices(std::size_t start, std::size_t count, double* vertices) { m_mesh.clear(); m_mesh.seekg(m_vertices.seekPosition + start * m_vertices.lineSize + m_vertices.lineSize - 3 * COORDINATE_SIZE - 1); - char* buf = new char[3 * COORDINATE_SIZE]; + std::vector buf(3 * COORDINATE_SIZE); - for (unsigned int i = 0; i < count; i++) { - m_mesh.read(buf, 3 * COORDINATE_SIZE); + for (std::size_t i = 0; i < count; i++) { + m_mesh.read(buf.data(), 3 * COORDINATE_SIZE); for (int j = 0; j < 3; j++) { std::istringstream ss(std::string(&buf[j * COORDINATE_SIZE], COORDINATE_SIZE)); @@ -315,25 +314,23 @@ class GambitReader : public MeshReader { m_mesh.seekg(m_vertices.lineSize - 3 * COORDINATE_SIZE, std::fstream::cur); } - - delete[] buf; } /** - * @copydoc MeshReader::readElements(unsigned int, unsigned int, int*) + * @copydoc MeshReader::readElements(size_t, size_t, size_t*) * * @todo Only tetrahedral meshes are supported * @todo Support for varying coordinate/vertexid fields */ - void readElements(unsigned int start, unsigned int count, int* elements) { + void readElements(std::size_t start, std::size_t count, std::size_t* elements) { m_mesh.clear(); m_mesh.seekg(m_elements.seekPosition + start * m_elements.lineSize + m_elements.vertexStart); - char* buf = new char[4 * m_elements.vertexSize]; + 
std::vector buf(4 * m_elements.vertexSize); - for (unsigned int i = 0; i < count; i++) { - m_mesh.read(buf, 4 * m_elements.vertexSize); + for (std::size_t i = 0; i < count; i++) { + m_mesh.read(buf.data(), 4 * m_elements.vertexSize); for (int j = 0; j < 4; j++) { std::istringstream ss(std::string(&buf[j * m_elements.vertexSize], m_elements.vertexSize)); @@ -343,8 +340,6 @@ class GambitReader : public MeshReader { m_mesh.seekg(m_elements.lineSize - 4 * m_elements.vertexSize, std::fstream::cur); } - - delete[] buf; } /** @@ -353,24 +348,25 @@ class GambitReader : public MeshReader { * @param groups Buffer for storing the group numbers. The caller is * responsible for allocating the buffer. */ - void readGroups(unsigned int start, unsigned int count, ElementGroup* groups) { + void readGroups(std::size_t start, std::size_t count, ElementGroup* groups) { m_mesh.clear(); // Get the group, were we should start reading std::vector::const_iterator section; for (section = m_groups.begin(); section != m_groups.end() && section->nLines < start; - section++) + section++) { start -= section->nLines; + } m_mesh.seekg(section->seekPosition + (start / ELEMENTS_PER_LINE_GROUP) * section->lineSize + (start % ELEMENTS_PER_LINE_GROUP) * section->elementSize); - char* buf = new char[section->elementSize]; + std::vector buf(section->elementSize); - for (unsigned int i = 0; i < count; i++) { - m_mesh.read(buf, section->elementSize); + for (std::size_t i = 0; i < count; i++) { + m_mesh.read(buf.data(), section->elementSize); - std::istringstream ss(std::string(buf, section->elementSize)); + std::istringstream ss(std::string(buf.begin(), buf.end())); ss >> groups[i].element; groups[i].element--; groups[i].group = section->group; @@ -387,28 +383,25 @@ class GambitReader : public MeshReader { m_mesh.seekg(section->lineSize - section->elementSize * ELEMENTS_PER_LINE_GROUP, std::fstream::cur); } - - delete[] buf; } /** * Reads all groups numbers. - * In contrast to readGroups(unsigned int, unsigned int, ElementGroup*) it + * In contrast to readGroups(size_t, size_t, ElementGroup*) it * returns the group numbers sorted according to the elements. */ void readGroups(int* groups) { logInfo() << "Reading group information"; - ElementGroup* map = new ElementGroup[nElements()]; - readGroups(0, nElements(), map); + std::vector map(nElements()); + readGroups(0, nElements(), map.data()); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (unsigned int i = 0; i < nElements(); i++) + for (std::size_t i = 0; i < nElements(); i++) { groups[map[i].element] = map[i].group; - - delete[] map; + } } /** @@ -418,7 +411,7 @@ class GambitReader : public MeshReader { * @param elements Buffer to store boundaries. Each boundary consists of the * element, the face and the boundary type. 
*/ - void readBoundaries(unsigned int start, unsigned int count, GambitBoundaryFace* boundaries) { + void readBoundaries(std::size_t start, std::size_t count, GambitBoundaryFace* boundaries) { m_mesh.clear(); // Get the boundary, were we should start reading @@ -427,21 +420,21 @@ class GambitReader : public MeshReader { section++) start -= section->nLines; - char* buf; + std::vector buf; if (section->variableLineLength) { m_mesh.seekg(section->seekPosition); for (size_t i = 0; i < start; i++) m_mesh.ignore(std::numeric_limits::max(), '\n'); - buf = 0L; + buf.resize(0); } else { m_mesh.seekg(section->seekPosition + start * section->lineSize); - buf = new char[section->elementSize + section->elementTypeSize + section->faceSize]; + buf.resize(section->elementSize + section->elementTypeSize + section->faceSize); } - for (unsigned int i = 0; i < count; i++) { + for (std::size_t i = 0; i < count; i++) { if (start >= section->nLines) { // Are we starting a new section in this iteration? start = 0; @@ -449,11 +442,11 @@ class GambitReader : public MeshReader { m_mesh.seekg(section->seekPosition); - delete[] buf; - if (section->variableLineLength) - buf = 0L; - else - buf = new char[section->elementSize + section->elementTypeSize + section->faceSize]; + if (section->variableLineLength) { + buf.resize(0); + } else { + buf.resize(section->elementSize + section->elementTypeSize + section->faceSize); + } } if (section->variableLineLength) { @@ -462,13 +455,15 @@ class GambitReader : public MeshReader { m_mesh >> elementType; m_mesh >> boundaries[i].face; } else { - m_mesh.read(buf, section->elementSize + section->elementTypeSize + section->faceSize); + m_mesh.read(buf.data(), + section->elementSize + section->elementTypeSize + section->faceSize); - std::istringstream ssE(std::string(buf, section->elementSize)); + std::istringstream ssE(std::string(buf.begin(), buf.begin() + section->elementSize)); ssE >> boundaries[i].element; - std::istringstream ssF( - std::string(&buf[section->elementSize + section->elementTypeSize], section->faceSize)); + std::istringstream ssF(std::string( + buf.begin() + section->elementSize + section->elementTypeSize, + buf.begin() + section->elementSize + section->elementTypeSize + section->faceSize)); ssF >> boundaries[i].face; // Seek to next position @@ -481,10 +476,8 @@ class GambitReader : public MeshReader { boundaries[i].face--; boundaries[i].type = section->type; - start++; // Line in the current section + ++start; // Line in the current section } - - delete[] buf; } /** @@ -499,14 +492,13 @@ class GambitReader : public MeshReader { void readBoundaries(int* boundaries) { logInfo() << "Reading boundary conditions"; - unsigned int nBnds = nBoundaries(); - GambitBoundaryFace* faces = new GambitBoundaryFace[nBoundaries()]; - readBoundaries(0, nBnds, faces); + std::size_t nBnds = nBoundaries(); + std::vector faces(nBnds); + readBoundaries(0, nBnds, faces.data()); - for (unsigned int i = 0; i < nBnds; i++) + for (std::size_t i = 0; i < nBnds; i++) { boundaries[faces[i].element * 4 + faces[i].face] = faces[i].type; - - delete[] faces; + } } private: diff --git a/src/meshreader/MeshReader.h b/src/meshreader/MeshReader.h index ee7d968..6d97832 100644 --- a/src/meshreader/MeshReader.h +++ b/src/meshreader/MeshReader.h @@ -23,7 +23,7 @@ class MeshReader { /** Start of the section */ size_t seekPosition; /** Number of elements (lines) in the section */ - unsigned int nLines; + std::size_t nLines; /** Line size */ size_t lineSize; }; @@ -45,12 +45,12 @@ class MeshReader { logError() 
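GambitReader above reads each record into a reusable std::vector<char> and slices fixed-width columns into istringstreams instead of juggling new[]/delete[] buffers. A generic sketch of that parsing pattern, with an illustrative helper name and field layout:

```cpp
// Sketch of the fixed-width column parsing used above; the field size and record layout
// here are placeholders, not the Gambit format constants.
#include <cstddef>
#include <istream>
#include <sstream>
#include <string>
#include <vector>

inline void readFixedWidthDoubles(std::istream& in, std::size_t count, std::size_t fieldSize,
                                  std::vector<double>& out) {
  std::vector<char> buf(count * fieldSize);          // RAII buffer, reused per record
  in.read(buf.data(), static_cast<std::streamsize>(buf.size()));
  out.resize(count);
  for (std::size_t i = 0; i < count; ++i) {
    std::istringstream field(std::string(buf.data() + i * fieldSize, fieldSize));
    field >> out[i];                                 // parse one fixed-width column
  }
}
```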
<< "Could not open mesh file" << meshFile; } - virtual unsigned int nVertices() const = 0; - virtual unsigned int nElements() const = 0; + virtual std::size_t nVertices() const = 0; + virtual std::size_t nElements() const = 0; /** * @return Number of boundary faces */ - virtual unsigned int nBoundaries() const = 0; + virtual std::size_t nBoundaries() const = 0; /** * Reads vertices from start tp start+count from the file and stores them in @@ -60,12 +60,12 @@ class MeshReader { * responsible for allocating the buffer. The size of the buffer must be * count*dimensions */ - virtual void readVertices(unsigned int start, unsigned int count, double* vertices) = 0; + virtual void readVertices(std::size_t start, std::size_t count, double* vertices) = 0; /** * Read all vertices * - * @see readVertices(unsigned int, unsigned int, double*) + * @see readVertices(size_t, size_t, double*) */ void readVertices(double* vertices) { logInfo() << "Reading vertices"; @@ -80,14 +80,14 @@ class MeshReader { * responsible for allocating the buffer. The Size of the buffer must be * count*vertices_per_element. */ - virtual void readElements(unsigned int start, unsigned int count, int* elements) = 0; + virtual void readElements(std::size_t start, std::size_t count, std::size_t* elements) = 0; /** * Reads all elements * - * @see readElements(unsigned int, unsigned int, unsinged int*) + * @see readElements(size_t, size_t, size_t*) */ - void readElements(int* elements) { + void readElements(std::size_t* elements) { logInfo() << "Reading elements"; readElements(0, nElements(), elements); } diff --git a/src/meshreader/ParallelFidapReader.h b/src/meshreader/ParallelFidapReader.h index 5f563f6..dea856a 100644 --- a/src/meshreader/ParallelFidapReader.h +++ b/src/meshreader/ParallelFidapReader.h @@ -19,6 +19,7 @@ #include "FidapReader.h" #include "ParallelMeshReader.h" +#include "third_party/MPITraits.h" class ParallelFidapReader : public ParallelMeshReader { private: @@ -31,30 +32,32 @@ class ParallelFidapReader : public ParallelMeshReader { ParallelFidapReader(const char* meshFile, MPI_Comm comm = MPI_COMM_WORLD) : ParallelMeshReader(meshFile, comm) {} - void readElements(int* elements) { + void readElements(std::size_t* elements) { ParallelMeshReader::readElements(elements); // Create the face map - unsigned int chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; - unsigned int maxChunkSize = chunkSize; - if (m_rank == m_nProcs - 1) + std::size_t chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; + const std::size_t maxChunkSize = chunkSize; + if (m_rank == m_nProcs - 1) { chunkSize = nElements() - (m_nProcs - 1) * chunkSize; + } FidapReader::createFaceMap(elements, chunkSize, m_faceMap); - for (std::map::iterator i = m_faceMap.begin(); i != m_faceMap.end(); - i++) - i->second.element += m_rank * maxChunkSize; + for (auto& face : m_faceMap) { + face.second.element += m_rank * maxChunkSize; + } } /** * @copydoc ParallelGambitReader::readGroups */ void readGroups(int* groups) { - unsigned int chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; + std::size_t chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; if (m_rank == 0) { // Allocate second buffer so we can read and send in parallel - int* groups2 = new int[chunkSize]; + std::vector tempGroups(chunkSize); + int* groups2 = tempGroups.data(); if (m_nProcs % 2 == 0) // swap once so we have the correct buffer at the end swap(groups, groups2); @@ -71,7 +74,7 @@ class ParallelFidapReader : public ParallelMeshReader { if (m_nProcs > 1) { // Read last one - unsigned int 
lastChunkSize = nElements() - (m_nProcs - 1) * chunkSize; + const std::size_t lastChunkSize = nElements() - (m_nProcs - 1) * chunkSize; logInfo() << "Reading group information part" << (m_nProcs - 1) << "of" << m_nProcs; m_serialReader.readGroups((m_nProcs - 1) * chunkSize, lastChunkSize, groups); MPI_Wait(&request, MPI_STATUS_IGNORE); @@ -83,8 +86,6 @@ class ParallelFidapReader : public ParallelMeshReader { logInfo() << "Reading group information part" << m_nProcs << "of" << m_nProcs; m_serialReader.readGroups(0, chunkSize, groups); MPI_Wait(&request, MPI_STATUS_IGNORE); - - delete[] groups2; } else { if (m_rank == m_nProcs - 1) chunkSize = nElements() - (m_nProcs - 1) * chunkSize; @@ -123,36 +124,34 @@ class ParallelFidapReader : public ParallelMeshReader { std::vector facePos; std::vector faceType; - unsigned int chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; - unsigned int maxChunkSize = chunkSize; + std::size_t chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; + const std::size_t maxChunkSize = chunkSize; if (m_rank == 0) { - FidapBoundaryFace* faces = new FidapBoundaryFace[maxChunkSize]; + std::vector faces(maxChunkSize); - std::vector* aggregator = new std::vector[m_nProcs - 1]; - unsigned int* sizes = new unsigned int[m_nProcs - 1]; - MPI_Request* requests = new MPI_Request[(m_nProcs - 1) * 2]; - for (int i = 0; i < m_nProcs - 1; i++) { - requests[i * 2] = MPI_REQUEST_NULL; - requests[i * 2 + 1] = MPI_REQUEST_NULL; - } + std::vector> aggregator(m_nProcs - 1); + std::vector sizes(m_nProcs - 1); + std::vector requests((m_nProcs - 1) * 2, MPI_REQUEST_NULL); - unsigned int nChunks = (nBoundaries() + chunkSize - 1) / chunkSize; - for (unsigned int i = 0; i < nChunks; i++) { + const std::size_t nChunks = (nBoundaries() + chunkSize - 1) / chunkSize; + for (std::size_t i = 0; i < nChunks; i++) { logInfo() << "Reading boundary conditions part" << (i + 1) << "of" << nChunks; - if (i == nChunks - 1) + if (i == nChunks - 1) { chunkSize = nBoundaries() - (nChunks - 1) * chunkSize; + } - m_serialReader.readBoundaries(i * maxChunkSize, chunkSize, faces); + m_serialReader.readBoundaries(i * maxChunkSize, chunkSize, faces.data()); // Wait for all sending from last iteration - MPI_Waitall((m_nProcs - 1) * 2, requests, MPI_STATUSES_IGNORE); - for (int j = 0; j < m_nProcs - 1; j++) + MPI_Waitall((m_nProcs - 1) * 2, requests.data(), MPI_STATUSES_IGNORE); + for (int j = 0; j < m_nProcs - 1; j++) { aggregator[j].clear(); + } // Sort boundary conditions into the corresponding aggregator - for (unsigned int j = 0; j < chunkSize; j++) { + for (std::size_t j = 0; j < chunkSize; j++) { FaceVertex v(faces[j].vertices); if (v.vertices[1] < vertexChunk) { @@ -161,8 +160,9 @@ class ParallelFidapReader : public ParallelMeshReader { } else { // Face for another processor // Serials the struct to make sending easier - unsigned int proc = v.vertices[1] / vertexChunk; - aggregator[proc - 1].insert(aggregator[proc - 1].end(), v.vertices, v.vertices + 3); + int proc = v.vertices[1] / vertexChunk; + aggregator[proc - 1].insert(aggregator[proc - 1].end(), v.vertices.begin(), + v.vertices.end()); aggregator[proc - 1].push_back(faces[j].type); } } @@ -173,54 +173,49 @@ class ParallelFidapReader : public ParallelMeshReader { continue; sizes[j] = aggregator[j].size() / 4; // 3 vertices + face type - MPI_Isend(&sizes[j], 1, MPI_UNSIGNED, j + 1, 0, m_comm, &requests[j * 2]); + MPI_Isend(&sizes[j], 1, tndm::mpi_type_t(), j + 1, 0, m_comm, + &requests[j * 2]); MPI_Isend(&aggregator[j][0], aggregator[j].size(), MPI_INT, j 
+ 1, 0, m_comm, &requests[j * 2 + 1]); } } - MPI_Waitall((m_nProcs - 1) * 2, requests, MPI_STATUSES_IGNORE); - - delete[] faces; - delete[] aggregator; + MPI_Waitall((m_nProcs - 1) * 2, requests.data(), MPI_STATUSES_IGNORE); // Send finish signal to all other processes - memset(sizes, 0, (m_nProcs - 1) * sizeof(unsigned int)); - for (int i = 0; i < m_nProcs - 1; i++) - MPI_Isend(&sizes[i], 1, MPI_UNSIGNED, i + 1, 0, m_comm, &requests[i]); - MPI_Waitall(m_nProcs - 1, requests, MPI_STATUSES_IGNORE); - - delete[] sizes; - delete[] requests; + std::fill(sizes.begin(), sizes.end(), 0); + for (int i = 0; i < m_nProcs - 1; i++) { + MPI_Isend(&sizes[i], 1, tndm::mpi_type_t(), i + 1, 0, m_comm, &requests[i]); + } + MPI_Waitall(m_nProcs - 1, requests.data(), MPI_STATUSES_IGNORE); } else { if (m_rank == m_nProcs - 1) chunkSize = nElements() - (m_nProcs - 1) * chunkSize; // Allocate enough memory - int* buf = new int[chunkSize * 4]; + std::vector buf(chunkSize * 4); while (true) { - unsigned int size; - MPI_Recv(&size, 1, MPI_UNSIGNED, 0, 0, m_comm, MPI_STATUS_IGNORE); + std::size_t size; + MPI_Recv(&size, 1, tndm::mpi_type_t(), 0, 0, m_comm, MPI_STATUS_IGNORE); if (size == 0) // Finished break; - MPI_Recv(buf, size * 4, MPI_INT, 0, 0, m_comm, MPI_STATUS_IGNORE); + MPI_Recv(buf.data(), size * 4, tndm::mpi_type_t(), 0, 0, m_comm, + MPI_STATUS_IGNORE); - for (unsigned int i = 0; i < size * 4; i += 4) { - FaceVertex v(&buf[i]); + for (std::size_t i = 0; i < size * 4; i += 4) { + FaceVertex v(buf[i], buf[i + 1], buf[i + 2]); facePos.push_back(m_faceMap.at(v)); faceType.push_back(buf[i + 3]); } } - - delete[] buf; } // Distribute the faces to the final ranks PCU_Comm_Begin(); - for (unsigned int i = 0; i < facePos.size(); i++) { + for (std::size_t i = 0; i < facePos.size(); i++) { if (facePos[i].element / maxChunkSize == static_cast(m_rank)) boundaries[(facePos[i].element % maxChunkSize) * 4 + facePos[i].side] = faceType[i]; else { diff --git a/src/meshreader/ParallelGMSHReader.h b/src/meshreader/ParallelGMSHReader.h index 4d9426b..66b2902 100644 --- a/src/meshreader/ParallelGMSHReader.h +++ b/src/meshreader/ParallelGMSHReader.h @@ -43,10 +43,10 @@ template class ParallelGMSHReader { MPI_Bcast(&nElements_, 1, tndm::mpi_type_t(), 0, comm_); } - [[nodiscard]] unsigned int nVertices() const { return nVertices_; } - [[nodiscard]] unsigned int nElements() const { return nElements_; } - void readElements(int* elements) const { - static_assert(sizeof(GMSHBuilder::element_t) == (Dim + 1) * sizeof(int)); + [[nodiscard]] std::size_t nVertices() const { return nVertices_; } + [[nodiscard]] std::size_t nElements() const { return nElements_; } + void readElements(std::size_t* elements) const { + static_assert(sizeof(GMSHBuilder::element_t) == (Dim + 1) * sizeof(std::size_t)); scatter(builder_.elements.data()->data(), elements, nElements(), Dim + 1); } void readVertices(double* vertices) const { @@ -88,18 +88,18 @@ template class ParallelGMSHReader { std::sort(v2f.begin(), v2f.end()); } - for (unsigned elNo = 0; elNo < nElements; ++elNo) { - for (unsigned localFctNo = 0; localFctNo < nFacetsPerTet; ++localFctNo) { - unsigned nodes[nVertsPerTri]; - for (unsigned localNo = 0; localNo < nVertsPerTri; ++localNo) { + for (std::size_t elNo = 0; elNo < nElements; ++elNo) { + for (int localFctNo = 0; localFctNo < nFacetsPerTet; ++localFctNo) { + std::array nodes; + for (int localNo = 0; localNo < nVertsPerTri; ++localNo) { const auto localNoElement = Facet2Nodes[localFctNo][localNo]; nodes[localNo] = 
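The parallel readers above and below stream boundary and group data from rank 0 to the workers as a count message followed by a payload, and end the worker loop with a zero count. A simplified sketch of that protocol using blocking sends (the patch itself uses MPI_Isend plus MPI_Waitall and the mpi_type_t trait; names and the MPI_UINT64_T size type here are illustrative):

```cpp
// Sketch of the count-then-payload streaming protocol with a zero-size finish signal.
#include <mpi.h>
#include <cstdint>
#include <vector>

inline void masterSend(const std::vector<int>& payload, int worker, MPI_Comm comm) {
  std::uint64_t size = payload.size();
  MPI_Send(&size, 1, MPI_UINT64_T, worker, 0, comm);
  if (size > 0) {
    MPI_Send(payload.data(), static_cast<int>(payload.size()), MPI_INT, worker, 0, comm);
  }
}

inline void workerReceive(std::vector<int>& buffer, MPI_Comm comm) {
  while (true) {
    std::uint64_t size = 0;
    MPI_Recv(&size, 1, MPI_UINT64_T, 0, 0, comm, MPI_STATUS_IGNORE);
    if (size == 0) {
      break; // finish signal, matching the zero-size message sent by rank 0
    }
    buffer.resize(size);
    MPI_Recv(buffer.data(), static_cast<int>(size), MPI_INT, 0, 0, comm, MPI_STATUS_IGNORE);
    // ... process buffer ...
  }
}
```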
builder_.elements[elNo][localNoElement]; } - std::vector intersect[nVertsPerTri - 1]; + std::array, nVertsPerTri - 1> intersect; std::set_intersection(vertex2facets[nodes[0]].begin(), vertex2facets[nodes[0]].end(), vertex2facets[nodes[1]].begin(), vertex2facets[nodes[1]].end(), std::back_inserter(intersect[0])); - for (unsigned node = 2; node < nVertsPerTri; ++node) { + for (int node = 2; node < nVertsPerTri; ++node) { std::set_intersection(intersect[node - 2].begin(), intersect[node - 2].end(), vertex2facets[nodes[node]].begin(), vertex2facets[nodes[node]].end(), @@ -158,8 +158,8 @@ template class ParallelGMSHReader { MPI_Comm comm_; GMSHBuilder builder_; std::vector bcs_; - unsigned int nVertices_ = 0; - unsigned int nElements_ = 0; + std::size_t nVertices_ = 0; + std::size_t nElements_ = 0; }; } // namespace puml diff --git a/src/meshreader/ParallelGambitReader.h b/src/meshreader/ParallelGambitReader.h index 08bd5ed..2bfc970 100644 --- a/src/meshreader/ParallelGambitReader.h +++ b/src/meshreader/ParallelGambitReader.h @@ -18,6 +18,8 @@ #include "GambitReader.h" #include "ParallelMeshReader.h" +#include "third_party/MPITraits.h" + namespace puml { class ParallelGambitReader : public ParallelMeshReader { @@ -35,35 +37,33 @@ class ParallelGambitReader : public ParallelMeshReader { * This is a collective operation. */ void readGroups(int* groups) { - unsigned int chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; + std::size_t chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; if (m_rank == 0) { - unsigned int maxChunkSize = chunkSize; - ElementGroup* map = new ElementGroup[maxChunkSize]; + std::size_t maxChunkSize = chunkSize; + std::vector map(maxChunkSize); - std::vector* aggregator = new std::vector[m_nProcs - 1]; - unsigned int* sizes = new unsigned int[m_nProcs - 1]; - MPI_Request* requests = new MPI_Request[(m_nProcs - 1) * 2]; - for (int i = 0; i < m_nProcs - 1; i++) { - requests[i * 2] = MPI_REQUEST_NULL; - requests[i * 2 + 1] = MPI_REQUEST_NULL; - } + std::vector> aggregator(m_nProcs - 1); + std::vector sizes(m_nProcs - 1); + std::vector requests((m_nProcs - 1) * 2, MPI_REQUEST_NULL); for (int i = 0; i < m_nProcs; i++) { logInfo() << "Reading group information part" << (i + 1) << "of" << m_nProcs; - if (i == m_nProcs - 1) + if (i == m_nProcs - 1) { chunkSize = nElements() - (m_nProcs - 1) * chunkSize; + } - m_serialReader.readGroups(i * maxChunkSize, chunkSize, map); + m_serialReader.readGroups(i * maxChunkSize, chunkSize, map.data()); // Wait for all sending from last iteration - MPI_Waitall((m_nProcs - 1) * 2, requests, MPI_STATUSES_IGNORE); - for (int j = 0; j < m_nProcs - 1; j++) + MPI_Waitall((m_nProcs - 1) * 2, requests.data(), MPI_STATUSES_IGNORE); + for (int j = 0; j < m_nProcs - 1; j++) { aggregator[j].clear(); + } // Sort group numbers into the corresponding aggregator - for (unsigned int j = 0; j < chunkSize; j++) { + for (std::size_t j = 0; j < chunkSize; j++) { if (map[j].element < maxChunkSize) // Local element groups[map[j].element] = map[j].group; @@ -78,43 +78,40 @@ class ParallelGambitReader : public ParallelMeshReader { // Send send aggregated mapping for (int j = 0; j < m_nProcs - 1; j++) { - if (aggregator[j].empty()) + if (aggregator[j].empty()) { continue; + } sizes[j] = aggregator[j].size() / 2; // element id + group number - MPI_Isend(&sizes[j], 1, MPI_UNSIGNED, j + 1, 0, m_comm, &requests[j * 2]); + MPI_Isend(&sizes[j], 1, tndm::mpi_type_t(), j + 1, 0, m_comm, + &requests[j * 2]); MPI_Isend(&aggregator[j][0], aggregator[j].size(), MPI_INT, j + 1, 0, 
m_comm, &requests[j * 2 + 1]); } } - MPI_Waitall((m_nProcs - 1) * 2, requests, MPI_STATUSES_IGNORE); - - delete[] map; - delete[] aggregator; - delete[] sizes; - delete[] requests; + MPI_Waitall((m_nProcs - 1) * 2, requests.data(), MPI_STATUSES_IGNORE); } else { - if (m_rank == m_nProcs - 1) + if (m_rank == m_nProcs - 1) { chunkSize = nElements() - (m_nProcs - 1) * chunkSize; + } // Allocate enough memory - unsigned int* buf = new unsigned int[chunkSize * 2]; + std::vector buf(chunkSize * 2); unsigned int recieved = 0; while (recieved < chunkSize) { - unsigned int size; - MPI_Recv(&size, 1, MPI_UNSIGNED, 0, 0, m_comm, MPI_STATUS_IGNORE); - MPI_Recv(buf, size * 2, MPI_INT, 0, 0, m_comm, MPI_STATUS_IGNORE); + std::size_t size; + MPI_Recv(&size, 1, tndm::mpi_type_t(), 0, 0, m_comm, MPI_STATUS_IGNORE); + MPI_Recv(buf.data(), size * 2, MPI_INT, 0, 0, m_comm, MPI_STATUS_IGNORE); - for (unsigned int i = 0; i < size * 2; i += 2) + for (std::size_t i = 0; i < size * 2; i += 2) { groups[buf[i]] = buf[i + 1]; + } recieved += size; } - - delete[] buf; } } @@ -129,36 +126,33 @@ class ParallelGambitReader : public ParallelMeshReader { * @todo Only tetrahedral meshes are supported */ void readBoundaries(int* boundaries) { - unsigned int chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; + std::size_t chunkSize = (nElements() + m_nProcs - 1) / m_nProcs; if (m_rank == 0) { - unsigned int maxChunkSize = chunkSize; - GambitBoundaryFace* faces = new GambitBoundaryFace[maxChunkSize]; + std::size_t maxChunkSize = chunkSize; + std::vector faces(maxChunkSize); - std::vector* aggregator = new std::vector[m_nProcs - 1]; - unsigned int* sizes = new unsigned int[m_nProcs - 1]; - MPI_Request* requests = new MPI_Request[(m_nProcs - 1) * 2]; - for (int i = 0; i < m_nProcs - 1; i++) { - requests[i * 2] = MPI_REQUEST_NULL; - requests[i * 2 + 1] = MPI_REQUEST_NULL; - } + std::vector> aggregator(m_nProcs - 1); + std::vector sizes(m_nProcs - 1); + std::vector requests((m_nProcs - 1) * 2, MPI_REQUEST_NULL); - unsigned int nChunks = (nBoundaries() + chunkSize - 1) / chunkSize; - for (unsigned int i = 0; i < nChunks; i++) { + std::size_t nChunks = (nBoundaries() + chunkSize - 1) / chunkSize; + for (std::size_t i = 0; i < nChunks; i++) { logInfo() << "Reading boundary conditions part" << (i + 1) << "of" << nChunks; - if (i == nChunks - 1) + if (i == nChunks - 1) { chunkSize = nBoundaries() - (nChunks - 1) * chunkSize; + } - m_serialReader.readBoundaries(i * maxChunkSize, chunkSize, faces); + m_serialReader.readBoundaries(i * maxChunkSize, chunkSize, faces.data()); // Wait for all sending from last iteration - MPI_Waitall((m_nProcs - 1) * 2, requests, MPI_STATUSES_IGNORE); + MPI_Waitall((m_nProcs - 1) * 2, requests.data(), MPI_STATUSES_IGNORE); for (int j = 0; j < m_nProcs - 1; j++) aggregator[j].clear(); // Sort boundary conditions into the corresponding aggregator - for (unsigned int j = 0; j < chunkSize; j++) { + for (std::size_t j = 0; j < chunkSize; j++) { if (faces[j].element < maxChunkSize) // Local element boundaries[faces[j].element * 4 + faces[j].face] = faces[j].type; @@ -173,50 +167,48 @@ class ParallelGambitReader : public ParallelMeshReader { // Send send aggregated values for (int j = 0; j < m_nProcs - 1; j++) { - if (aggregator[j].empty()) + if (aggregator[j].empty()) { continue; + } sizes[j] = aggregator[j].size() / 2; // element id + face type - MPI_Isend(&sizes[j], 1, MPI_UNSIGNED, j + 1, 0, m_comm, &requests[j * 2]); + MPI_Isend(&sizes[j], 1, tndm::mpi_type_t(), j + 1, 0, m_comm, + &requests[j * 2]); 
MPI_Isend(&aggregator[j][0], aggregator[j].size(), MPI_INT, j + 1, 0, m_comm, &requests[j * 2 + 1]); } } - MPI_Waitall((m_nProcs - 1) * 2, requests, MPI_STATUSES_IGNORE); - - delete[] faces; - delete[] aggregator; + MPI_Waitall((m_nProcs - 1) * 2, requests.data(), MPI_STATUSES_IGNORE); // Send finish signal to all other processes - memset(sizes, 0, (m_nProcs - 1) * sizeof(unsigned int)); - for (int i = 0; i < m_nProcs - 1; i++) - MPI_Isend(&sizes[i], 1, MPI_UNSIGNED, i + 1, 0, m_comm, &requests[i]); - MPI_Waitall(m_nProcs - 1, requests, MPI_STATUSES_IGNORE); - - delete[] sizes; - delete[] requests; + std::fill(sizes.begin(), sizes.end(), 0); + for (int i = 0; i < m_nProcs - 1; i++) { + MPI_Isend(&sizes[i], 1, tndm::mpi_type_t(), i + 1, 0, m_comm, &requests[i]); + } + MPI_Waitall(m_nProcs - 1, requests.data(), MPI_STATUSES_IGNORE); } else { - if (m_rank == m_nProcs - 1) + if (m_rank == m_nProcs - 1) { chunkSize = nElements() - (m_nProcs - 1) * chunkSize; + } // Allocate enough memory - int* buf = new int[chunkSize * 2]; + std::vector buf(chunkSize * 2); while (true) { - unsigned int size; - MPI_Recv(&size, 1, MPI_UNSIGNED, 0, 0, m_comm, MPI_STATUS_IGNORE); - if (size == 0) + std::size_t size; + MPI_Recv(&size, 1, tndm::mpi_type_t(), 0, 0, m_comm, MPI_STATUS_IGNORE); + if (size == 0) { // Finished break; + } - MPI_Recv(buf, size * 2, MPI_INT, 0, 0, m_comm, MPI_STATUS_IGNORE); + MPI_Recv(buf.data(), size * 2, MPI_INT, 0, 0, m_comm, MPI_STATUS_IGNORE); - for (unsigned int i = 0; i < size * 2; i += 2) + for (std::size_t i = 0; i < size * 2; i += 2) { boundaries[buf[i]] = buf[i + 1]; + } } - - delete[] buf; } } }; diff --git a/src/meshreader/ParallelMeshReader.h b/src/meshreader/ParallelMeshReader.h index b851c9e..e76429f 100644 --- a/src/meshreader/ParallelMeshReader.h +++ b/src/meshreader/ParallelMeshReader.h @@ -15,14 +15,17 @@ #include +#include #include +#include "third_party/MPITraits.h" + template class ParallelMeshReader { private: // Some variables that are required by all processes - unsigned int m_nVertices; - unsigned int m_nElements; - unsigned int m_nBoundaries; + std::size_t m_nVertices; + std::size_t m_nElements; + std::size_t m_nBoundaries; protected: const MPI_Comm m_comm; @@ -50,7 +53,7 @@ template class ParallelMeshReader { virtual ~ParallelMeshReader() {} void open(const char* meshFile) { - unsigned int vars[3]; + std::array vars; if (m_rank == 0) { m_serialReader.open(meshFile); @@ -60,21 +63,21 @@ template class ParallelMeshReader { vars[2] = m_serialReader.nBoundaries(); } - MPI_Bcast(vars, 3, MPI_UNSIGNED, 0, m_comm); + MPI_Bcast(vars.data(), 3, tndm::mpi_type_t(), 0, m_comm); m_nVertices = vars[0]; m_nElements = vars[1]; m_nBoundaries = vars[2]; } - unsigned int nVertices() const { return m_nVertices; } + std::size_t nVertices() const { return m_nVertices; } - unsigned int nElements() const { return m_nElements; } + std::size_t nElements() const { return m_nElements; } /** * @return Number of boundary faces */ - unsigned int nBoundaries() const { return m_nBoundaries; } + std::size_t nBoundaries() const { return m_nBoundaries; } /** * Reads all vertices @@ -86,11 +89,12 @@ template class ParallelMeshReader { * @todo Only 3 dimensional meshes are supported */ void readVertices(double* vertices) { - unsigned int chunkSize = (m_nVertices + m_nProcs - 1) / m_nProcs; + std::size_t chunkSize = (m_nVertices + m_nProcs - 1) / m_nProcs; if (m_rank == 0) { // Allocate second buffer so we can read and send in parallel - double* vertices2 = new double[chunkSize * 3]; + std::vector 
tempVertices(chunkSize * 3); + double* vertices2 = tempVertices.data(); if (m_nProcs % 2 == 0) // swap once so we have the correct buffer at the end swap(vertices, vertices2); @@ -106,7 +110,7 @@ template class ParallelMeshReader { if (m_nProcs > 1) { // Read last one - unsigned int lastChunkSize = m_nVertices - (m_nProcs - 1) * chunkSize; + const std::size_t lastChunkSize = m_nVertices - (m_nProcs - 1) * chunkSize; logInfo() << "Reading vertices part" << (m_nProcs - 1) << "of" << m_nProcs; m_serialReader.readVertices((m_nProcs - 1) * chunkSize, lastChunkSize, vertices); MPI_Wait(&request, MPI_STATUS_IGNORE); @@ -118,8 +122,6 @@ template class ParallelMeshReader { logInfo() << "Reading vertices part" << m_nProcs << "of" << m_nProcs; m_serialReader.readVertices(0, chunkSize, vertices); MPI_Wait(&request, MPI_STATUS_IGNORE); - - delete[] vertices2; } else { if (m_rank == m_nProcs - 1) chunkSize = m_nVertices - (m_nProcs - 1) * chunkSize; @@ -137,12 +139,13 @@ template class ParallelMeshReader { * * @todo Only tetrahedral meshes are supported */ - virtual void readElements(int* elements) { - unsigned int chunkSize = (m_nElements + m_nProcs - 1) / m_nProcs; + virtual void readElements(std::size_t* elements) { + std::size_t chunkSize = (m_nElements + m_nProcs - 1) / m_nProcs; if (m_rank == 0) { // Allocate second buffer so we can read and send in parallel - int* elements2 = new int[chunkSize * 4]; + std::vector tempElements(chunkSize * 4); + std::size_t* elements2 = tempElements.data(); if (m_nProcs % 2 == 0) // swap once so we have the correct buffer at the end swap(elements, elements2); @@ -153,17 +156,18 @@ template class ParallelMeshReader { logInfo() << "Reading elements part" << i << "of" << m_nProcs; m_serialReader.readElements(i * chunkSize, chunkSize, elements); MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Isend(elements, chunkSize * 4, MPI_INT, i, 0, m_comm, &request); + MPI_Isend(elements, chunkSize * 4, tndm::mpi_type_t(), i, 0, m_comm, &request); swap(elements, elements2); } if (m_nProcs > 1) { // Read last one - unsigned int lastChunkSize = m_nElements - (m_nProcs - 1) * chunkSize; + const std::size_t lastChunkSize = m_nElements - (m_nProcs - 1) * chunkSize; logInfo() << "Reading elements part" << (m_nProcs - 1) << "of" << m_nProcs; m_serialReader.readElements((m_nProcs - 1) * chunkSize, lastChunkSize, elements); MPI_Wait(&request, MPI_STATUS_IGNORE); - MPI_Isend(elements, lastChunkSize * 4, MPI_INT, m_nProcs - 1, 0, m_comm, &request); + MPI_Isend(elements, lastChunkSize * 4, tndm::mpi_type_t(), m_nProcs - 1, 0, + m_comm, &request); swap(elements, elements2); } @@ -171,13 +175,12 @@ template class ParallelMeshReader { logInfo() << "Reading elements part" << m_nProcs << "of" << m_nProcs; m_serialReader.readElements(0, chunkSize, elements); MPI_Wait(&request, MPI_STATUS_IGNORE); - - delete[] elements2; } else { if (m_rank == m_nProcs - 1) chunkSize = m_nElements - (m_nProcs - 1) * chunkSize; - MPI_Recv(elements, chunkSize * 4, MPI_INT, 0, 0, m_comm, MPI_STATUS_IGNORE); + MPI_Recv(elements, chunkSize * 4, tndm::mpi_type_t(), 0, 0, m_comm, + MPI_STATUS_IGNORE); } } diff --git a/src/pumgen.cpp b/src/pumgen.cpp index 57a046a..b16fec5 100644 --- a/src/pumgen.cpp +++ b/src/pumgen.cpp @@ -10,13 +10,17 @@ * @author Sebastian Rettenberger */ +#include +#include #include #include #include #include #include +#include #include +#include #include #include @@ -25,7 +29,9 @@ #include // #include #include +#include +#include "third_party/MPITraits.h" #include "utils/args.h" #include 
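// --- Illustrative sketch of the double-buffer pattern used by readVertices()/readElements()
// above: rank 0 reads the next chunk from the serial reader while the previous chunk is still
// in flight via MPI_Isend, swapping the two buffers every iteration; the extra swap for an
// even process count ensures the final chunk (rank 0's own data) lands in the caller's
// buffer. readChunk and comm are placeholders, not PUMGen names.
#include <mpi.h>

#include <algorithm>
#include <cstddef>
#include <vector>

static void scatterChunks(double* out, std::size_t nVertices, int nProcs, MPI_Comm comm,
                          void (*readChunk)(std::size_t offset, std::size_t size, double* buf)) {
  const std::size_t chunkSize = (nVertices + nProcs - 1) / nProcs;
  std::vector<double> temp(chunkSize * 3);
  double* other = temp.data();
  if (nProcs % 2 == 0) {
    std::swap(out, other); // swap once so the correct buffer is left at the end
  }

  MPI_Request request = MPI_REQUEST_NULL;
  for (int i = 1; i < nProcs; i++) {
    const std::size_t size = (i == nProcs - 1) ? nVertices - i * chunkSize : chunkSize;
    readChunk(i * chunkSize, size, out); // overlaps with the send issued last iteration
    MPI_Wait(&request, MPI_STATUS_IGNORE);
    MPI_Isend(out, size * 3, MPI_DOUBLE, i, 0, comm, &request);
    std::swap(out, other);
  }

  readChunk(0, chunkSize, out); // rank 0 keeps the first chunk
  MPI_Wait(&request, MPI_STATUS_IGNORE);
}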
"utils/logger.h" @@ -43,13 +49,141 @@ #include "meshreader/ParallelGambitReader.h" #include "third_party/GMSH2Parser.h" -template static void _checkH5Err(TT status, const char* file, int line) { - if (status < 0) +template static TT _checkH5Err(TT&& status, const char* file, int line) { + if (status < 0) { logError() << utils::nospace << "An HDF5 error occurred (" << file << ": " << line << ")"; + } + return std::forward(status); } #define checkH5Err(...) _checkH5Err(__VA_ARGS__, __FILE__, __LINE__) +static int ilog(std::size_t value, int expbase = 1) { + int count = 0; + while (value > 0) { + value >>= expbase; + ++count; + } + return count; +} + +template static void iterate(apf::Mesh* mesh, int dim, F&& function) { + apf::MeshIterator* it = mesh->begin(dim); + while (apf::MeshEntity* element = mesh->iterate(it)) { + std::invoke(function, element); + } + mesh->end(it); +} + +constexpr std::size_t NoSecondDim = 0; + +template +static void writeH5Data(F&& handler, hid_t h5file, const std::string& name, apf::Mesh* mesh, + int meshdim, hid_t h5memtype, hid_t h5outtype, std::size_t chunk, + std::size_t localSize, std::size_t globalSize, bool reduceInts, + int filterEnable, std::size_t filterChunksize) { + constexpr std::size_t SecondSize = std::max(SecondDim, static_cast(1)); + constexpr std::size_t Dimensions = SecondDim == 0 ? 1 : 2; + const std::size_t chunkSize = chunk / SecondSize / sizeof(T); + const std::size_t bufferSize = std::min(localSize, chunkSize); + std::vector data(SecondSize * bufferSize); + + std::size_t rounds = (localSize + chunk - 1) / chunk; + + MPI_Allreduce(MPI_IN_PLACE, &rounds, 1, tndm::mpi_type_t(), MPI_MAX, + MPI_COMM_WORLD); + + hsize_t globalSizes[2] = {globalSize, SecondDim}; + hid_t h5space = H5Screate_simple(Dimensions, globalSizes, nullptr); + + hsize_t sizes[2] = {bufferSize, SecondDim}; + hid_t h5memspace = H5Screate_simple(Dimensions, sizes, nullptr); + + std::size_t offset = localSize; + + MPI_Scan(MPI_IN_PLACE, &offset, 1, tndm::mpi_type_t(), MPI_SUM, MPI_COMM_WORLD); + + offset -= localSize; + + hsize_t start[2] = {offset, 0}; + hsize_t count[2] = {bufferSize, SecondDim}; + + hid_t h5dxlist = H5Pcreate(H5P_DATASET_XFER); + checkH5Err(h5dxlist); + checkH5Err(H5Pset_dxpl_mpio(h5dxlist, H5FD_MPIO_COLLECTIVE)); + + hid_t h5type = h5outtype; + if (reduceInts && std::is_integral_v) { + h5type = H5Tcopy(h5outtype); + std::size_t bits = ilog(globalSize); + std::size_t unsignedSize = (bits + 7) / 8; + checkH5Err(h5type); + checkH5Err(H5Tset_size(h5type, unsignedSize)); + checkH5Err(H5Tcommit(h5file, (std::string("/") + name + std::string("Type")).c_str(), h5type, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)); + } + + hid_t h5filter = H5P_DEFAULT; + if (filterEnable > 0) { + h5filter = checkH5Err(H5Pcreate(H5P_DATASET_CREATE)); + hsize_t chunk[2] = {std::min(filterChunksize, bufferSize), SecondDim}; + checkH5Err(H5Pset_chunk(h5filter, 2, chunk)); + if (filterEnable == 1 && std::is_integral_v) { + checkH5Err(H5Pset_scaleoffset(h5filter, H5Z_SO_INT, H5Z_SO_INT_MINBITS_DEFAULT)); + } else if (filterEnable < 11) { + int deflateStrength = filterEnable - 1; + checkH5Err(H5Pset_deflate(h5filter, deflateStrength)); + } + } + + hid_t h5data = H5Dcreate(h5file, (std::string("/") + name).c_str(), h5type, h5space, H5P_DEFAULT, + h5filter, H5P_DEFAULT); + + std::size_t written = 0; + std::size_t index = 0; + + apf::MeshIterator* it = mesh->begin(meshdim); + + hsize_t nullstart[2] = {0, 0}; + + for (std::size_t i = 0; i < rounds; ++i) { + index = 0; + start[0] = offset + written; 
+ count[0] = std::min(localSize - written, bufferSize); + + while (apf::MeshEntity* element = mesh->iterate(it)) { + if (handler(element, data.begin() + index * SecondSize)) { + ++index; + } + + if (index >= count[0]) { + break; + } + } + + checkH5Err(H5Sselect_hyperslab(h5memspace, H5S_SELECT_SET, nullstart, nullptr, count, nullptr)); + + checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, nullptr, count, nullptr)); + + checkH5Err(H5Dwrite(h5data, h5memtype, h5memspace, h5space, h5dxlist, data.data())); + + written += count[0]; + } + + mesh->end(it); + + if (filterEnable > 0) { + checkH5Err(H5Pclose(h5filter)); + } + if (reduceInts) { + checkH5Err(H5Tclose(h5type)); + } + checkH5Err(H5Sclose(h5space)); + checkH5Err(H5Sclose(h5memspace)); + checkH5Err(H5Dclose(h5data)); + checkH5Err(H5Pclose(h5dxlist)); +} + int main(int argc, char* argv[]) { int rank = 0; int processes = 1; @@ -68,6 +202,23 @@ int main(int argc, char* argv[]) { args.addOption("dump", 'd', "Dump APF mesh before partitioning it", utils::Args::Required, false); args.addOption("model", 0, "Dump/Load a specific model file", utils::Args::Required, false); args.addOption("vtk", 0, "Dump mesh to VTK files", utils::Args::Required, false); + + const char* filters[] = {"none", "scaleoffset", "deflate1", "deflate2", + "deflate3", "deflate4", "deflate5", "deflate6", + "deflate7", "deflate8", "deflate9"}; + args.addOption("compactify-datatypes", 0, + "Compress index and group data types to minimum byte size (no HDF5 filters)", + utils::Args::Required, false); + args.addEnumOption("filter-enable", filters, 0, + "Apply HDF5 filters (i.e. compression). Disabled by default.", false); + args.addOption("filter-chunksize", 0, "Chunksize for filters (default=4096).", + utils::Args::Required, false); + args.addOption("chunksize", 0, "Chunksize for writing (default=1 GiB).", utils::Args::Required, + false); + const char* boundarytypes[] = {"int32", "int64"}; + args.addEnumOption("boundarytype", boundarytypes, 0, + "Type for writing out boundary data (default: int32).", false); + args.addOption("license", 'l', "License file (only used by SimModSuite)", utils::Args::Required, false); #ifdef PARASOLID @@ -103,18 +254,56 @@ int main(int argc, char* argv[]) { // Compute default output filename outputFile = inputFile; size_t dotPos = outputFile.find_last_of('.'); - if (dotPos != std::string::npos) + if (dotPos != std::string::npos) { outputFile.erase(dotPos); + } outputFile.append(".puml.h5"); } + hsize_t chunksize = args.getArgument("filter-chunksize", static_cast(1) << 30); + + bool reduceInts = args.isSet("compactify-datatypes"); + int filterEnable = args.getArgument("filter-enable", 0); + hsize_t filterChunksize = args.getArgument("filter-chunksize", 4096); + bool applyFilters = filterEnable > 0; + if (reduceInts) { + logInfo(rank) << "Using compact integer types."; + } + if (filterEnable == 0) { + logInfo(rank) << "No filtering enabled (contiguous storage)"; + } else { + logInfo(rank) << "Using filtering. 
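// --- Illustrative sketch (simplified from the write loop above): with H5FD_MPIO_COLLECTIVE
// every rank must issue the same number of H5Dwrite() calls, so the loop runs for the
// maximum over all ranks of the local chunk count, and ranks that run out of data keep
// making the collective call with an empty remainder. MPI_UNSIGNED_LONG stands in for the
// mpi_type_t trait used in the patch and assumes std::size_t == unsigned long.
#include <mpi.h>

#include <cstddef>

static std::size_t globalRounds(std::size_t localSize, std::size_t bufferSize, MPI_Comm comm) {
  std::size_t rounds = (localSize + bufferSize - 1) / bufferSize; // local number of chunks
  MPI_Allreduce(MPI_IN_PLACE, &rounds, 1, MPI_UNSIGNED_LONG, MPI_MAX, comm);
  return rounds;
}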
Chunksize:" << filterChunksize; + if (filterEnable == 1) { + logInfo(rank) << "Compression: scale-offset compression for integers (disabled for floats)"; + } else if (filterEnable < 11) { + logInfo(rank) << "Compression: deflate, strength" << filterEnable - 1 + << "(note: this requires HDF5 to be compiled with GZIP support; this applies " + "to SeisSol as well)"; + } + } + + int boundaryType = args.getArgument("boundarytype", 0); + hid_t boundaryDatatype; + int faceOffset; + if (boundaryType == 0) { + boundaryDatatype = H5T_STD_I32LE; + faceOffset = 8; + logInfo(rank) << "Using 32-bit integer boundary type conditions."; + } else if (boundaryType == 1) { + boundaryDatatype = H5T_STD_I64LE; + faceOffset = 16; + logInfo(rank) << "Using 64-bit integer boundary type conditions."; + } + std::string xdmfFile = outputFile; - if (utils::StringUtils::endsWith(outputFile, ".puml.h5")) + if (utils::StringUtils::endsWith(outputFile, ".puml.h5")) { utils::StringUtils::replaceLast(xdmfFile, ".puml.h5", ".xdmf"); - if (utils::StringUtils::endsWith(outputFile, ".h5")) + } + if (utils::StringUtils::endsWith(outputFile, ".h5")) { utils::StringUtils::replaceLast(xdmfFile, ".h5", ".xdmf"); - else + } else { xdmfFile.append(".xdmf"); + } // Create/read the mesh MeshInput* meshInput = 0L; @@ -168,6 +357,8 @@ int main(int argc, char* argv[]) { mesh = meshInput->getMesh(); + logInfo(rank) << "Parsed mesh successfully, writing output..."; + // Check mesh if (alignMdsMatches(mesh)) logWarning() << "Fixed misaligned matches"; @@ -191,11 +382,30 @@ int main(int argc, char* argv[]) { apf::writeVtkFiles(vtkPrefix, mesh); } + apf::Sharing* sharing = apf::getSharing(mesh); + + // oriented at the apf::countOwned method ... But with size_t + auto countOwnedLong = [sharing, mesh](int dim) { + std::size_t counter = 0; + + apf::MeshIterator* it = mesh->begin(dim); + while (apf::MeshEntity* element = mesh->iterate(it)) { + if (sharing->isOwned(element)) { + ++counter; + } + } + mesh->end(it); + + return counter; + }; + + // TODO(David): replace by apf::countOwned again once extended + // Get local/global size - unsigned int localSize[2] = {static_cast(apf::countOwned(mesh, 3)), - static_cast(apf::countOwned(mesh, 0))}; - unsigned long globalSize[2] = {localSize[0], localSize[1]}; - MPI_Allreduce(MPI_IN_PLACE, globalSize, 2, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); + std::size_t localSize[2] = {countOwnedLong(3), countOwnedLong(0)}; + std::size_t globalSize[2] = {localSize[0], localSize[1]}; + MPI_Allreduce(MPI_IN_PLACE, globalSize, 2, tndm::mpi_type_t(), MPI_SUM, + MPI_COMM_WORLD); logInfo(rank) << "Mesh size:" << globalSize[0]; @@ -210,8 +420,8 @@ int main(int argc, char* argv[]) { logInfo(rank) << "Minimum insphere found:" << min; // Get offsets - unsigned long offsets[2] = {localSize[0], localSize[1]}; - MPI_Scan(MPI_IN_PLACE, offsets, 2, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); + std::size_t offsets[2] = {localSize[0], localSize[1]}; + MPI_Scan(MPI_IN_PLACE, offsets, 2, tndm::mpi_type_t(), MPI_SUM, MPI_COMM_WORLD); offsets[0] -= localSize[0]; offsets[1] -= localSize[1]; @@ -233,140 +443,64 @@ int main(int argc, char* argv[]) { checkH5Err(H5Pclose(h5falist)); // Write cells + std::size_t connectBytesPerData = 8; logInfo(rank) << "Writing cells"; - - hsize_t sizes[2] = {globalSize[0], 4}; - hid_t h5space = H5Screate_simple(2, sizes, 0L); - checkH5Err(h5space); - - hid_t h5connect = - H5Dcreate(h5file, "/connect", H5T_STD_U64LE, h5space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - checkH5Err(h5connect); - - hsize_t 
start[2] = {offsets[0], 0}; - hsize_t count[2] = {localSize[0], 4}; - checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, 0L, count, 0L)); - - sizes[0] = localSize[0]; - hid_t h5memspace = H5Screate_simple(2, sizes, 0L); - checkH5Err(h5memspace); - - hid_t h5dxlist = H5Pcreate(H5P_DATASET_XFER); - checkH5Err(h5dxlist); - checkH5Err(H5Pset_dxpl_mpio(h5dxlist, H5FD_MPIO_COLLECTIVE)); - - unsigned long* connect = new unsigned long[localSize[0] * 4]; - it = mesh->begin(3); - unsigned int index = 0; - while (apf::MeshEntity* element = mesh->iterate(it)) { - apf::NewArray vn; - apf::getElementNumbers(vertexNum, element, vn); - - for (unsigned int i = 0; i < 4; i++) - connect[index * 4 + i] = vn[i]; - - index++; - } - mesh->end(it); - - checkH5Err(H5Dwrite(h5connect, H5T_NATIVE_ULONG, h5memspace, h5space, h5dxlist, connect)); - - checkH5Err(H5Sclose(h5space)); - checkH5Err(H5Sclose(h5memspace)); - checkH5Err(H5Dclose(h5connect)); - - delete[] connect; + writeH5Data( + [&](auto& element, auto&& data) { + apf::NewArray vn; + apf::getElementNumbers(vertexNum, element, vn); + + for (int i = 0; i < 4; i++) { + *(data + i) = vn[i]; + } + return true; + }, + h5file, "connect", mesh, 3, H5T_NATIVE_ULONG, H5T_STD_U64LE, chunksize, localSize[0], + globalSize[0], reduceInts, filterEnable, filterChunksize); // Vertices logInfo(rank) << "Writing vertices"; + writeH5Data( + [&](auto& element, auto&& data) { + if (!sharing->isOwned(element)) { + return false; + } - sizes[0] = globalSize[1]; - sizes[1] = 3; - h5space = H5Screate_simple(2, sizes, 0L); - checkH5Err(h5space); - - hid_t h5geometry = H5Dcreate(h5file, "/geometry", H5T_IEEE_F64LE, h5space, H5P_DEFAULT, - H5P_DEFAULT, H5P_DEFAULT); - checkH5Err(h5geometry); - - start[0] = offsets[1]; - count[0] = localSize[1]; - count[1] = 3; - checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, 0L, count, 0L)); + /*long gid = apf::getNumber(vertexNum, apf::Node(element, 0)); - sizes[0] = localSize[1]; - h5memspace = H5Screate_simple(2, sizes, 0L); - checkH5Err(h5memspace); + if (gid != static_cast(offsets[1] + index)) { + logError() << "Global vertex numbering is incorrect"; + }*/ - apf::Sharing* sharing = apf::getSharing(mesh); - - double* geometry = new double[localSize[1] * 3]; - it = mesh->begin(0); - index = 0; - while (apf::MeshEntity* element = mesh->iterate(it)) { - if (!sharing->isOwned(element)) - continue; + apf::Vector3 point; + mesh->getPoint(element, 0, point); + double geometry[3]; + point.toArray(geometry); - long gid = apf::getNumber(vertexNum, apf::Node(element, 0)); + *(data + 0) = geometry[0]; + *(data + 1) = geometry[1]; + *(data + 2) = geometry[2]; - if (gid != static_cast(offsets[1] + index)) - logError() << "Global vertex numbering is incorrect"; - - apf::Vector3 point; - mesh->getPoint(element, 0, point); - point.toArray(&geometry[index * 3]); - - index++; - } - mesh->end(it); - - checkH5Err(H5Dwrite(h5geometry, H5T_NATIVE_DOUBLE, h5memspace, h5space, h5dxlist, geometry)); - - checkH5Err(H5Sclose(h5space)); - checkH5Err(H5Sclose(h5memspace)); - checkH5Err(H5Dclose(h5geometry)); - - delete[] geometry; + return true; + }, + h5file, "geometry", mesh, 0, H5T_IEEE_F64LE, H5T_IEEE_F64LE, chunksize, localSize[1], + globalSize[1], reduceInts, filterEnable, filterChunksize); // Group information apf::MeshTag* groupTag = mesh->findTag("group"); + + std::size_t groupBytesPerData = 4; if (groupTag) { logInfo(rank) << "Writing group information"; - - sizes[0] = globalSize[0]; - h5space = H5Screate_simple(1, sizes, 0L); - 
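// --- Illustrative sketch of the handler contract expected by writeH5Data(): the callback
// receives a mesh entity and an iterator positioned at that entity's SecondDim-wide slot in
// the staging buffer, fills the slot, and returns whether the entity produced a row. The
// geometry handler above returns false for vertices this rank does not own, so the slot is
// reused for the next owned vertex and the row count is not advanced. The Vertex struct
// below is a placeholder for the APF calls used in the patch.
struct Vertex {
  bool owned;
  double xyz[3];
};

template <typename Iterator> static bool geometrySketchHandler(const Vertex& v, Iterator slot) {
  if (!v.owned) {
    return false; // skipped: no row written, index not incremented
  }
  *(slot + 0) = v.xyz[0];
  *(slot + 1) = v.xyz[1];
  *(slot + 2) = v.xyz[2];
  return true;
}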
checkH5Err(h5space); - - hid_t h5group = - H5Dcreate(h5file, "/group", H5T_STD_I32LE, h5space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - checkH5Err(h5group); - - start[0] = offsets[0]; - count[0] = localSize[0]; - checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, 0L, count, 0L)); - - sizes[0] = localSize[0]; - h5memspace = H5Screate_simple(1, sizes, 0L); - checkH5Err(h5memspace); - - int* group = new int[localSize[0]]; - it = mesh->begin(3); - index = 0; - while (apf::MeshEntity* element = mesh->iterate(it)) { - assert(mesh->hasTag(element, groupTag)); - - mesh->getIntTag(element, groupTag, &group[index]); - index++; - } - mesh->end(it); - - checkH5Err(H5Dwrite(h5group, H5T_NATIVE_INT, h5memspace, h5space, h5dxlist, group)); - - checkH5Err(H5Sclose(h5space)); - checkH5Err(H5Sclose(h5memspace)); - checkH5Err(H5Dclose(h5group)); - - delete[] group; + writeH5Data( + [&](auto& element, auto&& data) { + int group; + mesh->getIntTag(element, groupTag, &group); + *data = group; + return true; + }, + h5file, "group", mesh, 3, H5T_NATIVE_INT, H5T_STD_I32LE, chunksize, localSize[0], + globalSize[0], reduceInts, filterEnable, filterChunksize); } else { logInfo() << "No group information found in mesh"; } @@ -375,59 +509,35 @@ int main(int argc, char* argv[]) { logInfo(rank) << "Writing boundary condition"; apf::MeshTag* boundaryTag = mesh->findTag("boundary condition"); assert(boundaryTag); - - int* boundary = new int[localSize[0]]; - memset(boundary, 0, localSize[0] * sizeof(int)); - - sizes[0] = globalSize[0]; - h5space = H5Screate_simple(1, sizes, 0L); - checkH5Err(h5space); - - hid_t h5boundary = - H5Dcreate(h5file, "/boundary", H5T_STD_I32LE, h5space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - checkH5Err(h5boundary); - - start[0] = offsets[0]; - count[0] = localSize[0]; - checkH5Err(H5Sselect_hyperslab(h5space, H5S_SELECT_SET, start, 0L, count, 0L)); - - sizes[0] = localSize[0]; - h5memspace = H5Screate_simple(1, sizes, 0L); - checkH5Err(h5memspace); - - it = mesh->begin(3); - index = 0; - while (apf::MeshEntity* element = mesh->iterate(it)) { - apf::Downward faces; - mesh->getDownward(element, 2, faces); - - for (unsigned int i = 0; i < 4; i++) { - if (mesh->hasTag(faces[i], boundaryTag)) { - int b; - mesh->getIntTag(faces[i], boundaryTag, &b); - - if (b <= 0 || b > std::numeric_limits::max()) - logError() << "Cannot handle boundary condition" << b; - - boundary[index] += b << (i * 8); - } - } - - index++; - } - mesh->end(it); - - checkH5Err(H5Dwrite(h5boundary, H5T_NATIVE_INT, h5memspace, h5space, h5dxlist, boundary)); - - checkH5Err(H5Sclose(h5space)); - checkH5Err(H5Sclose(h5memspace)); - checkH5Err(H5Dclose(h5boundary)); - - delete[] boundary; - - checkH5Err(H5Pclose(h5dxlist)); - - delete meshInput; + auto i32limit = std::numeric_limits::max(); + auto i64limit = std::numeric_limits::max(); + writeH5Data( + [&](auto& element, auto&& data) { + long boundary = 0; + apf::Downward faces; + mesh->getDownward(element, 2, faces); + + for (int i = 0; i < 4; i++) { + if (mesh->hasTag(faces[i], boundaryTag)) { + int b; + mesh->getIntTag(faces[i], boundaryTag, &b); + + if (b <= 0 || (b > i32limit && boundaryType == 0) || + (b > i64limit && boundaryType == 1)) { + logError() << "Cannot handle boundary condition" << b; + } + + // NOTE: this fails for boundary values per face larger than 2**31 - 1 due to sign + // extension + boundary |= static_cast(b) << (i * faceOffset); + } + } + + *data = boundary; + return true; + }, + h5file, "boundary", mesh, 3, H5T_NATIVE_LONG, boundaryDatatype, 
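// --- Illustrative sketch of the boundary packing performed by the handler above: the four
// face conditions of a tetrahedron are packed into one integer, with face i shifted by
// i * faceOffset bits (8 bits per face for the int32 output type, 16 bits for int64). With
// 8 bits per face only values up to 255 fit without spilling into the next face's slot.
#include <array>
#include <cstdint>

static std::int64_t packBoundary(const std::array<int, 4>& faceConditions, int faceOffset) {
  std::int64_t packed = 0;
  for (int i = 0; i < 4; i++) {
    packed |= static_cast<std::int64_t>(faceConditions[i]) << (i * faceOffset);
  }
  return packed;
}

static int unpackBoundary(std::int64_t packed, int face, int faceOffset) {
  const std::int64_t mask = (std::int64_t(1) << faceOffset) - 1;
  return static_cast<int>((packed >> (face * faceOffset)) & mask);
}
// packBoundary({1, 0, 3, 5}, 8) == 0x05030001; unpackBoundary(0x05030001, 3, 8) == 5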
chunksize, localSize[0], + globalSize[0], reduceInts, filterEnable, filterChunksize); // Writing XDMF file if (rank == 0) { @@ -451,7 +561,8 @@ int main(int argc, char* argv[]) { << std::endl // This should be UInt but for some reason this does not work with // binary data - << " " << basename << ":/connect" << std::endl << " " << std::endl @@ -462,14 +573,15 @@ int main(int argc, char* argv[]) { << ":/geometry" << std::endl << " " << std::endl << " " << std::endl - << " " << basename << ":/group" << std::endl << " " << std::endl << " " << std::endl - << " " << basename << ":/boundary" << std::endl + << " " << basename + << ":/boundary" << std::endl << " " << std::endl << " " << std::endl << " " << std::endl @@ -478,6 +590,10 @@ int main(int argc, char* argv[]) { checkH5Err(H5Fclose(h5file)); + delete sharing; + delete mesh; + delete meshInput; + logInfo(rank) << "Finished successfully"; PCU_Comm_Free();
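// --- Illustrative sketch: because _checkH5Err() now forwards its argument back to the
// caller, HDF5 calls can be wrapped inline and their return value used directly, e.g.
// hid_t h5filter = checkH5Err(H5Pcreate(H5P_DATASET_CREATE)); as done in writeH5Data().
// A standalone equivalent (with logError() replaced by a plain abort for illustration):
#include <cstdio>
#include <cstdlib>
#include <utility>

template <typename TT> static TT checkStatusSketch(TT&& status, const char* file, int line) {
  if (status < 0) {
    std::fprintf(stderr, "An HDF5 error occurred (%s: %d)\n", file, line);
    std::abort();
  }
  return std::forward<TT>(status);
}
#define CHECK_STATUS_SKETCH(...) checkStatusSketch(__VA_ARGS__, __FILE__, __LINE__)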