Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions host-configs/LLNL/quartz-base.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ set(ENABLE_OPENMP ON CACHE BOOL "")
# Optional components switched on for this host configuration. Without FORCE,
# set(... CACHE ...) only seeds the cache entry and leaves an existing value alone.
set(ENABLE_PAMELA ON CACHE BOOL "")
set(ENABLE_PVTPackage ON CACHE BOOL "")
# FORCE replaces any previously cached value, so PETSc is set to OFF here.
set(ENABLE_PETSC OFF CACHE BOOL "Enables PETSc." FORCE)
# FORCE replaces any previously cached value, so JITTI is set to ON here.
set(ENABLE_JITTI ON CACHE BOOL "Enables kernel JIT compilation." FORCE)

include(${CMAKE_CURRENT_LIST_DIR}/../tpls.cmake)
10 changes: 10 additions & 0 deletions src/cmake/GeosxOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ endif(NOT BLT_CXX_STD STREQUAL c++14)

message("CMAKE_CXX_COMPILER_ID = ${CMAKE_CXX_COMPILER_ID}")

option( ENABLE_JITTI "Build all compute kernels just-in-time at runtime." OFF )

if ( ENABLE_JITTI )
message( "JITTI is ENABLED")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the way we've been doing this is GEOSX_ENABLE_JITTI, or maybe it's GEOSX_USE_JITTI, I forget. But either way I like defined/not defined instead of 1/0.

set( JITTI_DEFINES "JITTI=1" )
else ( )
set( JITTI_DEFINES "JITTI=0" )
message( "JITTI is DISABLED")
endif ( )

blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS DEFAULT "${OpenMP_CXX_FLAGS}")
blt_append_custom_compiler_flag( FLAGS_VAR CMAKE_CXX_FLAGS
GNU "-Wall -Wextra -Wpedantic -pedantic-errors -Wshadow -Wfloat-equal -Wcast-align -Wcast-qual"
Expand Down
14 changes: 13 additions & 1 deletion src/coreComponents/finiteElement/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ set( finiteElement_headers
elementFormulations/H1_Wedge_Lagrange1_Gauss6.hpp
elementFormulations/LagrangeBasis1.hpp
elementFormulations/LagrangeBasis2.hpp
${CMAKE_BINARY_DIR}/include/kernelJITCompileCommands.hpp
)
#
# Specify all sources
#
set( finiteElement_sources
FiniteElementDiscretization.cpp
FiniteElementDiscretizationManager.cpp
kernelInterface/kernelJIT.cpp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is not included in the PR, and I can't see it being generated anywhere either

)

set( dependencyList dataRepository )
Expand All @@ -37,12 +39,22 @@ endif()
blt_add_library( NAME finiteElement
SOURCES ${finiteElement_sources}
HEADERS ${finiteElement_headers}
DEPENDS_ON ${dependencyList}
DEFINES JITTI_OUTPUT_DIR=${CMAKE_BINARY_DIR}/lib/jitti ${JITTI_DEFINES}
DEPENDS_ON ${dependencyList} jitti
OBJECT ${GEOSX_BUILD_OBJ_LIBS}
)

target_include_directories( finiteElement PUBLIC ${CMAKE_SOURCE_DIR}/coreComponents)

# Build rule for ${CMAKE_BINARY_DIR}/include/kernelJITCompileCommands.hpp.
# The header is produced by jitti's generateCompileCommandsHeader.py from the
# exported compilation database (compile_commands.json) for kernelJIT.cpp.
#
# DEPENDS lists the generator script and the compilation database so the header
# is regenerated when either changes instead of silently going stale.
# The output directory is created first so the rule does not rely on another
# step having created it. VERBATIM keeps argument escaping platform-independent.
add_custom_command(
  OUTPUT ${CMAKE_BINARY_DIR}/include/kernelJITCompileCommands.hpp
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/include
  COMMAND python ${CMAKE_CURRENT_LIST_DIR}/../LvArray/src/jitti/generateCompileCommandsHeader.py
          ${CMAKE_BINARY_DIR}/compile_commands.json
          --cpp ${CMAKE_CURRENT_LIST_DIR}/kernelInterface/kernelJIT.cpp
          --hpp ${CMAKE_BINARY_DIR}/include/kernelJITCompileCommands.hpp
          --include ${CMAKE_BINARY_DIR}/include
          --linker ${CMAKE_CXX_COMPILER}
  DEPENDS ${CMAKE_CURRENT_LIST_DIR}/../LvArray/src/jitti/generateCompileCommandsHeader.py
          ${CMAKE_BINARY_DIR}/compile_commands.json
  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
  COMMENT "Generating kernelJITCompileCommands.hpp"
  VERBATIM )
Comment on lines +49 to +56
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of relying on headers that you generate on the file system, could it have been more robust to store the C++ pre-processing output as a string alongside the compile/link commands directly into the lib?
And that would be more "modern" JIT style imho.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think you could do that, although it would require the user to list out the functions that they want to JIT at configuration time in CMake. Then for each of those functions we could pre-process jitti/templateSource.cpp. But only up to a point, since we don't know what template params the user will want. Then at run time when we know the params we could compile the pre-processed source with the additional command line definitions of JITTI_TEMPLATE_PARAMS and JITTI_TEMPLATE_PARAMS_STRING.

However, how we pass a string embedded in our library to the compiler without using the file system is beyond me. Not to mention that at the end of the day the compiler is going to spit out a library on the file system that we then have to open. So this frees us from having to have access to the same headers used to build, but unless we do something really fancy I think file system access is a requirement.

Also at least with CUDA the cost of the compilation time itself will greatly outweigh the cost of opening and pre-processing the source.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also at least with CUDA the cost of the compilation time itself will greatly outweigh the cost of opening and pre-processing the source.

It's more a problem of having a consistent, self-contained GEOSX installation than a performance problem imho.
Embedding the pre-processed source files directly into GEOSX would help avoid relying on sources that may be modified behind the scenes.
I do not know all the JIT details so maybe it's not a problem anyway: I do not want to solve problems that do not exist.


geosx_add_code_checks( PREFIX finiteElement )

add_subdirectory( unitTests )
Expand Down
242 changes: 177 additions & 65 deletions src/coreComponents/finiteElement/kernelInterface/KernelBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@
#include "common/TimingMacros.hpp"
#include "constitutive/ConstitutivePassThru.hpp"
#include "finiteElement/FiniteElementDispatch.hpp"
#include "mesh/MeshLevel.hpp"
#include "mesh/ElementRegionManager.hpp"
#include "common/GEOS_RAJA_Interface.hpp"
#include "common/MpiWrapper.hpp"
#include "LvArray/src/jitti/Cache.hpp"

namespace geosx
{
Expand Down Expand Up @@ -258,52 +261,24 @@ class KernelBase
FE_TYPE const & m_finiteElementSpace;
};

/**
* @class KernelFactory
* @brief Used to forward arguments to a class that implements the KernelBase interface.
* @tparam KERNEL_TYPE The template class to construct, should implement the KernelBase interface.
* @tparam ARGS The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments.
*/
template< template< typename SUBREGION_TYPE,
typename CONSTITUTIVE_TYPE,
typename FE_TYPE > class KERNEL_TYPE,
typename ... ARGS >
class KernelFactory
{
public:

/**
* @brief Initialize the factory.
* @param args The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments.
*/
KernelFactory( ARGS ... args ):
m_args( args ... )
{}

/**
* @brief Create a new kernel with the given standard arguments.
* @tparam SUBREGION_TYPE The type of @p elementSubRegion.
* @tparam CONSTITUTIVE_TYPE The type of @p inputConstitutiveType.
* @tparam FE_TYPE The type of @p finiteElementSpace.
* @param nodeManager The node manager.
* @param edgeManager The edge manager.
* @param faceManager The face manager.
* @param targetRegionIndex The target region index.
* @param elementSubRegion The subregion to execute on.
* @param finiteElementSpace The finite element space.
* @param inputConstitutiveType The constitutive relation.
* @return A new kernel constructed with the given arguments and @c ARGS.
*/
template< typename SUBREGION_TYPE, typename CONSTITUTIVE_TYPE, typename FE_TYPE >
KERNEL_TYPE< SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE > createKernel(
template< typename POLICY,
typename SUBREGION_TYPE,
typename CONSTITUTIVE_TYPE,
typename FE_TYPE,
template< typename, typename, typename > class KERNEL_TEMPLATE,
typename KERNEL_CONSTRUCTOR_PARAMS >
real64 buildKernelAndInvoke(
localIndex const numElems,
NodeManager & nodeManager,
EdgeManager const & edgeManager,
FaceManager const & faceManager,
localIndex const targetRegionIndex,
SUBREGION_TYPE const & elementSubRegion,
FE_TYPE const & finiteElementSpace,
CONSTITUTIVE_TYPE & inputConstitutiveType )
CONSTITUTIVE_TYPE & inputConstitutiveType,
KERNEL_CONSTRUCTOR_PARAMS const & kernelParamsTuple )
{
using KERNEL_TYPE = KERNEL_TEMPLATE< SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE >;
camp::tuple< NodeManager &,
EdgeManager const &,
FaceManager const &,
Expand All @@ -317,16 +292,158 @@ class KernelFactory
elementSubRegion,
finiteElementSpace,
inputConstitutiveType };

auto allArgs = camp::tuple_cat_pair( standardArgs, m_args );
return camp::make_from_tuple< KERNEL_TYPE< SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE > >( allArgs );
auto allArgs = camp::tuple_cat_pair( standardArgs, kernelParamsTuple );
KERNEL_TYPE kernel = camp::make_from_tuple< KERNEL_TYPE >( allArgs );
return KERNEL_TYPE::template kernelLaunch< POLICY, KERNEL_TYPE >( numElems, kernel );
}

private:
/// The arguments to append to the standard kernel constructor arguments.
camp::tuple< ARGS ... > m_args;
};
/**
 * @class KernelDispatchTemplate
 * @brief Dispatcher that constructs a compile-time instantiated kernel and launches it.
 * @tparam KERNEL_TYPE The template class to construct, should implement the KernelBase interface.
 *   It is instantiated as KERNEL_TYPE< SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE >.
 * @tparam ARGS The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments.
 */
template< template< typename SUBREGION_TYPE,
                    typename CONSTITUTIVE_TYPE,
                    typename FE_TYPE > class KERNEL_TYPE,
          typename ... ARGS >
class KernelDispatchTemplate
{
public:

  /**
   * @brief Store the extra kernel constructor arguments.
   * @param args The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments.
   */
  KernelDispatchTemplate( ARGS ... args ):
    m_args( args ... )
  {}

  /**
   * @brief Forward the standard arguments plus the stored @c ARGS to buildKernelAndInvoke.
   * @tparam POLICY The policy forwarded to buildKernelAndInvoke (given explicitly by the caller).
   * @tparam SUBREGION_TYPE The type of @p elementSubRegion.
   * @tparam FE_TYPE The type of @p finiteElementSpace.
   * @tparam CONSTITUTIVE_TYPE The type of @p inputConstitutiveType.
   * @param numElems The number of elements, forwarded to buildKernelAndInvoke.
   * @param nodeManager The node manager.
   * @param edgeManager The edge manager.
   * @param faceManager The face manager.
   * @param targetRegionIndex The target region index.
   * @param elementSubRegion The subregion to execute on.
   * @param finiteElementSpace The finite element space.
   * @param inputConstitutiveType The constitutive relation.
   * @return The value returned by buildKernelAndInvoke.
   */
  template< typename POLICY,
            typename SUBREGION_TYPE,
            typename FE_TYPE,
            typename CONSTITUTIVE_TYPE >
  real64 invoke( localIndex const numElems,
                 NodeManager & nodeManager,
                 EdgeManager const & edgeManager,
                 FaceManager const & faceManager,
                 localIndex const targetRegionIndex,
                 SUBREGION_TYPE const & elementSubRegion,
                 FE_TYPE const & finiteElementSpace,
                 CONSTITUTIVE_TYPE & inputConstitutiveType )
  {
    // Note the template argument order expected by buildKernelAndInvoke:
    // POLICY, SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE, kernel template, tuple type.
    return buildKernelAndInvoke< POLICY,
                                 SUBREGION_TYPE,
                                 CONSTITUTIVE_TYPE,
                                 FE_TYPE,
                                 KERNEL_TYPE,
                                 decltype( m_args ) >( numElems,
                                                       nodeManager,
                                                       edgeManager,
                                                       faceManager,
                                                       targetRegionIndex,
                                                       elementSubRegion,
                                                       finiteElementSpace,
                                                       inputConstitutiveType,
                                                       m_args );
  }

private:

  /// The arguments to append to the standard kernel constructor arguments.
  camp::tuple< ARGS ... > m_args;
};

/**
 * @brief Get the jitti compilation information associated with @p header.
 * @param header String identifying the kernel header (KernelDispatchJIT passes its HEADER template argument).
 * @return The jitti::CompilationInfo for @p header; callers fill in templateParams afterwards.
 * @note NOTE(review): only the declaration is visible here; the definition presumably lives in
 *   kernelInterface/kernelJIT.cpp -- confirm.
 */
jitti::CompilationInfo getKernelCompilationInfo( string const & header );
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't find the definition of this function anywhere. Is it in kernelJIT.cpp?

/**
 * @class KernelDispatchJIT
 * @brief Dispatcher that compiles the kernel using jitti and then invokes the compiled function.
 * @tparam NAME C-string whose text is placed in the jitti template parameter list at the position
 *   buildKernelAndInvoke uses for the kernel template.
 * @tparam HEADER C-string passed to getKernelCompilationInfo to obtain the compilation information.
 * @tparam ARGS The arguments appended to the standard kernel constructor arguments.
 */
template< const char * NAME, const char * HEADER, typename ... ARGS >
class KernelDispatchJIT
{
public:

  /**
   * @brief Store the extra kernel constructor arguments.
   * @param args The arguments appended to the standard kernel constructor arguments.
   */
  KernelDispatchJIT( ARGS ... args ):
    m_args( args ... )
  {}

  /**
   * @brief Obtain the JIT-compiled dispatch function for the given types and call it.
   * @tparam POLICY The policy type; its demangled name becomes the first jitti template parameter.
   * @tparam SUBREGION_TYPE The type of @p elementSubRegion.
   * @tparam FE_TYPE The type of @p finiteElementSpace.
   * @tparam CONSTITUTIVE_TYPE The type of @p inputConstitutiveType.
   * @param numElems The number of elements, forwarded to the compiled function.
   * @param nodeManager The node manager.
   * @param edgeManager The edge manager.
   * @param faceManager The face manager.
   * @param targetRegionIndex The target region index.
   * @param elementSubRegion The subregion to execute on.
   * @param finiteElementSpace The finite element space.
   * @param inputConstitutiveType The constitutive relation.
   * @return The value returned by the compiled dispatch function.
   */
  template< typename POLICY,
            typename SUBREGION_TYPE,
            typename FE_TYPE,
            typename CONSTITUTIVE_TYPE >
  real64 invoke( localIndex const numElems,
                 NodeManager & nodeManager,
                 EdgeManager const & edgeManager,
                 FaceManager const & faceManager,
                 localIndex const targetRegionIndex,
                 SUBREGION_TYPE const & elementSubRegion,
                 FE_TYPE const & finiteElementSpace,
                 CONSTITUTIVE_TYPE & inputConstitutiveType )
  {
    string header( HEADER );
    jitti::CompilationInfo info = getKernelCompilationInfo( header );

    // Same order as the template parameters of buildKernelAndInvoke:
    // POLICY, SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE, kernel template, constructor-params tuple.
    info.templateParams = LvArray::system::demangleType< POLICY >() + ", " +
                          LvArray::system::demangleType< SUBREGION_TYPE >() + ", " +
                          LvArray::system::demangleType< CONSTITUTIVE_TYPE >() + ", " +
                          LvArray::system::demangleType< FE_TYPE >() + ", " +
                          string( NAME ) + ", " +
                          LvArray::system::demangleType< decltype( m_args ) >();

    // Unfortunately can't just decltype(&buildKernelAndInvoke) since we can't fully specify the function template
    using JIT_KERNEL_DISPATCH = real64 (*)( localIndex const,
                                            NodeManager &,
                                            EdgeManager const &,
                                            FaceManager const &,
                                            localIndex const,
                                            SUBREGION_TYPE const &,
                                            FE_TYPE const &,
                                            CONSTITUTIVE_TYPE &,
                                            decltype( m_args ) const & );
    string outputDir( STRINGIZE( JITTI_OUTPUT_DIR ) );
    outputDir += "/";
    // Function-local static: the cache is constructed once per instantiation of invoke(),
    // so the timestamp and output directory are only read on the first call.
    static jitti::Cache< JIT_KERNEL_DISPATCH > buildCache( time( nullptr ), outputDir );

    // Only rank 0 compiles; every rank waits at the barrier before looking the function up.
    if( MpiWrapper::commRank( ) == 0 )
    {
      buildCache.getOrLoadOrCompile( info );
    }
    MpiWrapper::barrier( );
    // check if the library with the function is available
    if( buildCache.tryGet( info ) == nullptr )
    {
      // if not, refresh by reading the filesystem to find the new library on the first iteration
      buildCache.refresh( );
    }
    auto & jitKernelDispatch = buildCache.getOrLoad( info );
    return jitKernelDispatch( numElems,
                              nodeManager,
                              edgeManager,
                              faceManager,
                              targetRegionIndex,
                              elementSubRegion,
                              finiteElementSpace,
                              inputConstitutiveType,
                              m_args );
  }

private:

  /// The arguments to append to the standard kernel constructor arguments.
  camp::tuple< ARGS ... > m_args;
};

// Selecting the dispatcher through template specialization of the classes above (letting JITTI_TPARAM
// decide whether to JIT) would be VASTLY preferable. Current language restrictions on templating, together
// with the 'using' statements spread through the code to define specific kernel dispatchers, make that
// devwork intractable. A 'using' alias is not an option either, because the two class templates take
// different template parameters, and no nicer preprocessor macro is available.
// That leaves a single option: pick the dispatcher name with a macro.
#if defined( JITTI ) && ( JITTI == 1 )
#define KernelDispatch KernelDispatchJIT
#else
#define KernelDispatch KernelDispatchTemplate
#endif
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per the comments, this is the part of the current implementation I want to change the most. It should be possible, but wound up eating hours while I was trying to get it implemented the way I would prefer.

Copy link
Contributor

@klevzoff klevzoff Sep 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to make the two dispatcher classes similar by replacing the NAME and HEADER parameters in KernelDispatchJIT with just a KERNEL_TYPE parameter (i.e. a template template parameter, like in the non-JIT dispatcher).

NAME could then be extracted as (I'm trying to decide if this is 100% robust):

string const fullName = LvArray::system::demangleType< KERNEL_TYPE< SUBREGION_TYPE, FE_TYPE, CONSTITUTIVE_TYPE > >();
string const name = fullName.substr( 0, fullName.find( '<' ) );

HEADER could be replaced by adding inside each "leaf" kernel class something like

template< typename SUBREGION_TYPE, typename FE_TYPE, typename CONSTITUTIVE_TYPE >
class MyKernel
{
  static constexpr char const source_location[] = __FILE__;
  ...
};

// Sad part: this is required in C++14 to avoid linker errors
template< typename SUBREGION_TYPE, typename FE_TYPE, typename CONSTITUTIVE_TYPE >
constexpr char const MyKernel< SUBREGION_TYPE, FE_TYPE, CONSTITUTIVE_TYPE >::source_location[];

and accessing as

string const header = KERNEL_TYPE< SUBREGION_TYPE, FE_TYPE, CONSTITUTIVE_TYPE >::source_location;

The downside is having this extra stuff to remember to put in kernels. Maybe there's a better way to associate kernel class with source file name?

These ideas aren't too pretty, just thought I'd mention them. They allow us to get rid of the JITTI_DECL/JITTI_TPARAM macros entirely and unify the two dispatchers, possibly even making them specializations as the comment above suggests, and allowing for a per-kernel JITting decision.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the end goal is to always use JITTI, although that doesn't mean that we need to always jit things at run-time. We need to have the capability to pre-jit everything at build-time, and I think that would suffice.


//*****************************************************************************
//*****************************************************************************
Expand Down Expand Up @@ -357,13 +474,13 @@ class KernelFactory
template< typename POLICY,
typename CONSTITUTIVE_BASE,
typename SUBREGION_TYPE,
typename KERNEL_FACTORY >
typename KERNEL_DISPATCH >
static
real64 regionBasedKernelApplication( MeshLevel & mesh,
arrayView1d< string const > const & targetRegions,
string const & finiteElementName,
arrayView1d< string const > const & constitutiveNames,
KERNEL_FACTORY & kernelFactory )
KERNEL_DISPATCH & kernelDispatch )
{
GEOSX_MARK_FUNCTION;
// save the maximum residual contribution for scaling residuals for convergence criteria.
Expand All @@ -381,7 +498,7 @@ real64 regionBasedKernelApplication( MeshLevel & mesh,
&nodeManager,
&edgeManager,
&faceManager,
&kernelFactory,
&kernelDispatch,
&finiteElementName]
( localIndex const targetRegionIndex, auto & elementSubRegion )
{
Expand All @@ -407,7 +524,7 @@ real64 regionBasedKernelApplication( MeshLevel & mesh,
&edgeManager,
&faceManager,
targetRegionIndex,
&kernelFactory,
&kernelDispatch,
&elementSubRegion,
&finiteElementName,
numElems]
Expand All @@ -422,25 +539,20 @@ real64 regionBasedKernelApplication( MeshLevel & mesh,
&edgeManager,
&faceManager,
targetRegionIndex,
&kernelFactory,
&kernelDispatch,
&elementSubRegion,
numElems,
&castedConstitutiveRelation] ( auto const finiteElement )
{
auto kernel = kernelFactory.createKernel( nodeManager,
edgeManager,
faceManager,
targetRegionIndex,
elementSubRegion,
finiteElement,
castedConstitutiveRelation );

using KERNEL_TYPE = decltype( kernel );

// Call the kernelLaunch function, and store the maximum contribution to the residual.
maxResidualContribution =
std::max( maxResidualContribution,
KERNEL_TYPE::template kernelLaunch< POLICY, KERNEL_TYPE >( numElems, kernel ) );
maxResidualContribution = std::max( maxResidualContribution,
kernelDispatch.template invoke< POLICY >( numElems,
nodeManager,
edgeManager,
faceManager,
targetRegionIndex,
elementSubRegion,
finiteElement,
castedConstitutiveRelation ) );
} );
} );

Expand Down
Loading