-
Notifications
You must be signed in to change notification settings - Fork 99
WIP: Experimental/corbett/jit #1333
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ece63c9
599f4ca
c340fa4
301734c
8016eeb
5860e90
ea0ab5c
16068c3
9bc353d
d610d60
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,13 +19,15 @@ set( finiteElement_headers | |
| elementFormulations/H1_Wedge_Lagrange1_Gauss6.hpp | ||
| elementFormulations/LagrangeBasis1.hpp | ||
| elementFormulations/LagrangeBasis2.hpp | ||
| ${CMAKE_BINARY_DIR}/include/kernelJITCompileCommands.hpp | ||
| ) | ||
| # | ||
| # Specify all sources | ||
| # | ||
| set( finiteElement_sources | ||
| FiniteElementDiscretization.cpp | ||
| FiniteElementDiscretizationManager.cpp | ||
| kernelInterface/kernelJIT.cpp | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This file is not included in the PR, and I can't see it being generated anywhere either. |
||
| ) | ||
|
|
||
| set( dependencyList dataRepository ) | ||
|
|
@@ -37,12 +39,22 @@ endif() | |
| blt_add_library( NAME finiteElement | ||
| SOURCES ${finiteElement_sources} | ||
| HEADERS ${finiteElement_headers} | ||
| DEPENDS_ON ${dependencyList} | ||
| DEFINES JITTI_OUTPUT_DIR=${CMAKE_BINARY_DIR}/lib/jitti ${JITTI_DEFINES} | ||
| DEPENDS_ON ${dependencyList} jitti | ||
| OBJECT ${GEOSX_BUILD_OBJ_LIBS} | ||
| ) | ||
|
|
||
| target_include_directories( finiteElement PUBLIC ${CMAKE_SOURCE_DIR}/coreComponents) | ||
|
|
||
| add_custom_command( OUTPUT ${CMAKE_BINARY_DIR}/include/kernelJITCompileCommands.hpp | ||
| WORKING_DIRECTORY ${CMAKE_BINARY_DIR} | ||
| COMMAND python ${CMAKE_CURRENT_LIST_DIR}/../LvArray/src/jitti/generateCompileCommandsHeader.py | ||
| ${CMAKE_BINARY_DIR}/compile_commands.json | ||
| --cpp ${CMAKE_CURRENT_LIST_DIR}/kernelInterface/kernelJIT.cpp | ||
| --hpp ${CMAKE_BINARY_DIR}/include/kernelJITCompileCommands.hpp | ||
| --include ${CMAKE_BINARY_DIR}/include | ||
| --linker ${CMAKE_CXX_COMPILER} ) | ||
|
Comment on lines
+49
to
+56
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Instead of relying on headers that you generate on the file system, would it be more robust to store the C++ pre-processing output as a string, alongside the compile/link commands, directly in the lib?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I think you could do that, although it would require the user to list out the functions that they want to JIT at configuration time in CMake. Then for each of those functions we could pre-process However, how we pass a string embedded in our library to the compiler without using the file system is beyond me. Not to mention that at the end of the day the compiler is going to spit out a library on the file system that we then have to open. So this frees us from having to have access to the same headers used to build, but unless we do something really fancy I think file system access is a requirement. Also at least with CUDA the cost of the compilation time itself will greatly outweigh the cost of opening and pre-processing the source.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's more a problem of having a consistent self contained GEOSX installation than a performance problem imho. |
||
|
|
||
| geosx_add_code_checks( PREFIX finiteElement ) | ||
|
|
||
| add_subdirectory( unitTests ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,8 +24,11 @@ | |
| #include "common/TimingMacros.hpp" | ||
| #include "constitutive/ConstitutivePassThru.hpp" | ||
| #include "finiteElement/FiniteElementDispatch.hpp" | ||
| #include "mesh/MeshLevel.hpp" | ||
| #include "mesh/ElementRegionManager.hpp" | ||
| #include "common/GEOS_RAJA_Interface.hpp" | ||
| #include "common/MpiWrapper.hpp" | ||
| #include "LvArray/src/jitti/Cache.hpp" | ||
|
|
||
| namespace geosx | ||
| { | ||
|
|
@@ -258,52 +261,24 @@ class KernelBase | |
| FE_TYPE const & m_finiteElementSpace; | ||
| }; | ||
|
|
||
| /** | ||
| * @class KernelFactory | ||
| * @brief Used to forward arguments to a class that implements the KernelBase interface. | ||
| * @tparam KERNEL_TYPE The template class to construct, should implement the KernelBase interface. | ||
| * @tparam ARGS The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments. | ||
| */ | ||
| template< template< typename SUBREGION_TYPE, | ||
| typename CONSTITUTIVE_TYPE, | ||
| typename FE_TYPE > class KERNEL_TYPE, | ||
| typename ... ARGS > | ||
| class KernelFactory | ||
| { | ||
| public: | ||
|
|
||
| /** | ||
| * @brief Initialize the factory. | ||
| * @param args The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments. | ||
| */ | ||
| KernelFactory( ARGS ... args ): | ||
| m_args( args ... ) | ||
| {} | ||
|
|
||
| /** | ||
| * @brief Create a new kernel with the given standard arguments. | ||
| * @tparam SUBREGION_TYPE The type of @p elementSubRegion. | ||
| * @tparam CONSTITUTIVE_TYPE The type of @p inputConstitutiveType. | ||
| * @tparam FE_TYPE The type of @p finiteElementSpace. | ||
| * @param nodeManager The node manager. | ||
| * @param edgeManager The edge manager. | ||
| * @param faceManager The face manager. | ||
| * @param targetRegionIndex The target region index. | ||
| * @param elementSubRegion The subregion to execute on. | ||
| * @param finiteElementSpace The finite element space. | ||
| * @param inputConstitutiveType The constitutive relation. | ||
| * @return A new kernel constructed with the given arguments and @c ARGS. | ||
| */ | ||
| template< typename SUBREGION_TYPE, typename CONSTITUTIVE_TYPE, typename FE_TYPE > | ||
| KERNEL_TYPE< SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE > createKernel( | ||
| template< typename POLICY, | ||
| typename SUBREGION_TYPE, | ||
| typename CONSTITUTIVE_TYPE, | ||
| typename FE_TYPE, | ||
| template< typename, typename, typename > class KERNEL_TEMPLATE, | ||
| typename KERNEL_CONSTRUCTOR_PARAMS > | ||
| real64 buildKernelAndInvoke( | ||
| localIndex const numElems, | ||
| NodeManager & nodeManager, | ||
| EdgeManager const & edgeManager, | ||
| FaceManager const & faceManager, | ||
| localIndex const targetRegionIndex, | ||
| SUBREGION_TYPE const & elementSubRegion, | ||
| FE_TYPE const & finiteElementSpace, | ||
| CONSTITUTIVE_TYPE & inputConstitutiveType ) | ||
| CONSTITUTIVE_TYPE & inputConstitutiveType, | ||
| KERNEL_CONSTRUCTOR_PARAMS const & kernelParamsTuple ) | ||
| { | ||
| using KERNEL_TYPE = KERNEL_TEMPLATE< SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE >; | ||
| camp::tuple< NodeManager &, | ||
| EdgeManager const &, | ||
| FaceManager const &, | ||
|
|
@@ -317,16 +292,158 @@ class KernelFactory | |
| elementSubRegion, | ||
| finiteElementSpace, | ||
| inputConstitutiveType }; | ||
|
|
||
| auto allArgs = camp::tuple_cat_pair( standardArgs, m_args ); | ||
| return camp::make_from_tuple< KERNEL_TYPE< SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE > >( allArgs ); | ||
| auto allArgs = camp::tuple_cat_pair( standardArgs, kernelParamsTuple ); | ||
| KERNEL_TYPE kernel = camp::make_from_tuple< KERNEL_TYPE >( allArgs ); | ||
| return KERNEL_TYPE::template kernelLaunch< POLICY, KERNEL_TYPE >( numElems, kernel ); | ||
| } | ||
|
|
||
| private: | ||
| /// The arguments to append to the standard kernel constructor arguments. | ||
| camp::tuple< ARGS ... > m_args; | ||
| }; | ||
| /** | ||
| * @class KernelDispatchTemplate | ||
| * @brief Dispatcher that builds a kernel from a class template and launches it through buildKernelAndInvoke(). | ||
| * @tparam KERNEL_TYPE The template class to construct, should implement the KernelBase interface. | ||
| * @tparam ARGS The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments. | ||
| * | ||
| * The extra constructor arguments are stored in a camp::tuple at construction time. invoke() forwards the | ||
| * standard arguments (managers, region index, subregion, finite element space, constitutive relation) together | ||
| * with that tuple to buildKernelAndInvoke() and returns the value it returns. | ||
| * | ||
| * NOTE(review): the template-template parameter names below read (CONSTITUTIVE_TYPE, SUBREGION_TYPE, FE_TYPE), | ||
| * while buildKernelAndInvoke() instantiates the template as < SUBREGION_TYPE, CONSTITUTIVE_TYPE, FE_TYPE >. | ||
| * The names carry no meaning to the compiler, but the mismatch is misleading - confirm the intended order. | ||
| */ | ||
| template < template < typename CONSTITUTIVE_TYPE, typename SUBREGION_TYPE, typename FE_TYPE > class KERNEL_TYPE, typename ... ARGS > | ||
| class KernelDispatchTemplate | ||
| { | ||
| public: | ||
|
|
||
| /** | ||
| * @brief Store the additional kernel constructor arguments for later use by invoke(). | ||
| * @param args The arguments used to construct a @p KERNEL_TYPE in addition to the standard arguments. | ||
| */ | ||
| KernelDispatchTemplate( ARGS ... args ): | ||
| m_args( args ... ) | ||
| {} | ||
|
|
||
| template < typename POLICY, | ||
| typename SUBREGION_TYPE, | ||
| typename FE_TYPE, | ||
| typename CONSTITUTIVE_TYPE > | ||
| real64 invoke( localIndex const numElems, | ||
| NodeManager & nodeManager, | ||
| EdgeManager const & edgeManager, | ||
| FaceManager const & faceManager, | ||
| localIndex const targetRegionIndex, | ||
| SUBREGION_TYPE const & elementSubRegion, | ||
| FE_TYPE const & finiteElementSpace, | ||
| CONSTITUTIVE_TYPE & inputConstitutiveType ) | ||
| { | ||
| return buildKernelAndInvoke< POLICY, | ||
| SUBREGION_TYPE, | ||
| CONSTITUTIVE_TYPE, | ||
| FE_TYPE, | ||
| KERNEL_TYPE, | ||
| decltype( m_args )> ( numElems, | ||
| nodeManager, | ||
| edgeManager, | ||
| faceManager, | ||
| targetRegionIndex, | ||
| elementSubRegion, | ||
| finiteElementSpace, | ||
| inputConstitutiveType, | ||
| m_args ); | ||
|
|
||
| } | ||
|
|
||
| private: | ||
|
|
||
| /// Tuple of the extra arguments appended to the standard kernel constructor arguments. | ||
| camp::tuple< ARGS ... > m_args; | ||
| }; | ||
|
|
||
| jitti::CompilationInfo getKernelCompilationInfo( const string & header ); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't find the definition of this function anywhere. Is it in |
||
| // Dispatcher that obtains the kernel entry point at run time through a jitti::Cache instead of instantiating it here; NAME is spliced into the template parameter string and HEADER is handed to getKernelCompilationInfo(). | ||
| template < const char * NAME, const char * HEADER, typename ... ARGS > | ||
| //template < template < const char * NAME, const char * HEADER > class constexpr_jitti_info< NAME, HEADER > CONSTEXPR_INFO, typename ... ARGS > | ||
| class KernelDispatchJIT | ||
| { | ||
| public: | ||
| /** | ||
| * @brief Store the additional kernel constructor arguments for later use by invoke(). | ||
| * @param args The arguments passed to the JIT-compiled function in addition to the standard arguments; | ||
| * presumably they end up as the extra constructor arguments of the kernel named by @p NAME - confirm. | ||
| */ | ||
| KernelDispatchJIT( ARGS ... args ): | ||
| m_args( args ... ) | ||
| {} | ||
|
|
||
| template < typename POLICY, | ||
| typename SUBREGION_TYPE, | ||
| typename FE_TYPE, | ||
| typename CONSTITUTIVE_TYPE > | ||
| real64 invoke( localIndex const numElems, | ||
| NodeManager & nodeManager, | ||
| EdgeManager const & edgeManager, | ||
| FaceManager const & faceManager, | ||
| localIndex const targetRegionIndex, | ||
| SUBREGION_TYPE const & elementSubRegion, | ||
| FE_TYPE const & finiteElementSpace, | ||
| CONSTITUTIVE_TYPE & inputConstitutiveType ) | ||
| { | ||
| string header( HEADER ); | ||
| jitti::CompilationInfo info = getKernelCompilationInfo( header ); | ||
|
|
||
| info.templateParams = LvArray::system::demangleType< POLICY >() + ", " + | ||
| LvArray::system::demangleType< SUBREGION_TYPE >() + ", " + | ||
| LvArray::system::demangleType< CONSTITUTIVE_TYPE >() + ", " + | ||
| LvArray::system::demangleType< FE_TYPE >() + ", " + | ||
| string( NAME ) + ", " + | ||
| LvArray::system::demangleType< decltype( m_args ) >(); | ||
|
|
||
| // The function-pointer type is spelled out by hand: decltype(&buildKernelAndInvoke) is not usable since we can't fully specify the function template | ||
| using JIT_KERNEL_DISPATCH = real64 (*)( localIndex const, | ||
| NodeManager &, | ||
| EdgeManager const &, | ||
| FaceManager const &, | ||
| localIndex const, | ||
| SUBREGION_TYPE const &, | ||
| FE_TYPE const &, | ||
| CONSTITUTIVE_TYPE &, | ||
| decltype( m_args ) const & ); | ||
| string outputDir( STRINGIZE( JITTI_OUTPUT_DIR ) ); | ||
| outputDir += "/"; | ||
| static jitti::Cache< JIT_KERNEL_DISPATCH > buildCache( time(NULL), outputDir ); | ||
|
|
||
| if( MpiWrapper::commRank( ) == 0 ) | ||
| { | ||
| buildCache.getOrLoadOrCompile( info ); | ||
| } | ||
| MpiWrapper::barrier( ); | ||
| // only rank 0 went through getOrLoadOrCompile above, so check whether this rank's cache already has the function | ||
| if ( buildCache.tryGet( info ) == nullptr ) | ||
| { | ||
| // if not, refresh by reading the filesystem to find the newly built library (expected on the first iteration) | ||
| buildCache.refresh( ); | ||
| } | ||
| auto & jitKernelDispatch = buildCache.getOrLoad( info ); | ||
| return jitKernelDispatch( numElems, | ||
| nodeManager, | ||
| edgeManager, | ||
| faceManager, | ||
| targetRegionIndex, | ||
| elementSubRegion, | ||
| finiteElementSpace, | ||
| inputConstitutiveType, | ||
| m_args ); | ||
| } | ||
|
|
||
| private: | ||
|
|
||
| /// Tuple of the extra arguments passed after the standard arguments to the JIT-compiled function. | ||
| camp::tuple< ARGS ... > m_args; | ||
| }; | ||
|
|
||
| // Would VASTLY prefer to use template specialization on the above class(es) and allow JITTI_TPARAM to ultimately | ||
| // decide on whether to JIT or not.. but current language restrictions on templating and our in-code restrictions on | ||
| // the kernels due to the 'using' statements throughout the code to define specific kernel dispatchers make that | ||
| // devwork intractable. | ||
| // We can't use 'using' here due to differences in the above templates as well... nor a nice preprocessor macro... | ||
| // so we're basically left with only one option | ||
| #if JITTI == 1 | ||
| #define KernelDispatch KernelDispatchJIT | ||
| #else | ||
| #define KernelDispatch KernelDispatchTemplate | ||
| #endif | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Per the comments, this is the part of the current implementation I want to change the most. It should be possible, but wound up eating hours while I was trying to get it implemented the way I would prefer.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should be possible to make the two dispatcher classes similar by replacing
string const fullName = LvArray::system::demangleType< KERNEL_TYPE< SUBREGION_TYPE, FE_TYPE, CONSTITUTIVE_TYPE > >();
string const name = fullName.substr( 0, fullName.find( '<' ) );
template< typename SUBREGION_TYPE, typename FE_TYPE, typename CONSTITUTIVE_TYPE >
class MyKernel
{
static constexpr char const source_location[] = __FILE__;
...
};
// Sad part: this is required in C++14 to avoid linker errors
template< typename SUBREGION_TYPE, typename FE_TYPE, typename CONSTITUTIVE_TYPE >
constexpr char const MyKernel< SUBREGION_TYPE, FE_TYPE, CONSTITUTIVE_TYPE >::source_location[];
and accessing it as string const header = KERNEL_TYPE< SUBREGION_TYPE, FE_TYPE, CONSTITUTIVE_TYPE >::source_location; The downside is having this extra stuff to remember to put in kernels. Maybe there's a better way to associate a kernel class with its source file name? These ideas aren't too pretty, just thought I'd mention them. They allow us to get rid of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the end goal is to always use |
||
|
|
||
| //***************************************************************************** | ||
| //***************************************************************************** | ||
|
|
@@ -357,13 +474,13 @@ class KernelFactory | |
| template< typename POLICY, | ||
| typename CONSTITUTIVE_BASE, | ||
| typename SUBREGION_TYPE, | ||
| typename KERNEL_FACTORY > | ||
| typename KERNEL_DISPATCH > | ||
| static | ||
| real64 regionBasedKernelApplication( MeshLevel & mesh, | ||
| arrayView1d< string const > const & targetRegions, | ||
| string const & finiteElementName, | ||
| arrayView1d< string const > const & constitutiveNames, | ||
| KERNEL_FACTORY & kernelFactory ) | ||
| KERNEL_DISPATCH & kernelDispatch ) | ||
| { | ||
| GEOSX_MARK_FUNCTION; | ||
| // save the maximum residual contribution for scaling residuals for convergence criteria. | ||
|
|
@@ -381,7 +498,7 @@ real64 regionBasedKernelApplication( MeshLevel & mesh, | |
| &nodeManager, | ||
| &edgeManager, | ||
| &faceManager, | ||
| &kernelFactory, | ||
| &kernelDispatch, | ||
| &finiteElementName] | ||
| ( localIndex const targetRegionIndex, auto & elementSubRegion ) | ||
| { | ||
|
|
@@ -407,7 +524,7 @@ real64 regionBasedKernelApplication( MeshLevel & mesh, | |
| &edgeManager, | ||
| &faceManager, | ||
| targetRegionIndex, | ||
| &kernelFactory, | ||
| &kernelDispatch, | ||
| &elementSubRegion, | ||
| &finiteElementName, | ||
| numElems] | ||
|
|
@@ -422,25 +539,20 @@ real64 regionBasedKernelApplication( MeshLevel & mesh, | |
| &edgeManager, | ||
| &faceManager, | ||
| targetRegionIndex, | ||
| &kernelFactory, | ||
| &kernelDispatch, | ||
| &elementSubRegion, | ||
| numElems, | ||
| &castedConstitutiveRelation] ( auto const finiteElement ) | ||
| { | ||
| auto kernel = kernelFactory.createKernel( nodeManager, | ||
| edgeManager, | ||
| faceManager, | ||
| targetRegionIndex, | ||
| elementSubRegion, | ||
| finiteElement, | ||
| castedConstitutiveRelation ); | ||
|
|
||
| using KERNEL_TYPE = decltype( kernel ); | ||
|
|
||
| // Call the kernelLaunch function, and store the maximum contribution to the residual. | ||
| maxResidualContribution = | ||
| std::max( maxResidualContribution, | ||
| KERNEL_TYPE::template kernelLaunch< POLICY, KERNEL_TYPE >( numElems, kernel ) ); | ||
| maxResidualContribution = std::max( maxResidualContribution, | ||
| kernelDispatch.template invoke< POLICY >( numElems, | ||
| nodeManager, | ||
| edgeManager, | ||
| faceManager, | ||
| targetRegionIndex, | ||
| elementSubRegion, | ||
| finiteElement, | ||
| castedConstitutiveRelation ) ); | ||
| } ); | ||
| } ); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the way we've been doing this is GEOSX_ENABLE_JITTI, or maybe it's GEOSX_USE_JITTI, I forget. But either way I like defined/not defined instead of 1/0.