# Build script for the bitdecoding pack/decode kernel test executable.
cmake_minimum_required(VERSION 3.22.1)
project(bitdecoding LANGUAGES CUDA CXX)

# Emit compile_commands.json for editors and other tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Build both C++ and CUDA sources as C++17. The *_STANDARD_REQUIRED switches
# make CMake report an error rather than silently falling back to an older
# standard when the compiler cannot provide C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Target compute capability 8.0; the generated kernel sources listed below
# all carry "sm80" in their file names.
set(CMAKE_CUDA_ARCHITECTURES 80)

# CUTLASS headers, located relative to this project's source directory.
set(INCLUDE_DIR "${PROJECT_SOURCE_DIR}/../../libs/cutlass/include")

# Enable ccache if available: when find_program locates it, use it as the
# compiler launcher for both CUDA and C++.
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
  set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
  set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
endif()

# Locate Torch and append the compiler flags it exports to the C++ flags.
find_package(Torch REQUIRED)
string(APPEND CMAKE_CXX_FLAGS " ${TORCH_CXX_FLAGS}")

message(STATUS "Compile testing packdecode kernel.")

# Test driver plus the generated kernel translation units it links against.
add_executable(test_single_packdecode
  ${PROJECT_SOURCE_DIR}/src/test_single_packdecode.cu
  ${PROJECT_SOURCE_DIR}/src/genfile/flash_fwd_hdim128_fp16_sm80.cu
  ${PROJECT_SOURCE_DIR}/src/genfile/flash_qpack_hdim128_fp16_sm80_2bit.cu
  ${PROJECT_SOURCE_DIR}/src/genfile/flash_qpack_hdim128_fp16_sm80_4bit.cu
  ${PROJECT_SOURCE_DIR}/src/genfile/flash_fwd_split_hdim128_fp16_sm80_2bit.cu
  ${PROJECT_SOURCE_DIR}/src/genfile/flash_fwd_split_hdim128_fp16_sm80_4bit.cu
)

# PRIVATE: this is an executable, so nothing consumes its usage requirements.
target_link_libraries(test_single_packdecode PRIVATE "${TORCH_LIBRARIES}")
target_include_directories(test_single_packdecode PRIVATE "${INCLUDE_DIR}")

# Options applied to CUDA sources only:
#   -maxrregcount=255                       register limit passed to nvcc
#   -gencode arch=compute_80,code=sm_80     generate code for sm_80
#   -w                                      suppress compiler warnings
# The generator expression is quoted and its options are separated by ';' so
# that it reaches CMake as one intact expression that expands to a list.
target_compile_options(test_single_packdecode PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:-maxrregcount=255;-gencode;arch=compute_80,code=sm_80;-w>"
)