Skip to content

Commit 0ebd3f4

Browse files
committed
update k-channel test
1 parent c1f25f1 commit 0ebd3f4

28 files changed

Lines changed: 6784 additions & 2 deletions

.gitignore

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# Distribution / packaging
7+
.Python
8+
env/
9+
build/
10+
dist/
11+
*.log
12+
*.egg-info
13+
14+
# pyenv
15+
.python-version
16+
17+
# dotenv
18+
.env
19+
20+
# virtualenv
21+
.venv/
22+
venv/
23+
ENV/
24+
25+
# VSCode settings
26+
.vscode
27+
28+
# IDEA files
29+
.idea
30+
31+
# OSX dir files
32+
.DS_Store
33+
34+
# Sublime Text settings
35+
*.sublime-workspace
36+
*.sublime-project
37+
38+
# PyTorch Source Files
39+
kernels/3rdparty/libtorch/
40+
41+
hf-models/
42+
*.npy
43+
*.pt
44+
45+
pred/
46+
pred_e/
47+
logs/
48+
49+
*.so
50+
51+
libtorch/

.gitmodules

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
[submodule "3rdparty"]
2-
path = 3rdparty/cutlass
1+
[submodule "libs/cutlass"]
2+
path = libs/cutlass
33
url = https://github.com/NVIDIA/cutlass.git

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,15 @@
11
# BitDecoding
22

3+
## Quick Start
4+
2. Run with libtorch c++
5+
```
6+
cd libs/
7+
wget https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.5.1%2Bcu124.zip
8+
unzip libtorch-shared-with-deps-2.5.1+cu124.zip
9+
rm libtorch-shared-with-deps-2.5.1+cu124.zip
10+
11+
cd ../csrc/bit_decode
12+
mkdir build && cd build
13+
cmake -DCMAKE_PREFIX_PATH=<libtorch_path> ..
14+
make -j12
15+
```

csrc/bit_decode/CMakeLists.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
cmake_minimum_required(VERSION 3.22.1)
2+
# Declare the project and enable the CUDA and C++ toolchains.
project(bitdecoding LANGUAGES CUDA CXX)
3+
4+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
5+
set(CMAKE_CXX_STANDARD 17)
6+
set(CMAKE_CUDA_STANDARD 17)
7+
set(CMAKE_CUDA_ARCHITECTURES 80)
8+
9+
set(INCLUDE_DIR ${PROJECT_SOURCE_DIR}/../../libs/cutlass/include)
10+
11+
# Enable ccache if available
12+
find_program(CCACHE_PROGRAM ccache)
13+
if(CCACHE_PROGRAM)
14+
set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
15+
set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
16+
endif()
17+
18+
find_package(Torch REQUIRED)
19+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
20+
21+
message(STATUS "Compile testing packdecode kernel.")
22+
add_executable(test_single_packdecode
23+
${PROJECT_SOURCE_DIR}/src/test_single_packdecode.cu
24+
${PROJECT_SOURCE_DIR}/src/genfile/flash_fwd_hdim128_fp16_sm80.cu
25+
${PROJECT_SOURCE_DIR}/src/genfile/flash_qpack_hdim128_fp16_sm80_2bit.cu
26+
${PROJECT_SOURCE_DIR}/src/genfile/flash_qpack_hdim128_fp16_sm80_4bit.cu
27+
${PROJECT_SOURCE_DIR}/src/genfile/flash_fwd_split_hdim128_fp16_sm80_2bit.cu
28+
${PROJECT_SOURCE_DIR}/src/genfile/flash_fwd_split_hdim128_fp16_sm80_4bit.cu
29+
)
30+
# Link the libraries reported by find_package(Torch). PRIVATE is used because
# an executable has no consumers, and the keyword form avoids the legacy
# plain signature of target_link_libraries.
target_link_libraries(test_single_packdecode PRIVATE "${TORCH_LIBRARIES}")
31+
target_include_directories(test_single_packdecode PRIVATE ${INCLUDE_DIR})
32+
# Extra compiler flags applied only when compiling CUDA sources of this target.
# The generator expression is quoted so it reaches CMake as a single argument;
# the ';' separators keep the individual flags as separate list items.
target_compile_options(test_single_packdecode PRIVATE
  "$<$<COMPILE_LANGUAGE:CUDA>:-maxrregcount=255;-gencode;arch=compute_80,code=sm_80;-w>")
33+

0 commit comments

Comments
 (0)