Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
115d23c
WIP POC of dispatcher
vidyasagar-amd Nov 4, 2025
24d6a59
Dispatcher python workflow setup.
vidyasagar-amd Nov 5, 2025
65fd7a5
Dispatcher cleanup and updates.
vidyasagar-amd Nov 5, 2025
4c71440
Fixes to python paths
vidyasagar-amd Nov 14, 2025
adc0971
Cleaning up code
vidyasagar-amd Nov 14, 2025
8afccf9
Improving dispatcher support for different arch
vidyasagar-amd Nov 25, 2025
99fd5b0
Fix formatting errors
vidyasagar-amd Nov 25, 2025
f722247
Cleaning up examples
vidyasagar-amd Nov 26, 2025
5cccd84
Improving codegeneration
vidyasagar-amd Nov 28, 2025
c3e194c
Improving and fixing C++ examples
vidyasagar-amd Nov 28, 2025
97b6093
Adding conv functionality (fwd,bwd,bwdw) and examples.
vidyasagar-amd Nov 29, 2025
ca61cc3
Fixes based on feedback.
vidyasagar-amd Dec 2, 2025
af67392
Further fixes based on feedback.
vidyasagar-amd Dec 2, 2025
ed1bb4e
Adding stress test for autogeneration and autocorrection, and fixing …
vidyasagar-amd Dec 2, 2025
daedc37
Another round of improvements based on feedback.
vidyasagar-amd Dec 3, 2025
8c83ed1
Trimming out unnecessary code.
vidyasagar-amd Dec 3, 2025
56032ee
Fixing the multi-D implementation.
vidyasagar-amd Dec 3, 2025
3c6206c
Using gpu verification for gemms and fixing convolutions tflops calcu…
vidyasagar-amd Dec 4, 2025
540371c
Fix counter usage issue and arch filtering per ops.
vidyasagar-amd Dec 4, 2025
4dc6b64
Adding changelog and other fixes.
vidyasagar-amd Dec 4, 2025
c5ed7e0
Improve examples and resolve critical bugs.
vidyasagar-amd Dec 5, 2025
c5aa3e3
Reduce build time for python examples.
vidyasagar-amd Dec 5, 2025
b6f203d
Fixing minor bug.
vidyasagar-amd Dec 5, 2025
5219040
Fix compilation error.
vidyasagar-amd Dec 5, 2025
0fd95b7
Improve installation instructions for dispatcher.
vidyasagar-amd Dec 5, 2025
bc24ff5
Add docker based installation instructions for dispatcher.
vidyasagar-amd Dec 5, 2025
fc6c457
Fixing arch-based filtering to match tile engine.
vidyasagar-amd Dec 5, 2025
eea667a
Remove dead code and fix arch filtering.
vidyasagar-amd Dec 6, 2025
c959ca6
Minor bugfix.
vidyasagar-amd Dec 6, 2025
78bb635
Updates after rebase.
vidyasagar-amd Jan 5, 2026
d07ed74
Trimming code.
vidyasagar-amd Jan 5, 2026
c1a0819
Fix copyright headers.
vidyasagar-amd Jan 5, 2026
ace72c7
Consolidate examples, cut down code.
vidyasagar-amd Jan 14, 2026
4946b15
Minor fixes.
vidyasagar-amd Jan 15, 2026
4390db4
Improving python examples.
vidyasagar-amd Jan 15, 2026
89d8914
Update readmes.
vidyasagar-amd Jan 15, 2026
7f00a38
Remove conv functionality.
vidyasagar-amd Jan 21, 2026
629c199
Cleanup following conv removable.
vidyasagar-amd Jan 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,23 @@ CMakeUserPresets.json
# Python cache
__pycache__/

# Cache directories
# The leading "**/" patterns match a kernel_cache/.kernel_cache directory at
# any depth in the repository.
.cache/
.ck_tile_cache/
ck_tile_cache/
**/kernel_cache/
**/.kernel_cache/

# Dispatcher kernel cache (user-generated, can be large)
# The two kernel_cache rules below are already matched by the repo-wide
# "**/" rules above; they are spelled out again for the dispatcher tree.
dispatcher/**/kernel_cache/
dispatcher/**/.kernel_cache/
dispatcher/**/cached_kernels/
# Compiled GPU code objects produced under dispatcher/.
dispatcher/**/*.hsaco
dispatcher/**/*.co

# Dispatcher generated JSON exports
# "*_kernels.json" also matches dispatcher_kernels.json; the explicit entry
# is redundant but harmless.
dispatcher/**/*_kernels.json
dispatcher/**/dispatcher_kernels.json

# Generated test data
test_data/*
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Documentation for Composable Kernel available at [https://rocm.docs.amd.com/proj
## Composable Kernel 1.2.0 for ROCm 7.2.0

### Added
* Added CK-Tile dispatcher - a unified kernel dispatch, code generation, and architecture-based kernel filtering system with C++ and Python frontends, starting with GEMM support.
* Added support for bf16 data type to grouped_gemm and grouped_gemm_preshuffle.
* Added Col-Col-Row-Col layout support for aquant mode in blockscale GEMM.
* Added support for mixed precision fp8 x bf8 universal GEMM and weight preshuffle GEMM.
Expand Down
117 changes: 117 additions & 0 deletions dispatcher/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT

cmake_minimum_required(VERSION 3.16)

# Stand-alone project definition for the dispatcher; only a C++ toolchain is
# enabled here.
project(ck_tile_dispatcher
  VERSION 1.0.0
  LANGUAGES CXX
)

# Directory-wide C++ defaults: ISO C++17 is mandatory (no silent fallback to
# an older standard) and compiler-specific extensions are switched off.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Find HIP for headers (needed for validation kernels).
#
# Lookup order:
#   1. Whatever the caller already configured (CMAKE_PREFIX_PATH, hip_DIR, ...).
#   2. The ROCm root named by the ROCM_PATH environment variable, if it is set
#      to a non-empty value (covers versioned or relocated ROCm installs).
#   3. The conventional /opt/rocm prefix.
# The first attempt is QUIET so a miss does not print a warning; the retry is
# REQUIRED, so configuration stops with an error if HIP is still not found.
find_package(hip QUIET)
if(NOT hip_FOUND)
  # Environment variables are read at configure time only.
  if(NOT "$ENV{ROCM_PATH}" STREQUAL "")
    list(APPEND CMAKE_PREFIX_PATH "$ENV{ROCM_PATH}" "$ENV{ROCM_PATH}/hip")
  endif()
  list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip)
  find_package(hip REQUIRED)
endif()

# Dispatcher library, built from the registry and dispatcher sources.
# No STATIC/SHARED keyword is given, so the library type follows
# BUILD_SHARED_LIBS.
add_library(ck_tile_dispatcher
  src/registry.cpp
  src/dispatcher.cpp
)

# Compile as position-independent code so the library can be used by the
# Python bindings.
set_property(TARGET ck_tile_dispatcher PROPERTY POSITION_INDEPENDENT_CODE ON)

# Public include paths.
#   Build tree:   the dispatcher's own headers, then the header-only CK Tile
#                 headers one level up (same search order as before).
#   Install tree: both header sets resolve under <prefix>/include, so that
#                 entry is listed once.
target_include_directories(ck_tile_dispatcher
  PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
    $<INSTALL_INTERFACE:include>
)

# Propagate the HIP host-side usage requirements to this target and, because
# the link is PUBLIC, to everything that links ck_tile_dispatcher.
# hip_FOUND is expected to be true here: the earlier lookup in this file ends
# with find_package(hip REQUIRED).
if(hip_FOUND)
  target_link_libraries(ck_tile_dispatcher PUBLIC hip::host)
endif()

# Warning flags, applied only while compiling this library (PRIVATE, so they
# are not forced on consumers). Compiler IDs containing "GNU" or "Clang" get
# the GCC-style set; MSVC gets /W4; any other compiler gets nothing extra.
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
  target_compile_options(ck_tile_dispatcher PRIVATE -Wall -Wextra -Wpedantic)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
  target_compile_options(ck_tile_dispatcher PRIVATE /W4)
endif()

# ---------------------------------------------------------------------------
# Optional components
#
# Every component below is OFF by default. Turning an option ON adds the
# matching subdirectory to the build; the subdirectories are processed in the
# order they appear here.
# ---------------------------------------------------------------------------

# Unit tests (also enables CTest registration for this directory tree).
option(BUILD_DISPATCHER_TESTS "Build dispatcher unit tests" OFF)
if(BUILD_DISPATCHER_TESTS)
  enable_testing()
  add_subdirectory(tests)
endif()

# Python bindings.
option(BUILD_DISPATCHER_PYTHON "Build Python bindings for dispatcher" OFF)
if(BUILD_DISPATCHER_PYTHON)
  add_subdirectory(python)
endif()

# Code generation of wrappers for tile_engine integration.
option(DISPATCHER_AUTO_GENERATE_WRAPPERS "Auto-generate wrappers from tile_engine" OFF)
if(DISPATCHER_AUTO_GENERATE_WRAPPERS)
  add_subdirectory(codegen)
endif()

# Example programs.
option(BUILD_DISPATCHER_EXAMPLES "Build dispatcher examples" OFF)
if(BUILD_DISPATCHER_EXAMPLES)
  add_subdirectory(examples)
endif()

# ctypes bindings (lives in bindings/ctypes).
option(BUILD_DISPATCHER_BINDINGS "Build language bindings for dispatcher" OFF)
if(BUILD_DISPATCHER_BINDINGS)
  add_subdirectory(bindings/ctypes)
endif()

# When codegen is enabled and a generated-header directory is known, add it to
# the library's public include path for in-tree builds. It is wrapped in
# BUILD_INTERFACE, so it is not part of the installed interface.
# NOTE(review): DISPATCHER_GENERATED_INCLUDE_DIR is not assigned in this file;
# presumably codegen/CMakeLists.txt provides it (via PARENT_SCOPE or the
# cache) - confirm, otherwise this branch never runs.
if(DISPATCHER_AUTO_GENERATE_WRAPPERS AND DISPATCHER_GENERATED_INCLUDE_DIR)
  target_include_directories(ck_tile_dispatcher PUBLIC
    $<BUILD_INTERFACE:${DISPATCHER_GENERATED_INCLUDE_DIR}>
  )
endif()

# Installation
#
# 1. Install the library artifact and record it in the
#    ck_tile_dispatcher_targets export set.
# NOTE(review): the target links hip::host PUBLIC, so the exported target file
# will reference hip::host; consumers presumably must call find_package(hip)
# before including it - confirm a package config file elsewhere handles this.
install(
  TARGETS ck_tile_dispatcher
  EXPORT ck_tile_dispatcher_targets
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
  RUNTIME DESTINATION bin
)

# 2. Install the dispatcher's public headers: only *.hpp files from this
#    directory's include/ tree are copied, into <prefix>/include.
install(
  DIRECTORY include/
  DESTINATION include
  FILES_MATCHING PATTERN "*.hpp"
)

# 3. Write the export set as ck_tile_dispatcher_targets.cmake; the imported
#    target is exposed to consumers as ck_tile::ck_tile_dispatcher.
install(
  EXPORT ck_tile_dispatcher_targets
  FILE ck_tile_dispatcher_targets.cmake
  NAMESPACE ck_tile::
  DESTINATION lib/cmake/ck_tile_dispatcher
)

Loading