Skip to content
This repository was archived by the owner on Apr 6, 2026. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
find_package(CutlassSycl)

set(CUTLASS_SYCL_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use")
if (NOT CutlassSycl_FOUND)
set(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
set(CUTLASS_ENABLE_BENCHMARKS OFF CACHE BOOL "Disable CUTLASS Benchmarks")

# Set CUTLASS_REVISION manually -- its revision detection doesn't work in this case.
set(CUTLASS_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use")

# Use the specified CUTLASS source directory for compilation if CUTLASS_SYCL_SRC_DIR is provided
if (DEFINED ENV{CUTLASS_SYCL_SRC_DIR})
set(CUTLASS_SYCL_SRC_DIR $ENV{CUTLASS_SYCL_SRC_DIR})
Expand All @@ -22,7 +19,7 @@ if (NOT CutlassSycl_FOUND)
FetchContent_Declare(
cutlass
GIT_REPOSITORY https://github.com/intel/cutlass-sycl.git
GIT_TAG ${CUTLASS_REVISION}
GIT_TAG ${CUTLASS_SYCL_REVISION}
GIT_PROGRESS TRUE

# Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
Expand Down Expand Up @@ -54,8 +51,15 @@ else()
include_directories(${CUTLASS_INCLUDE_DIR})
include_directories(${CUTLASS_TOOLS_UTIL_INCLUDE_DIR})
endif(NOT CutlassSycl_FOUND)
# Define OLD_API=1 when the selected CUTLASS SYCL revision string starts with
# "v3.9"; any other revision leaves the macro undefined.
# Sources can test the macro to choose between API spellings (the
# cutlass-gemm example guards syclcompat::wait() vs compat::wait() with it).
# NOTE(review): add_compile_definitions() is directory-scoped, so the macro is
# applied to the targets of this directory rather than to one specific target.
if(CUTLASS_SYCL_REVISION MATCHES "^v3\\.9")
add_compile_definitions(OLD_API=1)
endif()

# Rewrite sycl_link_flags in place: swap the "spir64_gen,spir64" value of
# -fsycl-targets for "intel_gpu_pvc", then delete the
# "-device pvc,xe-lpg,ats-m150" substring.
string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_link_flags "${sycl_link_flags}")
string(REPLACE "-device pvc,xe-lpg,ats-m150" "" sycl_link_flags "${sycl_link_flags}")
# Append the SPIR-V translator option that enables SPV_INTEL_split_barrier.
# NOTE(review): the two APPEND calls below differ only in the trailing ";".
# They look like the before/after forms of a single statement from a diff
# rendering; confirm only one exists in the real file, otherwise the option
# pair is appended twice.
string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier;")
string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier")
# Revision "v0.5" (exact match) enables two further extensions. The leading ","
# continues the comma-separated -spirv-ext value appended just above, so this
# relies on that value being the last text in sycl_link_flags (no trailing ";").
if(CUTLASS_SYCL_REVISION STREQUAL "v0.5")
string(APPEND sycl_link_flags ",+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate")
endif()
# Apply the same -fsycl-targets substitution to sycl_flags.
string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_flags "${sycl_flags}")

1 change: 1 addition & 0 deletions build2cmake/src/torch/xpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Resu
let version = match dpcpp_version.as_str() {
"2025.0" => "3.9-0.2",
"2025.1" => "3.9-0.3",
"2025.2" => "0.5",
_ => bail!(
"No cutlass_sycl version mapped for DPCPP_VERSION {}",
dpcpp_version
Expand Down
4 changes: 4 additions & 0 deletions examples/cutlass-gemm/gemm_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,5 +166,9 @@ void cutlass_gemm(torch::Tensor &out, torch::Tensor const &A, torch::Tensor cons
TORCH_CHECK(gemm_op.can_implement(arguments) == cutlass::Status::kSuccess, "Invalid GEMM problem size or configuration");
CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get()));
CUTLASS_CHECK(gemm_op.run());
#if defined(OLD_API)
syclcompat::wait();
#else
compat::wait();
#endif
}
Loading