From 8952d30b66825c92ae516dd46e96adb938a3fa07 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Fri, 26 Sep 2025 13:42:27 +0000 Subject: [PATCH 1/2] append sycl flags for cutlass in 2025.2.x Signed-off-by: Wang, Yi A --- build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake | 12 +++++++++++- build2cmake/src/torch/xpu.rs | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake b/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake index 3a5a8b09..750e3765 100644 --- a/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake +++ b/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake @@ -56,6 +56,16 @@ else() endif(NOT CutlassSycl_FOUND) string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_link_flags "${sycl_link_flags}") string(REPLACE "-device pvc,xe-lpg,ats-m150" "" sycl_link_flags "${sycl_link_flags}") -string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier;") +string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier") +execute_process( + COMMAND ${ICPX_COMPILER} --version + OUTPUT_VARIABLE ICPX_VERSION_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE +) +string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" ICPX_VERSION "${ICPX_VERSION_OUTPUT}") +if(ICPX_VERSION STREQUAL "2025.2.1") + string(APPEND sycl_link_flags ",+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate") +endif() + string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_flags "${sycl_flags}") diff --git a/build2cmake/src/torch/xpu.rs b/build2cmake/src/torch/xpu.rs index a1ad30ef..66743461 100644 --- a/build2cmake/src/torch/xpu.rs +++ b/build2cmake/src/torch/xpu.rs @@ -202,6 +202,7 @@ fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Resu let version = match dpcpp_version.as_str() { "2025.0" => "3.9-0.2", "2025.1" => "3.9-0.3", + "2025.2" => "3.9-0.3", _ => bail!( "No cutlass_sycl version mapped for DPCPP_VERSION {}", dpcpp_version From af1912ee8b0c4f7639dd75410d9e885a146fee48 Mon Sep 17 00:00:00 2001 From: "Wang, Yi A" Date: Mon, 29 Sep 2025 11:20:27 +0000 Subject: [PATCH 2/2] update to 0.5 Signed-off-by: Wang, Yi A --- .../src/templates/xpu/dep-cutlass-sycl.cmake | 20 +++++++------------ build2cmake/src/torch/xpu.rs | 2 +- examples/cutlass-gemm/gemm_sycl.cpp | 4 ++++ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake b/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake index 750e3765..0e9d69af 100644 --- a/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake +++ b/build2cmake/src/templates/xpu/dep-cutlass-sycl.cmake @@ -1,12 +1,9 @@ find_package(CutlassSycl) +set(CUTLASS_SYCL_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use") if (NOT CutlassSycl_FOUND) set(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library") set(CUTLASS_ENABLE_BENCHMARKS OFF CACHE BOOL "Disable CUTLASS Benchmarks") - -# Set CUTLASS_REVISION manually -- its revision detection doesn't work in this case. - set(CUTLASS_REVISION "v{{ version }}" CACHE STRING "CUTLASS revision to use") - # Use the specified CUTLASS source directory for compilation if CUTLASS_SYCL_SRC_DIR is provided if (DEFINED ENV{CUTLASS_SYCL_SRC_DIR}) set(CUTLASS_SYCL_SRC_DIR $ENV{CUTLASS_SYCL_SRC_DIR}) @@ -22,7 +19,7 @@ if (NOT CutlassSycl_FOUND) FetchContent_Declare( cutlass GIT_REPOSITORY https://github.com/intel/cutlass-sycl.git - GIT_TAG ${CUTLASS_REVISION} + GIT_TAG ${CUTLASS_SYCL_REVISION} GIT_PROGRESS TRUE # Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history. @@ -54,18 +51,15 @@ else() include_directories(${CUTLASS_INCLUDE_DIR}) include_directories(${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}) endif(NOT CutlassSycl_FOUND) +if(CUTLASS_SYCL_REVISION MATCHES "^v3\\.9") + add_compile_definitions(OLD_API=1) +endif() + string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_link_flags "${sycl_link_flags}") string(REPLACE "-device pvc,xe-lpg,ats-m150" "" sycl_link_flags "${sycl_link_flags}") string(APPEND sycl_link_flags "-Xspirv-translator;-spirv-ext=+SPV_INTEL_split_barrier") -execute_process( - COMMAND ${ICPX_COMPILER} --version - OUTPUT_VARIABLE ICPX_VERSION_OUTPUT - OUTPUT_STRIP_TRAILING_WHITESPACE -) -string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" ICPX_VERSION "${ICPX_VERSION_OUTPUT}") -if(ICPX_VERSION STREQUAL "2025.2.1") +if(CUTLASS_SYCL_REVISION STREQUAL "v0.5") string(APPEND sycl_link_flags ",+SPV_INTEL_2d_block_io,+SPV_INTEL_subgroup_matrix_multiply_accumulate") endif() - string(REPLACE "-fsycl-targets=spir64_gen,spir64" "-fsycl-targets=intel_gpu_pvc" sycl_flags "${sycl_flags}") diff --git a/build2cmake/src/torch/xpu.rs b/build2cmake/src/torch/xpu.rs index 66743461..590357c9 100644 --- a/build2cmake/src/torch/xpu.rs +++ b/build2cmake/src/torch/xpu.rs @@ -202,7 +202,7 @@ fn render_deps(env: &Environment, build: &Build, write: &mut impl Write) -> Resu let version = match dpcpp_version.as_str() { "2025.0" => "3.9-0.2", "2025.1" => "3.9-0.3", - "2025.2" => "3.9-0.3", + "2025.2" => "0.5", _ => bail!( "No cutlass_sycl version mapped for DPCPP_VERSION {}", dpcpp_version diff --git a/examples/cutlass-gemm/gemm_sycl.cpp b/examples/cutlass-gemm/gemm_sycl.cpp index 63bdaac8..b268ecf2 100644 --- a/examples/cutlass-gemm/gemm_sycl.cpp +++ b/examples/cutlass-gemm/gemm_sycl.cpp @@ -166,5 +166,9 @@ void cutlass_gemm(torch::Tensor &out, torch::Tensor const &A, torch::Tensor cons TORCH_CHECK(gemm_op.can_implement(arguments) == cutlass::Status::kSuccess, "Invalid GEMM problem size or configuration"); CUTLASS_CHECK(gemm_op.initialize(arguments, workspace.get())); CUTLASS_CHECK(gemm_op.run()); +#if defined(OLD_API) syclcompat::wait(); +#else + compat::wait(); +#endif }