Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,27 @@ Finally, after finishing your build, you should be able to do something like thi
# ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32
```

#### Targeting AMD GCN (GFX8/Polaris) Vulkan devices

Older AMD GPUs such as the RX 580 expose fast FP32 wave64 execution but lack
cooperative-matrix, integer-dot and bfloat16 shader support. You can avoid
building unusable shader families by forcing their GLSL compiler probes off
when configuring CMake:

```bash
cmake -B build-gfx803 -DGGML_VULKAN=ON \
-DGGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT=OFF \
-DGGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT=OFF \
-DGGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT=OFF \
-DGGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT=OFF
cmake --build build-gfx803 --config Release
```

At runtime, keep execution on the FP32-oriented code paths by exporting
`GGML_VK_DISABLE_F16=1`. Polaris-class GPUs also benefit from the existing
`GGML_VK_FORCE_MMVQ=1` flag when profiling shows that the quantized
matrix-vector multiplication (MMVQ) kernels remain faster than the fallback kernels.

## CANN
This provides NPU acceleration using the AI cores of your Ascend NPU. [CANN](https://www.hiascend.com/en/software/cann) is a set of hierarchical APIs that helps you quickly build AI applications and services based on the Ascend NPU.

Expand Down
106 changes: 86 additions & 20 deletions ggml/src/ggml-vulkan/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,40 @@ cmake_policy(SET CMP0114 NEW)

find_package(Vulkan COMPONENTS glslc REQUIRED)

# User overrides for the GLSL compiler extension probes. Each entry is a
# string cache variable that defaults to empty and is marked advanced so it
# stays out of the default cmake-gui/ccmake view.

# GL_KHR_cooperative_matrix
set(GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT "" CACHE STRING
    "Force GL_KHR_cooperative_matrix GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
mark_as_advanced(GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT)

# GL_NV_cooperative_matrix2
set(GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT "" CACHE STRING
    "Force GL_NV_cooperative_matrix2 GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
mark_as_advanced(GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT)

# GL_EXT_integer_dot_product
set(GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT "" CACHE STRING
    "Force GL_EXT_integer_dot_product GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
mark_as_advanced(GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT)

# GL_EXT_bfloat16
set(GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT "" CACHE STRING
    "Force GL_EXT_bfloat16 GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
mark_as_advanced(GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT)

# handle_forced_shader_extension(<EXTENSION_NAME> <RESULT_VARIABLE> <FORCE_VARIABLE>)
#
# Applies a user-supplied override for one GLSL extension.
#
#   EXTENSION_NAME   Extension name; used only in the status/error messages.
#   RESULT_VARIABLE  Name of the variable that receives ON or OFF in the
#                    caller's scope. When forced ON, the same name is also
#                    added as a compile definition and appended to
#                    VULKAN_SHADER_GEN_CMAKE_ARGS as -D<name>=ON.
#   FORCE_VARIABLE   Name of the variable that holds the override value.
#
# The override is compared case-insensitively:
#   ON  / TRUE  / YES / Y / 1  -> support forced on
#   OFF / FALSE / NO  / N / 0  -> support forced off
#   empty                      -> nothing is set; the function is a no-op
#   anything else              -> configuration aborts with FATAL_ERROR
#
# Example:
#   handle_forced_shader_extension(
#       "GL_EXT_bfloat16"
#       "GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT"
#       "GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT")
function(handle_forced_shader_extension EXTENSION_NAME RESULT_VARIABLE FORCE_VARIABLE)
    # No override requested: leave the result variable untouched.
    if ("${${FORCE_VARIABLE}}" STREQUAL "")
        return()
    endif()

    string(TOUPPER "${${FORCE_VARIABLE}}" forced_value)

    # Accept the usual CMake boolean spellings, not only the literal ON/OFF,
    # so that e.g. -D<FORCE_VARIABLE>=1 or =FALSE does not abort configuration.
    if (forced_value MATCHES "^(ON|TRUE|YES|Y|1)$")
        message(STATUS "${EXTENSION_NAME} support forced ON")
        # The definition uses the variable *name* as the macro.
        add_compile_definitions(${RESULT_VARIABLE})
        # list(APPEND) only modifies the function-local copy, so the updated
        # list has to be pushed back to the caller explicitly.
        list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -D${RESULT_VARIABLE}=ON)
        set(VULKAN_SHADER_GEN_CMAKE_ARGS "${VULKAN_SHADER_GEN_CMAKE_ARGS}" PARENT_SCOPE)
        set(${RESULT_VARIABLE} ON PARENT_SCOPE)
    elseif (forced_value MATCHES "^(OFF|FALSE|NO|N|0)$")
        message(STATUS "${EXTENSION_NAME} support forced OFF")
        set(${RESULT_VARIABLE} OFF PARENT_SCOPE)
    else()
        message(FATAL_ERROR "Invalid value for ${FORCE_VARIABLE}: ${${FORCE_VARIABLE}}. Expected ON/TRUE/YES/1, OFF/FALSE/NO/0, or empty.")
    endif()
endfunction()

function(detect_host_compiler)
if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
find_program(HOST_C_COMPILER NAMES cl gcc clang NO_CMAKE_FIND_ROOT_PATH)
Expand Down Expand Up @@ -52,29 +86,61 @@ if (Vulkan_FOUND)
set(VULKAN_SHADER_GEN_CMAKE_ARGS "")

# Test all shader extensions
test_shader_extension_support(
"GL_KHR_cooperative_matrix"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat_support.comp"
"GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
)
if (NOT "${GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$")
handle_forced_shader_extension(
"GL_KHR_cooperative_matrix"
"GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
"GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT"
)
else()
test_shader_extension_support(
"GL_KHR_cooperative_matrix"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat_support.comp"
"GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
)
endif()

test_shader_extension_support(
"GL_NV_cooperative_matrix2"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp"
"GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
)
if (NOT "${GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$")
handle_forced_shader_extension(
"GL_NV_cooperative_matrix2"
"GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
"GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT"
)
else()
test_shader_extension_support(
"GL_NV_cooperative_matrix2"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp"
"GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
)
endif()

test_shader_extension_support(
"GL_EXT_integer_dot_product"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_integer_dot_support.comp"
"GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT"
)
if (NOT "${GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$")
handle_forced_shader_extension(
"GL_EXT_integer_dot_product"
"GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT"
"GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT"
)
else()
test_shader_extension_support(
"GL_EXT_integer_dot_product"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_integer_dot_support.comp"
"GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT"
)
endif()

test_shader_extension_support(
"GL_EXT_bfloat16"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_bfloat16_support.comp"
"GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT"
)
if (NOT "${GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$")
handle_forced_shader_extension(
"GL_EXT_bfloat16"
"GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT"
"GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT"
)
else()
test_shader_extension_support(
"GL_EXT_bfloat16"
"${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_bfloat16_support.comp"
"GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT"
)
endif()

target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan)
target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
Expand Down
46 changes: 37 additions & 9 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,30 +257,58 @@ static vk_device_architecture get_device_architecture(const vk::PhysicalDevice&
}
}

if (!amd_shader_core_properties || !integer_dot_product || !subgroup_size_control) {
if (!amd_shader_core_properties) {
return vk_device_architecture::OTHER;
}

vk::PhysicalDeviceProperties2 props2;
vk::PhysicalDeviceShaderCorePropertiesAMD shader_core_props_amd;
vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR integer_dot_props;
vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props;
vk::PhysicalDeviceSubgroupProperties subgroup_props;

shader_core_props_amd.pNext = &subgroup_props;
subgroup_props.pNext = nullptr;
props2.pNext = &shader_core_props_amd;
shader_core_props_amd.pNext = &integer_dot_props;
integer_dot_props.pNext = &subgroup_size_control_props;

device.getProperties2(&props2);

if (subgroup_size_control_props.maxSubgroupSize == 64 && subgroup_size_control_props.minSubgroupSize == 64) {
uint32_t max_subgroup_size = subgroup_props.subgroupSize;

if (subgroup_size_control) {
vk::PhysicalDeviceProperties2 subgroup_size_props;
vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props;

subgroup_size_control_props.pNext = nullptr;
subgroup_size_props.pNext = &subgroup_size_control_props;

device.getProperties2(&subgroup_size_props);

max_subgroup_size = subgroup_size_control_props.maxSubgroupSize;
}

bool integer_dot_mixed_signedness = false;
if (integer_dot_product) {
vk::PhysicalDeviceProperties2 integer_dot_props2;
vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR integer_dot_props;

integer_dot_props.pNext = nullptr;
integer_dot_props2.pNext = &integer_dot_props;

device.getProperties2(&integer_dot_props2);

integer_dot_mixed_signedness = integer_dot_props.integerDotProduct4x8BitPackedMixedSignednessAccelerated;
}

const uint32_t wavefront_size = shader_core_props_amd.wavefrontSize;

if (wavefront_size == 64 && max_subgroup_size >= 64) {
return vk_device_architecture::AMD_GCN;
}
if (subgroup_size_control_props.maxSubgroupSize == 64 && subgroup_size_control_props.minSubgroupSize == 32) {
// RDNA

if (wavefront_size == 32) {
if (shader_core_props_amd.wavefrontsPerSimd == 20) {
return vk_device_architecture::AMD_RDNA1;
}
if (integer_dot_props.integerDotProduct4x8BitPackedMixedSignednessAccelerated) {
if (integer_dot_mixed_signedness) {
return vk_device_architecture::AMD_RDNA3;
}
return vk_device_architecture::AMD_RDNA2;
Expand Down