diff --git a/docs/build.md b/docs/build.md index dcbcce7549a..c277aac82fd 100644 --- a/docs/build.md +++ b/docs/build.md @@ -413,6 +413,27 @@ Finally, after finishing your build, you should be able to do something like thi # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32 ``` +#### Targeting AMD GCN (GFX8/Polaris) Vulkan devices + +Older AMD GPUs such as the RX 580 expose fast FP32 wave64 execution but lack +cooperative-matrix, integer-dot and bfloat16 shader support. You can avoid +building unusable shader families by forcing their GLSL compiler probes off +when configuring CMake: + +```bash +cmake -B build-gfx803 -DGGML_VULKAN=ON \ + -DGGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT=OFF \ + -DGGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT=OFF \ + -DGGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT=OFF \ + -DGGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT=OFF +cmake --build build-gfx803 --config Release +``` + +At runtime, keep execution on the FP32-oriented code paths by exporting +`GGML_VK_DISABLE_F16=1`. Polaris-class GPUs may also benefit from the existing +`GGML_VK_FORCE_MMVQ=1` flag when profiling shows that the quantized matrix-vector +multiplication (MMVQ) path remains faster than the fallback kernels. + ## CANN This provides NPU acceleration using the AI cores of your Ascend NPU. And [CANN](https://www.hiascend.com/en/software/cann) is a hierarchical APIs to help you to quickly build AI applications and service based on Ascend NPU. 
diff --git a/ggml/src/ggml-vulkan/CMakeLists.txt b/ggml/src/ggml-vulkan/CMakeLists.txt
index b97e7bf9955..c93d51d51e2 100644
--- a/ggml/src/ggml-vulkan/CMakeLists.txt
+++ b/ggml/src/ggml-vulkan/CMakeLists.txt
@@ -3,6 +3,50 @@ cmake_policy(SET CMP0114 NEW)
 
 find_package(Vulkan COMPONENTS glslc REQUIRED)
 
+set(GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT "" CACHE STRING "Force GL_KHR_cooperative_matrix GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
+set(GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT "" CACHE STRING "Force GL_NV_cooperative_matrix2 GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
+set(GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT "" CACHE STRING "Force GL_EXT_integer_dot_product GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
+set(GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT "" CACHE STRING "Force GL_EXT_bfloat16 GLSL compiler support detection (use ON, OFF, or leave empty for auto)")
+
+mark_as_advanced(
+    GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT
+    GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT
+    GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT
+    GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT
+)
+
+# Applies a user-forced ON/OFF decision for one GLSL extension instead of
+# probing the GLSL compiler.
+#
+#   EXTENSION_NAME  - extension name; used only in status messages.
+#   RESULT_VARIABLE - name of the variable set to ON or OFF in the caller's scope.
+#   FORCE_VARIABLE  - name of the variable holding the requested override.
+#
+# Accepted values (case-insensitive): ON/TRUE/YES/Y/1 and OFF/FALSE/NO/N/0.
+# Forcing ON also adds RESULT_VARIABLE as a compile definition and appends
+# -D<RESULT_VARIABLE>=ON to VULKAN_SHADER_GEN_CMAKE_ARGS in the caller's scope.
+# An empty value is a no-op; any other value aborts with FATAL_ERROR.
+function(handle_forced_shader_extension EXTENSION_NAME RESULT_VARIABLE FORCE_VARIABLE)
+    if ("${${FORCE_VARIABLE}}" STREQUAL "")
+        return()
+    endif()
+
+    string(TOUPPER "${${FORCE_VARIABLE}}" FORCE_VALUE_UPPER)
+    if (FORCE_VALUE_UPPER MATCHES "^(ON|TRUE|YES|Y|1)$")
+        message(STATUS "${EXTENSION_NAME} support forced ON")
+        add_compile_definitions(${RESULT_VARIABLE})
+        list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -D${RESULT_VARIABLE}=ON)
+        # list(APPEND) changed only the function-local copy; publish it to the caller.
+        set(VULKAN_SHADER_GEN_CMAKE_ARGS "${VULKAN_SHADER_GEN_CMAKE_ARGS}" PARENT_SCOPE)
+        set(${RESULT_VARIABLE} ON PARENT_SCOPE)
+    elseif (FORCE_VALUE_UPPER MATCHES "^(OFF|FALSE|NO|N|0)$")
+        message(STATUS "${EXTENSION_NAME} support forced OFF")
+        set(${RESULT_VARIABLE} OFF PARENT_SCOPE)
+    else()
+        message(FATAL_ERROR "Invalid value for ${FORCE_VARIABLE}: ${${FORCE_VARIABLE}}. Expected ON, OFF, or empty.")
+    endif()
+endfunction()
+
 function(detect_host_compiler)
     if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
         find_program(HOST_C_COMPILER NAMES cl gcc clang NO_CMAKE_FIND_ROOT_PATH)
@@ -52,29 +96,61 @@ if (Vulkan_FOUND)
     set(VULKAN_SHADER_GEN_CMAKE_ARGS "")
 
     # Test all shader extensions
-    test_shader_extension_support(
-        "GL_KHR_cooperative_matrix"
-        "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat_support.comp"
-        "GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
-    )
+    if (NOT "${GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$")
+        handle_forced_shader_extension(
+            "GL_KHR_cooperative_matrix"
+            "GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
+            "GGML_VULKAN_FORCE_COOPMAT_GLSLC_SUPPORT"
+        )
+    else()
+        test_shader_extension_support(
+            "GL_KHR_cooperative_matrix"
+            "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat_support.comp"
+            "GGML_VULKAN_COOPMAT_GLSLC_SUPPORT"
+        )
+    endif()
 
-    test_shader_extension_support(
-        "GL_NV_cooperative_matrix2"
-        "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp"
-        "GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
-    )
+    if (NOT "${GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$")
+        handle_forced_shader_extension(
+            "GL_NV_cooperative_matrix2"
+            "GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
+            "GGML_VULKAN_FORCE_COOPMAT2_GLSLC_SUPPORT"
+        )
+    else()
+        test_shader_extension_support(
+            "GL_NV_cooperative_matrix2"
+            "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_coopmat2_support.comp"
+            "GGML_VULKAN_COOPMAT2_GLSLC_SUPPORT"
+        )
+    endif()
 
-    test_shader_extension_support(
-        "GL_EXT_integer_dot_product"
-        "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_integer_dot_support.comp"
-        "GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT"
-    )
+    if (NOT "${GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$")
+        handle_forced_shader_extension(
+            "GL_EXT_integer_dot_product"
+            "GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT"
+
"GGML_VULKAN_FORCE_INTEGER_DOT_GLSLC_SUPPORT" + ) + else() + test_shader_extension_support( + "GL_EXT_integer_dot_product" + "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_integer_dot_support.comp" + "GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT" + ) + endif() - test_shader_extension_support( - "GL_EXT_bfloat16" - "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_bfloat16_support.comp" - "GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT" - ) + if (NOT "${GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT}" MATCHES "^(|[Aa][Uu][Tt][Oo])$") + handle_forced_shader_extension( + "GL_EXT_bfloat16" + "GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT" + "GGML_VULKAN_FORCE_BFLOAT16_GLSLC_SUPPORT" + ) + else() + test_shader_extension_support( + "GL_EXT_bfloat16" + "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders/test_bfloat16_support.comp" + "GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT" + ) + endif() target_link_libraries(ggml-vulkan PRIVATE Vulkan::Vulkan) target_include_directories(ggml-vulkan PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index ebbb412e55f..e3f6c2a78cb 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -257,30 +257,58 @@ static vk_device_architecture get_device_architecture(const vk::PhysicalDevice& } } - if (!amd_shader_core_properties || !integer_dot_product || !subgroup_size_control) { + if (!amd_shader_core_properties) { return vk_device_architecture::OTHER; } vk::PhysicalDeviceProperties2 props2; vk::PhysicalDeviceShaderCorePropertiesAMD shader_core_props_amd; - vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR integer_dot_props; - vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props; + vk::PhysicalDeviceSubgroupProperties subgroup_props; + shader_core_props_amd.pNext = &subgroup_props; + subgroup_props.pNext = nullptr; props2.pNext = &shader_core_props_amd; - shader_core_props_amd.pNext = &integer_dot_props; - integer_dot_props.pNext = 
&subgroup_size_control_props; device.getProperties2(&props2); - if (subgroup_size_control_props.maxSubgroupSize == 64 && subgroup_size_control_props.minSubgroupSize == 64) { + uint32_t max_subgroup_size = subgroup_props.subgroupSize; + + if (subgroup_size_control) { + vk::PhysicalDeviceProperties2 subgroup_size_props; + vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_size_control_props; + + subgroup_size_control_props.pNext = nullptr; + subgroup_size_props.pNext = &subgroup_size_control_props; + + device.getProperties2(&subgroup_size_props); + + max_subgroup_size = subgroup_size_control_props.maxSubgroupSize; + } + + bool integer_dot_mixed_signedness = false; + if (integer_dot_product) { + vk::PhysicalDeviceProperties2 integer_dot_props2; + vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR integer_dot_props; + + integer_dot_props.pNext = nullptr; + integer_dot_props2.pNext = &integer_dot_props; + + device.getProperties2(&integer_dot_props2); + + integer_dot_mixed_signedness = integer_dot_props.integerDotProduct4x8BitPackedMixedSignednessAccelerated; + } + + const uint32_t wavefront_size = shader_core_props_amd.wavefrontSize; + + if (wavefront_size == 64 && max_subgroup_size >= 64) { return vk_device_architecture::AMD_GCN; } - if (subgroup_size_control_props.maxSubgroupSize == 64 && subgroup_size_control_props.minSubgroupSize == 32) { - // RDNA + + if (wavefront_size == 32) { if (shader_core_props_amd.wavefrontsPerSimd == 20) { return vk_device_architecture::AMD_RDNA1; } - if (integer_dot_props.integerDotProduct4x8BitPackedMixedSignednessAccelerated) { + if (integer_dot_mixed_signedness) { return vk_device_architecture::AMD_RDNA3; } return vk_device_architecture::AMD_RDNA2;