Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions ggml/src/ggml-vulkan/ggml-vulkan.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -3703,6 +3703,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
bool fp16_compute = false;
bool maintenance4_support = false;
bool sm_builtins = false;
bool amd_shader_core_properties = false;
bool amd_shader_core_properties2 = false;
bool pipeline_robustness = false;
bool coopmat2_support = false;
Expand All @@ -3720,6 +3721,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
fp16_compute = true;
} else if (strcmp("VK_NV_shader_sm_builtins", properties.extensionName) == 0) {
sm_builtins = true;
} else if (strcmp("VK_AMD_shader_core_properties", properties.extensionName) == 0) {
amd_shader_core_properties = true;
} else if (strcmp("VK_AMD_shader_core_properties2", properties.extensionName) == 0) {
amd_shader_core_properties2 = true;
} else if (strcmp("VK_EXT_pipeline_robustness", properties.extensionName) == 0) {
Expand Down Expand Up @@ -3760,6 +3763,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
vk::PhysicalDeviceSubgroupProperties subgroup_props;
vk::PhysicalDeviceDriverProperties driver_props;
vk::PhysicalDeviceShaderSMBuiltinsPropertiesNV sm_props;
vk::PhysicalDeviceShaderCorePropertiesAMD amd_shader_core_properties_props;
vk::PhysicalDeviceShaderCoreProperties2AMD amd_shader_core_properties2_props;
vk::PhysicalDeviceVulkan11Properties vk11_props;
vk::PhysicalDeviceVulkan12Properties vk12_props;
Expand All @@ -3782,6 +3786,10 @@ static vk_device ggml_vk_get_device(size_t idx) {
last_struct->pNext = (VkBaseOutStructure *)&sm_props;
last_struct = (VkBaseOutStructure *)&sm_props;
}
if (amd_shader_core_properties) {
last_struct->pNext = (VkBaseOutStructure *)&amd_shader_core_properties_props;
last_struct = (VkBaseOutStructure *)&amd_shader_core_properties_props;
}
if (amd_shader_core_properties2) {
last_struct->pNext = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
last_struct = (VkBaseOutStructure *)&amd_shader_core_properties2_props;
Expand Down Expand Up @@ -3838,6 +3846,13 @@ static vk_device ggml_vk_get_device(size_t idx) {
} else {
device->shader_core_count = 0;
}
if (device->shader_core_count == 0 && amd_shader_core_properties && !amd_shader_core_properties2) {
const uint32_t shader_engine_count = amd_shader_core_properties_props.shaderEngineCount;
const uint32_t shader_arrays_per_engine = amd_shader_core_properties_props.shaderArraysPerEngineCount;
const uint32_t compute_units_per_array = amd_shader_core_properties_props.computeUnitsPerShaderArray;
const uint32_t total_compute_units = shader_engine_count * shader_arrays_per_engine * compute_units_per_array;
device->shader_core_count = total_compute_units;
}
device->float_controls_rte_fp16 = vk12_props.shaderRoundingModeRTEFloat16;

device->subgroup_arithmetic = (vk11_props.subgroupSupportedStages & vk::ShaderStageFlagBits::eCompute) &&
Expand Down