Skip to content

Commit 598d41e

Browse files
committed
Adding tuned instance list for grouped conv fwd
The following flavors are updated with the tuned instance list: - grouped_conv2d_fwd - grouped_conv2d_fwd_bias_clamp - grouped_conv2d_fwd_clamp - grouped_conv3d_fwd - grouped_conv3d_fwd_bias_clamp - grouped_conv3d_fwd_clamp - grouped_conv3d_fwd_scaleadd_ab. Refactored instance selection: - removed all the unnecessary instance tuples (comp/mem/16x16/generic) - removed all unnecessary layouts and data types
1 parent 294e14b commit 598d41e

File tree

130 files changed

+145
-9323
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+145
-9323
lines changed

library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_comp_instance.hpp

Lines changed: 0 additions & 159 deletions
This file was deleted.

library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_instance.hpp

Lines changed: 60 additions & 373 deletions
Large diffs are not rendered by default.

library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_mem_instance.hpp

Lines changed: 0 additions & 131 deletions
This file was deleted.

library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_merged_groups_instance.hpp

Lines changed: 0 additions & 125 deletions
This file was deleted.

library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_scaleadd_ab_instance.hpp

Lines changed: 70 additions & 38 deletions
Large diffs are not rendered by default.

library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp

Lines changed: 0 additions & 332 deletions
Large diffs are not rendered by default.

library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bias_clamp.hpp

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -287,18 +287,8 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
287287
{
288288
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instances(
289289
op_ptrs);
290-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_16x16_instances(
291-
op_ptrs);
292290
// add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances(
293291
// op_ptrs);
294-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances(
295-
op_ptrs);
296-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_comp_instances(
297-
op_ptrs);
298-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances(
299-
op_ptrs);
300-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances(
301-
op_ptrs);
302292
}
303293
#endif
304294
#ifdef CK_ENABLE_FP16
@@ -308,18 +298,8 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
308298
{
309299
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instances(
310300
op_ptrs);
311-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_16x16_instances(
312-
op_ptrs);
313301
// add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances(
314302
// op_ptrs);
315-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances(
316-
op_ptrs);
317-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_comp_instances(
318-
op_ptrs);
319-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances(
320-
op_ptrs);
321-
add_device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances(
322-
op_ptrs);
323303
}
324304
#endif
325305
}
@@ -336,18 +316,8 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
336316
{
337317
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
338318
op_ptrs);
339-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances(
340-
op_ptrs);
341319
// add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
342320
// op_ptrs);
343-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
344-
op_ptrs);
345-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances(
346-
op_ptrs);
347-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances(
348-
op_ptrs);
349-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances(
350-
op_ptrs);
351321
}
352322
#endif
353323
#ifdef CK_ENABLE_FP16
@@ -357,18 +327,8 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
357327
{
358328
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instances(
359329
op_ptrs);
360-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances(
361-
op_ptrs);
362330
// add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances(
363331
// op_ptrs);
364-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances(
365-
op_ptrs);
366-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances(
367-
op_ptrs);
368-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances(
369-
op_ptrs);
370-
add_device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances(
371-
op_ptrs);
372332
}
373333
#endif
374334
}

0 commit comments

Comments
 (0)