From e30399e303b3be11a07e3a80e80bb8d39c6b3fd9 Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Sat, 6 Dec 2025 00:49:49 +0000 Subject: [PATCH 1/2] [AMDGPU] Add argument range annotations to intrinsics where applicable This commit adds annotations to AMDGPU intrinsics that take arguments which are documented to lie within a specified range, ensuring that invalid instances of these intrinsics don't pass verification. (Note that certain intrinsics that could have range annotations don't, as their existing behavior is to clamp out-of-range values silently.) --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 91d72d5ef9dfc..03488f8389aa2 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -653,19 +653,20 @@ def int_amdgcn_cvt_pk_bf8_f16 // byte_sel selects byte to write in vdst. def int_amdgcn_cvt_sr_fp8_f16 : DefaultAttrsIntrinsic< [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f16">; // llvm.amdgcn.cvt.sr.bf8.f16 i32 vdst, half src, i32 seed, i32 old, imm byte_sel [0..3] // byte_sel selects byte to write in vdst. 
def int_amdgcn_cvt_sr_bf8_f16 : DefaultAttrsIntrinsic< [llvm_i32_ty], [llvm_half_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, ImmArg>] + [IntrNoMem, IntrSpeculatable, ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f16">; // llvm.amdgcn.cvt.scale.pk32.f16.bf6 v32f16 vdst, v6i32 src0, i32 scale_sel [0..15] class AMDGPUCvtScaleIntrinsic : DefaultAttrsIntrinsic< - [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, ImmArg>] + [DstTy], [Src0Ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, ImmArg>, Range, 0, 16>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32Intrinsic : DefaultAttrsIntrinsic< @@ -746,7 +747,8 @@ class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic : Def [llvm_i32_ty, // src llvm_float_ty, // scale llvm_i32_ty], // src_sel index [0..3] - [IntrNoMem, ImmArg>] + [IntrNoMem, + ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScale_pk_FromFP8BF8Intrinsic : DefaultAttrsIntrinsic< @@ -783,7 +785,7 @@ class AMDGPUCvtScaleFP8BF8ToF16TiedInputIntrinsic : llvm_float_ty, // scale llvm_i32_ty, // src_sel_index[0..3] llvm_i1_ty], // dst_lo_hi_sel[true false] - [IntrNoMem, ImmArg>, ImmArg>] + [IntrNoMem, ImmArg>, Range, 0, 4>, ImmArg>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF32ToFP4Intrinsic : DefaultAttrsIntrinsic< @@ -793,7 +795,7 @@ class AMDGPUCvtScaleF32ToFP4Intrinsic : DefaultAttrsIntrinsic< llvm_float_ty, // src1 llvm_float_ty, // scale llvm_i32_ty], // dst_sel_index[0..3] - [IntrNoMem, ImmArg>] + [IntrNoMem, ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -802,7 +804,7 @@ class AMDGPUCvtScaleF16ToFP4TiedInputIntrinsic : De SrcTy, // src llvm_float_ty, // scale llvm_i32_ty], // dest_sel_index [0..3] - [IntrNoMem, ImmArg>] + [IntrNoMem, ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class 
AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic : DefaultAttrsIntrinsic< @@ -812,7 +814,7 @@ class AMDGPUCvtScaleBF16F16F32SRToFP4BF8F8TiedInputIntrinsic>] + [IntrNoMem, ImmArg>, Range, 0, 4>] >, ClangBuiltin<"__builtin_amdgcn_"#name>; class AMDGPUCvtScaleSRF32ToBF16F16TiedInputIntrinsic : DefaultAttrsIntrinsic< From c48996443e217cc9ab0381d3e4f22f5cb9dd8309 Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Mon, 8 Dec 2025 17:19:30 +0000 Subject: [PATCH 2/2] Add tests (LLM generated) --- .../AMDGPU/test-cvt-fp4f6f8-immarg-ranges.ll | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 llvm/test/Verifier/AMDGPU/test-cvt-fp4f6f8-immarg-ranges.ll diff --git a/llvm/test/Verifier/AMDGPU/test-cvt-fp4f6f8-immarg-ranges.ll b/llvm/test/Verifier/AMDGPU/test-cvt-fp4f6f8-immarg-ranges.ll new file mode 100644 index 0000000000000..4e8022711320d --- /dev/null +++ b/llvm/test/Verifier/AMDGPU/test-cvt-fp4f6f8-immarg-ranges.ll @@ -0,0 +1,147 @@ +; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s + +; -------------------------------------------------------------------- +; llvm.amdgcn.cvt.sr.fp8.f16 - byte_sel out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 4 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 4) +define i32 @test_cvt_sr_fp8_f16_byte_sel_out_of_range(half %src, i32 %seed, i32 %old) { + %result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 4) + ret i32 %result +} + +; CHECK: immarg value 10 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 10) +define i32 @test_cvt_sr_fp8_f16_byte_sel_way_out_of_range(half %src, i32 %seed, i32 %old) { + %result = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half %src, i32 %seed, i32 %old, i32 10) + ret i32 %result +} + +; 
-------------------------------------------------------------------- +; llvm.amdgcn.cvt.sr.bf8.f16 - byte_sel out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 4 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %src, i32 %seed, i32 %old, i32 4) +define i32 @test_cvt_sr_bf8_f16_byte_sel_out_of_range(half %src, i32 %seed, i32 %old) { + %result = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half %src, i32 %seed, i32 %old, i32 4) + ret i32 %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.cvt.scale.pk8.f16.fp8 - scale_sel out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 16 out of range [0, 16) +; CHECK-NEXT: %result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 16) +define <8 x half> @test_cvt_scale_pk8_f16_fp8_scale_sel_out_of_range(<2 x i32> %src) { + %result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 16) + ret <8 x half> %result +} + +; CHECK: immarg value 100 out of range [0, 16) +; CHECK-NEXT: %result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 100) +define <8 x half> @test_cvt_scale_pk8_f16_fp8_scale_sel_way_out_of_range(<2 x i32> %src) { + %result = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %src, i32 0, i32 100) + ret <8 x half> %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.cvt.scalef32.f32.fp8 - src_sel out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 4 out of range [0, 4) +; CHECK-NEXT: %result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, float %scale, i32 4) +define float @test_cvt_scalef32_f32_fp8_src_sel_out_of_range(i32 %src, float %scale) { + %result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, 
float %scale, i32 4) + ret float %result +} + +; CHECK: immarg value 7 out of range [0, 4) +; CHECK-NEXT: %result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, float %scale, i32 7) +define float @test_cvt_scalef32_f32_fp8_src_sel_way_out_of_range(i32 %src, float %scale) { + %result = call float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32 %src, float %scale, i32 7) + ret float %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.cvt.scalef32.f16.fp8 - src_sel_index out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 4 out of range [0, 4) +; CHECK-NEXT: %result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 4, i1 false) +define <2 x half> @test_cvt_scalef32_f16_fp8_src_sel_index_out_of_range(<2 x half> %old, i32 %src, float %scale) { + %result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 4, i1 false) + ret <2 x half> %result +} + +; CHECK: immarg value 15 out of range [0, 4) +; CHECK-NEXT: %result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 15, i1 true) +define <2 x half> @test_cvt_scalef32_f16_fp8_src_sel_index_way_out_of_range(<2 x half> %old, i32 %src, float %scale) { + %result = call <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half> %old, i32 %src, float %scale, i32 15, i1 true) + ret <2 x half> %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.cvt.scalef32.pk.fp4.f32 - dst_sel_index out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 4 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 4) +define i32 @test_cvt_scalef32_pk_fp4_f32_dst_sel_index_out_of_range(i32 %old, float %src0, float %src1, float 
%scale) { + %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 4) + ret i32 %result +} + +; CHECK: immarg value 8 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 8) +define i32 @test_cvt_scalef32_pk_fp4_f32_dst_sel_index_way_out_of_range(i32 %old, float %src0, float %src1, float %scale) { + %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32 %old, float %src0, float %src1, float %scale, i32 8) + ret i32 %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.cvt.scalef32.pk.fp4.f16 - dest_sel_index out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 4 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 4) +define i32 @test_cvt_scalef32_pk_fp4_f16_dest_sel_index_out_of_range(i32 %old, <2 x half> %src, float %scale) { + %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 4) + ret i32 %result +} + +; CHECK: immarg value 12 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 12) +define i32 @test_cvt_scalef32_pk_fp4_f16_dest_sel_index_way_out_of_range(i32 %old, <2 x half> %src, float %scale) { + %result = call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32 %old, <2 x half> %src, float %scale, i32 12) + ret i32 %result +} + +; -------------------------------------------------------------------- +; llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16 - dst_sel_index out of range +; -------------------------------------------------------------------- + +; CHECK: immarg value 4 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float 
%scale, i32 4) +define i32 @test_cvt_scalef32_sr_pk_fp4_f16_dst_sel_index_out_of_range(i32 %old, <2 x half> %src, i32 %seed, float %scale) { + %result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float %scale, i32 4) + ret i32 %result +} + +; CHECK: immarg value 9 out of range [0, 4) +; CHECK-NEXT: %result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float %scale, i32 9) +define i32 @test_cvt_scalef32_sr_pk_fp4_f16_dst_sel_index_way_out_of_range(i32 %old, <2 x half> %src, i32 %seed, float %scale) { + %result = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %old, <2 x half> %src, i32 %seed, float %scale, i32 9) + ret i32 %result +} + +declare i32 @llvm.amdgcn.cvt.sr.fp8.f16(half, i32, i32, i32) +declare i32 @llvm.amdgcn.cvt.sr.bf8.f16(half, i32, i32, i32) +declare <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32>, i32, i32) +declare float @llvm.amdgcn.cvt.scalef32.f32.fp8(i32, float, i32) +declare <2 x half> @llvm.amdgcn.cvt.scalef32.f16.fp8(<2 x half>, i32, float, i32, i1) +declare i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f32(i32, float, float, float, i32) +declare i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16(i32, <2 x half>, float, i32) +declare i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32, <2 x half>, i32, float, i32)