[clang] [llvm] [AArch64][SME] Fix multi vector cvt builtins (PR #77656)

Matthew Devereau via cfe-commits Thu, 11 Jan 2024 05:47:37 -0800

================
@@ -34,118 +34,118 @@ define <vscale x 8 x bfloat> 
@multi_vector_cvt_x2_bf16(<vscale x 4 x float> %unu
 ;
 ; FCVTZS
 ;
-define {<vscale x 4 x float>, <vscale x 4 x float>}  
@multi_vector_cvt_x2_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> 
%zn0, <vscale x 4 x i32> %zn1) {
-; CHECK-LABEL: multi_vector_cvt_x2_f32_s32:
+define {<vscale x 4 x i32>, <vscale x 4 x i32>}  
@multi_vector_cvt_x2_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> 
%zn0, <vscale x 4 x float> %zn1) {
+; CHECK-LABEL: multi_vector_cvt_x2_s32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z3.d, z2.d
 ; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    fcvtzs { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
-  %res = call {<vscale x 4 x float>, <vscale x 4 x float>} 
@llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x 
i32>%zn1)
-  ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} 
@llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x 
float> %zn1)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res
 }
 
-define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, 
<vscale x 4 x float>}  @multi_vector_cvt_x4_f32_s32(<vscale x 4 x i32> %unused, 
<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, 
<vscale x 4 x i32> %zn3) {
-; CHECK-LABEL: multi_vector_cvt_x4_f32_s32:
+define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 
x i32>}  @multi_vector_cvt_x4_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 
x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 
4 x float> %zn3) {
+; CHECK-LABEL: multi_vector_cvt_x4_s32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    fcvtzs { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
-  %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x 
float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x 
i32>%zn0, <vscale x 4 x i32>%zn1, <vscale x 4 x i32>%zn2, <vscale x 4 x 
i32>%zn3)
-  ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, 
<vscale x 4 x float>} %res
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, 
<vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float> 
%zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x 
float> %zn3)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 
x i32>} %res
 }
 
 ;
 ; FCVTZU
 ;
-define {<vscale x 4 x float>, <vscale x 4 x float>}  
@multi_vector_cvt_x2_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> 
%zn0, <vscale x 4 x i32> %zn1) {
-; CHECK-LABEL: multi_vector_cvt_x2_f32_u32:
+define {<vscale x 4 x i32>, <vscale x 4 x i32>}  
@multi_vector_cvt_x2_u32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> 
%zn0, <vscale x 4 x float> %zn1) {
+; CHECK-LABEL: multi_vector_cvt_x2_u32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z3.d, z2.d
 ; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    fcvtzu { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
-  %res = call {<vscale x 4 x float>, <vscale x 4 x float>} 
@llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x 
i32>%zn1)
-  ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} 
@llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x 
float> %zn1)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res
 }
 
-define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, 
<vscale x 4 x float>}  @multi_vector_cvt_x4_f32_u32(<vscale x 4 x i32> %unused, 
<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, 
<vscale x 4 x i32> %zn3) {
-; CHECK-LABEL: multi_vector_cvt_x4_f32_u32:
+define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 
4 x i32>}  @multi_vector_cvt_x4_u32_f32(<vscale x 4 x float> %unused, <vscale x 
4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale 
x 4 x float> %zn3) {
+; CHECK-LABEL: multi_vector_cvt_x4_u32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    fcvtzu { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
-  %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x 
float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x 
i32>%zn0, <vscale x 4 x i32>%zn1, <vscale x 4 x i32>%zn2, <vscale x 4 x 
i32>%zn3)
-  ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, 
<vscale x 4 x float>} %res
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, 
<vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float> 
%zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x 
float> %zn3)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 
x i32>} %res
 }
 
 ;
 ; SCVTF
 ;
-define {<vscale x 4 x i32>, <vscale x 4 x i32>}  
@multi_vector_cvt_x2_s32_f32(<vscale x 4 x float>%unused, <vscale x 4 x float> 
%zn0, <vscale x 4 x float> %zn1) {
-; CHECK-LABEL: multi_vector_cvt_x2_s32_f32:
+define {<vscale x 4 x float>, <vscale x 4 x float>}  
@multi_vector_cvt_x2_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> 
%zn0, <vscale x 4 x i32> %zn1) {
+; CHECK-LABEL: multi_vector_cvt_x2_f32_s32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z3.d, z2.d
 ; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    scvtf { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
-  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} 
@llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x 
float>%zn1)
-  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res
+  %res = call {<vscale x 4 x float>, <vscale x 4 x float>} 
@llvm.aarch64.sve.scvtf.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> 
%zn1)
----------------
MDevereau wrote:


I changed this because I thought it was an LLVM IR requirement for the type 
suffix (previously `.nxv4f32`) to represent the parameters passed into the 
intrinsic - in this case `.nxv4i32`. To me the intrinsic definition looks a bit 
odd with both the return type and parameters being floats - `{<vscale x 4 x 
float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4f32` - since the 
name alone gives no indication that i32s are being passed in as parameters.

If there's a particular reason I'm missing why using the FP type is correct, 
then I'm happy to change it. What was previously being bitcast? Was it the i32s 
being bitcast to floats after the return? Or were all the floats being bitcast 
to i32 before the call and then returned as i32? I'm assuming it's the former.

https://github.com/llvm/llvm-project/pull/77656
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AArch64][SME] Fix multi vector cvt builtins (PR #77656)

Reply via email to