https://github.com/vangthao95 created 
https://github.com/llvm/llvm-project/pull/178827

None

>From 44e081162afda063dfe1690d1c51a8fccdf8aaae Mon Sep 17 00:00:00 2001
From: Vang Thao <[email protected]>
Date: Thu, 29 Jan 2026 19:46:43 -0800
Subject: [PATCH] [AMDGPU][GlobalISel] Add RegBankLegalize rules for
 amdgcn.class

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |   8 +
 .../GlobalISel/regbankselect-amdgcn.class.mir |   3 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll   |  72 ++++-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll | 291 ++++++++++++------
 4 files changed, 267 insertions(+), 107 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 1eaec8fc8f446..acbdc6d424fd5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1159,6 +1159,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
 
   addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
 
+  addRulesForIOpcs({amdgcn_class})
+      .Any({{UniS1, _, S16}, {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
+      .Any({{DivS1, _, S16}, {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
+      .Any({{UniS1, _, S32}, {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{DivS1, _, S32}, {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
+      .Any({{UniS1, _, S64}, {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
+      .Any({{DivS1, _, S64}, {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
+
   // This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir.
   addRulesForIOpcs({amdgcn_end_cf})
       .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir
index 95e63c74a5088..66db698f2584a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.class.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s
 
 ---
 name: class_ss
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
index 6f67015c1d6b8..a0f1d413de495 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -global-isel=0 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-SDAG %s
-; RUN: llc -global-isel=1 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI-GISEL %s
 
 declare half @llvm.fabs.f16(half %a)
 declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)
@@ -39,11 +39,18 @@ define amdgpu_kernel void @class_f16(
 ; VI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; VI-GISEL-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
 ; VI-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x0
-; VI-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-GISEL-NEXT:    v_cmp_class_f16_e64 s[2:3], v0, s2
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s2
+; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; VI-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
+; VI-GISEL-NEXT:    v_cmp_class_f16_e32 vcc, s2, v1
+; VI-GISEL-NEXT:    s_cmp_lg_u64 vcc, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s2, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; VI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; VI-GISEL-NEXT:    s_nop 2
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
     ptr addrspace(1) %r,
@@ -75,14 +82,19 @@ define amdgpu_kernel void @class_f16_fabs(
 ;
 ; VI-GISEL-LABEL: class_f16_fabs:
 ; VI-GISEL:       ; %bb.0: ; %entry
-; VI-GISEL-NEXT:    s_load_dword s3, s[8:9], 0x28
-; VI-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x4c
+; VI-GISEL-NEXT:    s_load_dword s3, s[8:9], 0x4c
+; VI-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x28
 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; VI-GISEL-NEXT:    v_cmp_class_f16_e64 s[4:5], |v0|, s4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; VI-GISEL-NEXT:    v_cmp_class_f16_e64 s[4:5], |s4|, v0
+; VI-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s3, s3, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0x1100f000
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
@@ -123,7 +135,12 @@ define amdgpu_kernel void @class_f16_fneg(
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_max_f16_e64 v0, -s3, -s3
 ; VI-GISEL-NEXT:    v_cmp_class_f16_e64 s[4:5], v0, s4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; VI-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s3, s3, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0x1100f000
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
@@ -164,7 +181,12 @@ define amdgpu_kernel void @class_f16_fabs_fneg(
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_max_f16_e64 v0, -|s3|, -|s3|
 ; VI-GISEL-NEXT:    v_cmp_class_f16_e64 s[4:5], v0, s4
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; VI-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s3, s3, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0x1100f000
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
@@ -202,7 +224,12 @@ define amdgpu_kernel void @class_f16_1(
 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_cmp_class_f16_e64 s[4:5], s3, 1
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; VI-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s3, s3, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0x1100f000
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
@@ -235,7 +262,12 @@ define amdgpu_kernel void @class_f16_64(
 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_cmp_class_f16_e64 s[4:5], s3, 64
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; VI-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s3, s3, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0x1100f000
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
@@ -270,7 +302,12 @@ define amdgpu_kernel void @class_f16_full_mask(
 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_cmp_class_f16_e32 vcc, s3, v0
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; VI-GISEL-NEXT:    s_cmp_lg_u64 vcc, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s3, s3, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0x1100f000
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
@@ -305,7 +342,12 @@ define amdgpu_kernel void @class_f16_nine_bit_mask(
 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT:    v_cmp_class_f16_e32 vcc, s3, v0
-; VI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; VI-GISEL-NEXT:    s_cmp_lg_u64 vcc, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT:    s_and_b32 s3, s3, 1
+; VI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT:    s_cselect_b32 s3, -1, 0
+; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0x1100f000
 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-GISEL-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
index 0a5522ab5a32c..865b2a5b59e1b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN:  llc -global-isel=0 -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-SDAG %s
-; RUN:  llc -global-isel=1 -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-GISEL %s
+; RUN:  llc -global-isel=1 -new-reg-bank-select -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-GISEL %s
 
 declare i1 @llvm.amdgcn.class.f32(float, i32) #1
 declare i1 @llvm.amdgcn.class.f64(double, i32) #1
@@ -25,15 +25,21 @@ define amdgpu_kernel void @test_class_f32(ptr addrspace(1) %out, [8 x i32], floa
 ;
 ; SI-GISEL-LABEL: test_class_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x1c
-; SI-GISEL-NEXT:    s_load_dword s6, s[4:5], 0x13
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x1c
+; SI-GISEL-NEXT:    s_load_dword s1, s[4:5], 0x13
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; SI-GISEL-NEXT:    v_cmp_class_f32_e32 vcc, s1, v0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], vcc, vcc
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; SI-GISEL-NEXT:    v_cmp_class_f32_e32 vcc, s6, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1
@@ -59,15 +65,21 @@ define amdgpu_kernel void @test_class_fabs_f32(ptr addrspace(1) %out, [8 x i32],
 ;
 ; SI-GISEL-LABEL: test_class_fabs_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x13
-; SI-GISEL-NEXT:    s_load_dword s6, s[4:5], 0x1c
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x1c
+; SI-GISEL-NEXT:    s_load_dword s1, s[4:5], 0x13
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[0:1], |s1|, v0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], |v0|, s6
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %a.fabs = call float @llvm.fabs.f32(float %a) #1
@@ -94,15 +106,21 @@ define amdgpu_kernel void @test_class_fneg_f32(ptr addrspace(1) %out, [8 x i32],
 ;
 ; SI-GISEL-LABEL: test_class_fneg_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x13
-; SI-GISEL-NEXT:    s_load_dword s6, s[4:5], 0x1c
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x13
+; SI-GISEL-NEXT:    s_load_dword s1, s[4:5], 0x1c
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mul_f32_e64 v0, -1.0, s0
+; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[0:1], v0, s1
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -s3
-; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, s6
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %a.fneg = fsub float -0.0, %a
@@ -129,15 +147,21 @@ define amdgpu_kernel void @test_class_fneg_fabs_f32(ptr addrspace(1) %out, [8 x
 ;
 ; SI-GISEL-LABEL: test_class_fneg_fabs_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x13
-; SI-GISEL-NEXT:    s_load_dword s6, s[4:5], 0x1c
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0x13
+; SI-GISEL-NEXT:    s_load_dword s1, s[4:5], 0x1c
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mul_f32_e64 v0, -1.0, |s0|
+; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[0:1], v0, s1
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -|s3|
-; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, s6
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %a.fabs = call float @llvm.fabs.f32(float %a) #1
@@ -163,13 +187,19 @@ define amdgpu_kernel void @test_class_1_f32(ptr addrspace(1) %out, float %a) #0
 ;
 ; SI-GISEL-LABEL: test_class_1_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0xb
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[0:1], s0, 1
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], s3, 1
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
@@ -193,13 +223,19 @@ define amdgpu_kernel void @test_class_64_f32(ptr addrspace(1) %out, float %a) #0
 ;
 ; SI-GISEL-LABEL: test_class_64_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0xb
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[0:1], s0, 64
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], s3, 64
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
@@ -225,14 +261,20 @@ define amdgpu_kernel void @test_class_full_mask_f32(ptr addrspace(1) %out, float
 ;
 ; SI-GISEL-LABEL: test_class_full_mask_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
-; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0xb
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff
-; SI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_cmp_class_f32_e32 vcc, s3, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; SI-GISEL-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], vcc, vcc
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
+; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1
@@ -257,14 +299,20 @@ define amdgpu_kernel void @test_class_9bit_mask_f32(ptr addrspace(1) %out, float
 ;
 ; SI-GISEL-LABEL: test_class_9bit_mask_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
-; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0xb
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x1ff
-; SI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_cmp_class_f32_e32 vcc, s3, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; SI-GISEL-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], vcc, vcc
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
+; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
@@ -430,15 +478,21 @@ define amdgpu_kernel void @test_class_f64(ptr addrspace(1) %out, [8 x i32], doub
 ;
 ; SI-GISEL-LABEL: test_class_f64:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x1d
-; SI-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
+; SI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x1d
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x13
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], vcc, vcc
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[6:7], v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1
@@ -464,16 +518,21 @@ define amdgpu_kernel void @test_class_fabs_f64(ptr addrspace(1) %out, [8 x i32],
 ;
 ; SI-GISEL-LABEL: test_class_fabs_f64:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x1d
+; SI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x1d
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x13
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[0:1], |s[0:1]|, v0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; SI-GISEL-NEXT:    v_mov_b32_e32 v1, s7
-; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], |v[0:1]|, s3
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %a.fabs = call double @llvm.fabs.f64(double %a) #1
@@ -500,15 +559,21 @@ define amdgpu_kernel void @test_class_fneg_f64(ptr addrspace(1) %out, [8 x i32],
 ;
 ; SI-GISEL-LABEL: test_class_fneg_f64:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x1d
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x13
+; SI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x1d
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_max_f64 v[0:1], -s[0:1], -s[0:1]
+; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[0:1], v[0:1], s2
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_max_f64 v[0:1], -s[6:7], -s[6:7]
-; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], s3
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %a.fneg = fsub double -0.0, %a
@@ -535,15 +600,21 @@ define amdgpu_kernel void @test_class_fneg_fabs_f64(ptr addrspace(1) %out, [8 x
 ;
 ; SI-GISEL-LABEL: test_class_fneg_fabs_f64:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0x1d
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x13
+; SI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x1d
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_max_f64 v[0:1], -|s[0:1]|, -|s[0:1]|
+; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[0:1], v[0:1], s2
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_max_f64 v[0:1], -|s[6:7]|, -|s[6:7]|
-; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[4:5], v[0:1], s3
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %a.fabs = call double @llvm.fabs.f64(double %a) #1
@@ -573,9 +644,14 @@ define amdgpu_kernel void @test_class_1_f64(ptr addrspace(1) %out, double %a) #0
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 1
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; SI-GISEL-NEXT:    s_or_b64 s[2:3], s[2:3], s[2:3]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1
@@ -603,9 +679,14 @@ define amdgpu_kernel void @test_class_64_f64(ptr addrspace(1) %out, double %a) #
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 64
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; SI-GISEL-NEXT:    s_or_b64 s[2:3], s[2:3], s[2:3]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1
@@ -631,14 +712,20 @@ define amdgpu_kernel void @test_class_full_mask_f64(ptr addrspace(1) %out, [8 x
 ;
 ; SI-GISEL-LABEL: test_class_full_mask_f64:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
-; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x13
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x1ff
-; SI-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[6:7], v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[0:1], v0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], vcc, vcc
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
+; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
@@ -671,16 +758,21 @@ define amdgpu_kernel void @v_test_class_full_mask_f64(ptr addrspace(1) %out, ptr
 ; SI-GISEL-LABEL: v_test_class_full_mask_f64:
 ; SI-GISEL:       ; %bb.0:
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
 ; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x1ff
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[2:3], v1
+; SI-GISEL-NEXT:    s_or_b64 s[2:3], vcc, vcc
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; SI-GISEL-NEXT:    v_mov_b32_e32 v2, 0x1ff
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, 0
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_cmp_class_f64_e32 vcc, s[4:5], v2
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; SI-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -1132,6 +1224,8 @@ define amdgpu_kernel void @test_no_fold_or_class_f32_0(ptr addrspace(1) %out, pt
 ; SI-GISEL-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
 ; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[2:3], s8, 8
 ; SI-GISEL-NEXT:    s_mov_b32 s6, -1
+; SI-GISEL-NEXT:    s_or_b64 s[2:3], s[2:3], s[2:3]
+; SI-GISEL-NEXT:    s_cselect_b64 s[2:3], exec, 0
 ; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, 4
 ; SI-GISEL-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
@@ -1166,13 +1260,19 @@ define amdgpu_kernel void @test_class_0_f32(ptr addrspace(1) %out, float %a) #0
 ;
 ; SI-GISEL-LABEL: test_class_0_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0xb
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[0:1], s0, 0
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[0:1]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
 ; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
-; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], s3, 0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1
@@ -1197,9 +1297,14 @@ define amdgpu_kernel void @test_class_0_f64(ptr addrspace(1) %out, double %a) #0
 ; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    v_cmp_class_f64_e64 s[2:3], s[2:3], 0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; SI-GISEL-NEXT:    s_or_b64 s[2:3], s[2:3], s[2:3]
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
 ; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1
@@ -1221,14 +1326,20 @@ define amdgpu_kernel void @test_class_undef_f32(ptr addrspace(1) %out, float %a,
 ;
 ; SI-GISEL-LABEL: test_class_undef_f32:
 ; SI-GISEL:       ; %bb.0:
-; SI-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xc
-; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
-; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_load_dword s0, s[4:5], 0xc
 ; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s3
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-GISEL-NEXT:    v_cmp_class_f32_e32 vcc, s0, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
+; SI-GISEL-NEXT:    s_or_b64 s[0:1], vcc, vcc
+; SI-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; SI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-GISEL-NEXT:    s_and_b32 s3, s2, 1
+; SI-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; SI-GISEL-NEXT:    s_cselect_b32 s4, -1, 0
 ; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.amdgcn.class.f32(float poison, i32 %b) #1

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to