@@ -38481,10 +38481,7 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
3848138481; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
3848238482; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3848338483; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
38484- ; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
38485- ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
38486- ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
38487- ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
38484+ ; GFX8-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3848838485; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3848938486; GFX8-NEXT:    s_setpc_b64 s[30:31]
3849038487;
@@ -38494,9 +38491,7 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
3849438491; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
3849538492; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
3849638493; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
38497- ; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
38498- ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
38499- ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
38494+ ; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3850038495; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
3850138496; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
3850238497; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -38505,11 +38500,9 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
3850538500; GFX10:       ; %bb.0:
3850638501; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3850738502; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
38508- ; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
38509- ; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
3851038503; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
3851138504; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
38512- ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc_lo
38505+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v2, v1, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3851338506; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
3851438507; GFX10-NEXT:    s_setpc_b64 s[30:31]
3851538508;
@@ -38577,44 +38570,37 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
3857738570; GFX8-LABEL: v_vselect_v2bf16:
3857838571; GFX8:       ; %bb.0:
3857938572; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38580- ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
3858138573; GFX8-NEXT:    v_and_b32_e32 v1, 1, v1
38582- ; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
38583- ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
38584- ; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
38585- ; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
38574+ ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v0
3858638575; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
38587- ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
38588- ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
38576+ ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
38577+ ; GFX8-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38578+ ; GFX8-NEXT:    v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3858938579; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3859038580; GFX8-NEXT:    s_setpc_b64 s[30:31]
3859138581;
3859238582; GFX9-LABEL: v_vselect_v2bf16:
3859338583; GFX9:       ; %bb.0:
3859438584; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38595- ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
3859638585; GFX9-NEXT:    v_and_b32_e32 v1, 1, v1
38597- ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
38598- ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
38599- ; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
38600- ; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
38586+ ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
3860138587; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
38602- ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
38588+ ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v0
38589+ ; GFX9-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38590+ ; GFX9-NEXT:    v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
3860338591; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
3860438592; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
3860538593; GFX9-NEXT:    s_setpc_b64 s[30:31]
3860638594;
3860738595; GFX10-LABEL: v_vselect_v2bf16:
3860838596; GFX10:       ; %bb.0:
3860938597; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38610- ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
3861138598; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
38612- ; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
38613- ; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
38614- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
38615- ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
38599+ ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
3861638600; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
38617- ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
38601+ ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 1, v0
38602+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38603+ ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v3, v2, s4
3861838604; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
3861938605; GFX10-NEXT:    s_setpc_b64 s[30:31]
3862038606;
@@ -38771,13 +38757,12 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
3877138757; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
3877238758; GFX8-NEXT:    v_mov_b32_e32 v1, s3
3877338759; GFX8-NEXT:    v_mov_b32_e32 v2, s2
38760+ ; GFX8-NEXT:    v_mov_b32_e32 v3, s1
38761+ ; GFX8-NEXT:    v_mov_b32_e32 v4, s0
3877438762; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
38775- ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
38776- ; GFX8-NEXT:    v_mov_b32_e32 v1, s1
38777- ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
38778- ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
38779- ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
38780- ; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38763+ ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
38764+ ; GFX8-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38765+ ; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3878138766; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
3878238767; GFX8-NEXT:    ; return to shader part epilog
3878338768;
@@ -38882,14 +38867,13 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
3888238867; GFX8:       ; %bb.0:
3888338868; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3888438869; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
38885- ; GFX8-NEXT:    v_mov_b32_e32 v2, s3
38886- ; GFX8-NEXT:    v_mov_b32_e32 v3, s2
3888738870; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
38888- ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
38871+ ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
38872+ ; GFX8-NEXT:    v_mov_b32_e32 v2, s2
38873+ ; GFX8-NEXT:    v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
3888938874; GFX8-NEXT:    v_mov_b32_e32 v2, s1
3889038875; GFX8-NEXT:    v_mov_b32_e32 v3, s0
3889138876; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
38892- ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3889338877; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
3889438878; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3889538879; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
@@ -40792,48 +40776,42 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
4079240776; GFX9-LABEL: v_vselect_v4bf16:
4079340777; GFX9:       ; %bb.0:
4079440778; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40795- ; GFX9-NEXT:    v_and_b32_e32 v2, 1, v2
40796- ; GFX9-NEXT:    v_and_b32_e32 v3, 1, v3
40797- ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
40798- ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
40799- ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v7, v5, vcc
40800- ; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
40801- ; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
40802- ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v3
4080340779; GFX9-NEXT:    v_and_b32_e32 v1, 1, v1
40804- ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc
40780+ ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 1, v1
40781+ ; GFX9-NEXT:    v_and_b32_e32 v1, 1, v3
40782+ ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
40783+ ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
40784+ ; GFX9-NEXT:    v_and_b32_e32 v1, 1, v2
40785+ ; GFX9-NEXT:    v_cndmask_b32_sdwa v2, v7, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
4080540786; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
40787+ ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], 1, v1
4080640788; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc
40807- ; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
40808- ; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 16, v6
40809- ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
40810- ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
40789+ ; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
40790+ ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v7, v5, s[6:7]
40791+ ; GFX9-NEXT:    v_cndmask_b32_sdwa v3, v6, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
4081140792; GFX9-NEXT:    s_mov_b32 s4, 0x5040100
40812- ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
40813- ; GFX9-NEXT:    v_perm_b32 v1, v3, v2, s4
40793+ ; GFX9-NEXT:    v_perm_b32 v0, v3, v0, s4
40794+ ; GFX9-NEXT:    v_perm_b32 v1, v2, v1, s4
4081440795; GFX9-NEXT:    s_setpc_b64 s[30:31]
4081540796;
4081640797; GFX10-LABEL: v_vselect_v4bf16:
4081740798; GFX10:       ; %bb.0:
4081840799; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40819- ; GFX10-NEXT:    v_and_b32_e32 v2, 1, v2
40820- ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
40821- ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
4082240800; GFX10-NEXT:    v_and_b32_e32 v3, 1, v3
40823- ; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
40824- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
40825- ; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
40826- ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v7, v5, vcc_lo
40827- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
40828- ; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
40829- ; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
40830- ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc_lo
40831- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
40832- ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc_lo
40801+ ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
40802+ ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
4083340803; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
40834- ; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
40835- ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc_lo
40836- ; GFX10-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
40804+ ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 1, v1
40805+ ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v2
40806+ ; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 1, v0
40807+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v2, v7, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40808+ ; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
40809+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v3, v6, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40810+ ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
40811+ ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v6, v4, s5
40812+ ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v7, v5, vcc_lo
40813+ ; GFX10-NEXT:    v_perm_b32 v0, v3, v0, 0x5040100
40814+ ; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
4083740815; GFX10-NEXT:    s_setpc_b64 s[30:31]
4083840816;
4083940817; GFX11TRUE16-LABEL: v_vselect_v4bf16:
@@ -41081,42 +41059,37 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
4108141059; GFX10-LABEL: v_vselect_v8bf16:
4108241060; GFX10:       ; %bb.0:
4108341061; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41084- ; GFX10-NEXT:    v_and_b32_e32 v6, 1, v6
41085- ; GFX10-NEXT:    v_and_b32_e32 v4, 1, v4
41086- ; GFX10-NEXT:    v_and_b32_e32 v5, 1, v5
41087- ; GFX10-NEXT:    v_and_b32_e32 v2, 1, v2
41088- ; GFX10-NEXT:    v_lshrrev_b32_e32 v16, 16, v10
41089- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
41090- ; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
41091- ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
4109241062; GFX10-NEXT:    v_and_b32_e32 v1, 1, v1
41093- ; GFX10-NEXT:    v_and_b32_e32 v3, 1, v3
41094- ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v15, v11, vcc_lo
41095- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
4109641063; GFX10-NEXT:    v_and_b32_e32 v7, 1, v7
41097- ; GFX10-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
41098- ; GFX10-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
41099- ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v14, v10, vcc_lo
41100- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
41101- ; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
41102- ; GFX10-NEXT:    v_lshrrev_b32_e32 v14, 16, v12
41103- ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v17, v16, vcc_lo
41104- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
41105- ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v13, v9, vcc_lo
41064+ ; GFX10-NEXT:    v_and_b32_e32 v3, 1, v3
41065+ ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v0
41066+ ; GFX10-NEXT:    v_and_b32_e32 v2, 1, v2
41067+ ; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 1, v1
41068+ ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v5
41069+ ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
41070+ ; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 1, v3
41071+ ; GFX10-NEXT:    v_and_b32_e32 v3, 1, v6
41072+ ; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 1, v1
41073+ ; GFX10-NEXT:    v_and_b32_e32 v1, 1, v4
41074+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v4, v15, v11, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41075+ ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
41076+ ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v14, v10, vcc_lo
41077+ ; GFX10-NEXT:    s_mov_b32 vcc_lo, s6
41078+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v6, v14, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41079+ ; GFX10-NEXT:    s_mov_b32 vcc_lo, s5
41080+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v1, v13, v9, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
4110641081; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
41107- ; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
41108- ; GFX10-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
4110941082; GFX10-NEXT:    v_cndmask_b32_e32 v0, v12, v8, vcc_lo
41110- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
41111- ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v14, v10, vcc_lo
41083+ ; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
41084+ ; GFX10-NEXT:    v_cndmask_b32_sdwa v7, v12, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41085+ ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
41086+ ; GFX10-NEXT:    v_perm_b32 v0, v7, v0, 0x5040100
41087+ ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v13, v9, vcc_lo
4111241088; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
41113- ; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
41114- ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v13, v9, vcc_lo
41115- ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
41116- ; GFX10-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
41117- ; GFX10-NEXT:    v_cndmask_b32_e32 v7, v15, v11, vcc_lo
41118- ; GFX10-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
41119- ; GFX10-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
41089+ ; GFX10-NEXT:    v_perm_b32 v1, v1, v2, 0x5040100
41090+ ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v15, v11, vcc_lo
41091+ ; GFX10-NEXT:    v_perm_b32 v2, v6, v5, 0x5040100
41092+ ; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
4112041093; GFX10-NEXT:    s_setpc_b64 s[30:31]
4112141094;
4112241095; GFX11TRUE16-LABEL: v_vselect_v8bf16:
0 commit comments