@@ -38481,10 +38481,7 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
38481
38481
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
38482
38482
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38483
38483
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
38484
- ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v1
38485
- ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38486
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
38487
- ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
38484
+ ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38488
38485
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38489
38486
; GFX8-NEXT: s_setpc_b64 s[30:31]
38490
38487
;
@@ -38494,9 +38491,7 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
38494
38491
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
38495
38492
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38496
38493
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
38497
- ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
38498
- ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38499
- ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
38494
+ ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38500
38495
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
38501
38496
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
38502
38497
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -38505,11 +38500,9 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
38505
38500
; GFX10: ; %bb.0:
38506
38501
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38507
38502
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
38508
- ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v1
38509
- ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
38510
38503
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
38511
38504
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
38512
- ; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v3 , vcc_lo
38505
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v2, v1 , vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38513
38506
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
38514
38507
; GFX10-NEXT: s_setpc_b64 s[30:31]
38515
38508
;
@@ -38577,44 +38570,37 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
38577
38570
; GFX8-LABEL: v_vselect_v2bf16:
38578
38571
; GFX8: ; %bb.0:
38579
38572
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38580
- ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
38581
38573
; GFX8-NEXT: v_and_b32_e32 v1, 1, v1
38582
- ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38583
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
38584
- ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38585
- ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v3
38574
+ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
38586
38575
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
38587
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
38588
- ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
38576
+ ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
38577
+ ; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38578
+ ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38589
38579
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38590
38580
; GFX8-NEXT: s_setpc_b64 s[30:31]
38591
38581
;
38592
38582
; GFX9-LABEL: v_vselect_v2bf16:
38593
38583
; GFX9: ; %bb.0:
38594
38584
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38595
- ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
38596
38585
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
38597
- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
38598
- ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
38599
- ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
38600
- ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
38586
+ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
38601
38587
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
38602
- ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
38588
+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0
38589
+ ; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
38590
+ ; GFX9-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38603
38591
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
38604
38592
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
38605
38593
; GFX9-NEXT: s_setpc_b64 s[30:31]
38606
38594
;
38607
38595
; GFX10-LABEL: v_vselect_v2bf16:
38608
38596
; GFX10: ; %bb.0:
38609
38597
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38610
- ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
38611
38598
; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
38612
- ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
38613
- ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v3
38614
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
38615
- ; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
38599
+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
38616
38600
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
38617
- ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
38601
+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v0
38602
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v3, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
38603
+ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v3, v2, s4
38618
38604
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
38619
38605
; GFX10-NEXT: s_setpc_b64 s[30:31]
38620
38606
;
@@ -38771,13 +38757,12 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38771
38757
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
38772
38758
; GFX8-NEXT: v_mov_b32_e32 v1, s3
38773
38759
; GFX8-NEXT: v_mov_b32_e32 v2, s2
38760
+ ; GFX8-NEXT: v_mov_b32_e32 v3, s1
38761
+ ; GFX8-NEXT: v_mov_b32_e32 v4, s0
38774
38762
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
38775
- ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
38776
- ; GFX8-NEXT: v_mov_b32_e32 v1, s1
38777
- ; GFX8-NEXT: v_mov_b32_e32 v2, s0
38778
- ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
38779
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
38780
- ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38763
+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc
38764
+ ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38765
+ ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38781
38766
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
38782
38767
; GFX8-NEXT: ; return to shader part epilog
38783
38768
;
@@ -38882,14 +38867,13 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
38882
38867
; GFX8: ; %bb.0:
38883
38868
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
38884
38869
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
38885
- ; GFX8-NEXT: v_mov_b32_e32 v2, s3
38886
- ; GFX8-NEXT: v_mov_b32_e32 v3, s2
38887
38870
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
38888
- ; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
38871
+ ; GFX8-NEXT: v_mov_b32_e32 v1, s3
38872
+ ; GFX8-NEXT: v_mov_b32_e32 v2, s2
38873
+ ; GFX8-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
38889
38874
; GFX8-NEXT: v_mov_b32_e32 v2, s1
38890
38875
; GFX8-NEXT: v_mov_b32_e32 v3, s0
38891
38876
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
38892
- ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
38893
38877
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
38894
38878
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
38895
38879
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
@@ -40792,48 +40776,42 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
40792
40776
; GFX9-LABEL: v_vselect_v4bf16:
40793
40777
; GFX9: ; %bb.0:
40794
40778
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40795
- ; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
40796
- ; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
40797
- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
40798
- ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
40799
- ; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v5, vcc
40800
- ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v5
40801
- ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v7
40802
- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
40803
40779
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
40804
- ; GFX9-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
40780
+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1
40781
+ ; GFX9-NEXT: v_and_b32_e32 v1, 1, v3
40782
+ ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
40783
+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40784
+ ; GFX9-NEXT: v_and_b32_e32 v1, 1, v2
40785
+ ; GFX9-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40805
40786
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
40787
+ ; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v1
40806
40788
; GFX9-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
40807
- ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v4
40808
- ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v6
40809
- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
40810
- ; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
40789
+ ; GFX9-NEXT: s_mov_b64 vcc, s[4:5]
40790
+ ; GFX9-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[6:7]
40791
+ ; GFX9-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40811
40792
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
40812
- ; GFX9-NEXT: v_perm_b32 v0, v1 , v0, s4
40813
- ; GFX9-NEXT: v_perm_b32 v1, v3, v2 , s4
40793
+ ; GFX9-NEXT: v_perm_b32 v0, v3 , v0, s4
40794
+ ; GFX9-NEXT: v_perm_b32 v1, v2, v1 , s4
40814
40795
; GFX9-NEXT: s_setpc_b64 s[30:31]
40815
40796
;
40816
40797
; GFX10-LABEL: v_vselect_v4bf16:
40817
40798
; GFX10: ; %bb.0:
40818
40799
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40819
- ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
40820
- ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
40821
- ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
40822
40800
; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
40823
- ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v4
40824
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
40825
- ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v6
40826
- ; GFX10-NEXT: v_cndmask_b32_e32 v2, v7, v5, vcc_lo
40827
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
40828
- ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v5
40829
- ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v7
40830
- ; GFX10-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
40831
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
40832
- ; GFX10-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo
40801
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
40802
+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
40833
40803
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
40834
- ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
40835
- ; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
40836
- ; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
40804
+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
40805
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v2
40806
+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v0
40807
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v2, v7, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40808
+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
40809
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v3, v6, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
40810
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
40811
+ ; GFX10-NEXT: v_cndmask_b32_e64 v0, v6, v4, s5
40812
+ ; GFX10-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc_lo
40813
+ ; GFX10-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
40814
+ ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
40837
40815
; GFX10-NEXT: s_setpc_b64 s[30:31]
40838
40816
;
40839
40817
; GFX11TRUE16-LABEL: v_vselect_v4bf16:
@@ -41081,42 +41059,37 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
41081
41059
; GFX10-LABEL: v_vselect_v8bf16:
41082
41060
; GFX10: ; %bb.0:
41083
41061
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41084
- ; GFX10-NEXT: v_and_b32_e32 v6, 1, v6
41085
- ; GFX10-NEXT: v_and_b32_e32 v4, 1, v4
41086
- ; GFX10-NEXT: v_and_b32_e32 v5, 1, v5
41087
- ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
41088
- ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 16, v10
41089
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
41090
- ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v14
41091
- ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
41092
41062
; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
41093
- ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
41094
- ; GFX10-NEXT: v_cndmask_b32_e32 v6, v15, v11, vcc_lo
41095
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
41096
41063
; GFX10-NEXT: v_and_b32_e32 v7, 1, v7
41097
- ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v11
41098
- ; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v15
41099
- ; GFX10-NEXT: v_cndmask_b32_e32 v4, v14, v10, vcc_lo
41100
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
41101
- ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v8
41102
- ; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v12
41103
- ; GFX10-NEXT: v_cndmask_b32_e32 v5, v17, v16, vcc_lo
41104
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
41105
- ; GFX10-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
41064
+ ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
41065
+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
41066
+ ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
41067
+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
41068
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v5
41069
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
41070
+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 1, v3
41071
+ ; GFX10-NEXT: v_and_b32_e32 v3, 1, v6
41072
+ ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 1, v1
41073
+ ; GFX10-NEXT: v_and_b32_e32 v1, 1, v4
41074
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v4, v15, v11, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41075
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41076
+ ; GFX10-NEXT: v_cndmask_b32_e32 v5, v14, v10, vcc_lo
41077
+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s6
41078
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v6, v14, v10, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41079
+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s5
41080
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v1, v13, v9, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41106
41081
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
41107
- ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v9
41108
- ; GFX10-NEXT: v_lshrrev_b32_e32 v13, 16, v13
41109
41082
; GFX10-NEXT: v_cndmask_b32_e32 v0, v12, v8, vcc_lo
41110
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
41111
- ; GFX10-NEXT: v_cndmask_b32_e32 v1, v14, v10, vcc_lo
41083
+ ; GFX10-NEXT: s_mov_b32 vcc_lo, s4
41084
+ ; GFX10-NEXT: v_cndmask_b32_sdwa v7, v12, v8, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
41085
+ ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
41086
+ ; GFX10-NEXT: v_perm_b32 v0, v7, v0, 0x5040100
41087
+ ; GFX10-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
41112
41088
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
41113
- ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
41114
- ; GFX10-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
41115
- ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
41116
- ; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
41117
- ; GFX10-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
41118
- ; GFX10-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
41119
- ; GFX10-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
41089
+ ; GFX10-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
41090
+ ; GFX10-NEXT: v_cndmask_b32_e32 v3, v15, v11, vcc_lo
41091
+ ; GFX10-NEXT: v_perm_b32 v2, v6, v5, 0x5040100
41092
+ ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
41120
41093
; GFX10-NEXT: s_setpc_b64 s[30:31]
41121
41094
;
41122
41095
; GFX11TRUE16-LABEL: v_vselect_v8bf16:
0 commit comments