Skip to content

Commit a27baf9

Browse files
[SelectionDAG] Improve v2f16 maximumnum expansion (#160723)
On targets where f32 maximumnum is legal, but maximumnum on vectors of smaller types is not legal (e.g. v2f16), try unrolling the vector first as part of the expansion. Only fall back to expanding the full maximumnum computation into compares + selects if maximumnum on the scalar element type cannot be supported.
1 parent 81aafd9 commit a27baf9

File tree

3 files changed

+165
-405
lines changed

3 files changed

+165
-405
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8839,7 +8839,9 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
     return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
   }
 
-  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+  if (VT.isVector() &&
+      (isOperationLegalOrCustomOrPromote(Opc, VT.getVectorElementType()) ||
+       !isOperationLegalOrCustom(ISD::VSELECT, VT)))
     return DAG.UnrollVectorOp(Node);
 
   // If only one operand is NaN, override it with another operand.

llvm/test/CodeGen/NVPTX/math-intrins.ll

Lines changed: 48 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -1586,54 +1586,25 @@ define double @minimumnum_double(double %a, double %b) {
15861586
ret double %x
15871587
}
15881588

1589-
; TODO Improve the "Expand" path for minimumnum vectors on targets where
1590-
; f16 is not supported. Ideally it should use two f32 minimumnums first instead of
1591-
; fully expanding the minimumnum instruction into compare/select instructions.
15921589
define <2 x half> @minimumnum_v2half(<2 x half> %a, <2 x half> %b) {
15931590
; CHECK-NOF16-LABEL: minimumnum_v2half(
15941591
; CHECK-NOF16: {
1595-
; CHECK-NOF16-NEXT: .reg .pred %p<13>;
1596-
; CHECK-NOF16-NEXT: .reg .b16 %rs<17>;
1597-
; CHECK-NOF16-NEXT: .reg .b32 %r<11>;
1592+
; CHECK-NOF16-NEXT: .reg .b16 %rs<7>;
1593+
; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
15981594
; CHECK-NOF16-EMPTY:
15991595
; CHECK-NOF16-NEXT: // %bb.0:
16001596
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [minimumnum_v2half_param_0];
1601-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1602-
; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
16031597
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [minimumnum_v2half_param_1];
1604-
; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1605-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1606-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1607-
; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1608-
; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1609-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1610-
; CHECK-NOF16-NEXT: setp.lt.f32 %p3, %r2, %r4;
1611-
; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1612-
; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs5, -32768;
1613-
; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1614-
; CHECK-NOF16-NEXT: setp.eq.b16 %p5, %rs6, -32768;
1615-
; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1616-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1617-
; CHECK-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1618-
; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1619-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1620-
; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1621-
; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1622-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1623-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1624-
; CHECK-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1625-
; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1626-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1627-
; CHECK-NOF16-NEXT: setp.lt.f32 %p9, %r7, %r9;
1628-
; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1629-
; CHECK-NOF16-NEXT: setp.eq.b16 %p10, %rs11, -32768;
1630-
; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1631-
; CHECK-NOF16-NEXT: setp.eq.b16 %p11, %rs12, -32768;
1632-
; CHECK-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1633-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1634-
; CHECK-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1635-
; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1636-
; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1598+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1599+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1600+
; CHECK-NOF16-NEXT: min.f32 %r3, %r2, %r1;
1601+
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1602+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1603+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1604+
; CHECK-NOF16-NEXT: min.f32 %r6, %r5, %r4;
1605+
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1606+
; CHECK-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1607+
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
16371608
; CHECK-NOF16-NEXT: ret;
16381609
;
16391610
; CHECK-F16-LABEL: minimumnum_v2half(
@@ -1649,48 +1620,22 @@ define <2 x half> @minimumnum_v2half(<2 x half> %a, <2 x half> %b) {
16491620
;
16501621
; CHECK-SM80-NOF16-LABEL: minimumnum_v2half(
16511622
; CHECK-SM80-NOF16: {
1652-
; CHECK-SM80-NOF16-NEXT: .reg .pred %p<13>;
1653-
; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<17>;
1654-
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<11>;
1623+
; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>;
1624+
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<8>;
16551625
; CHECK-SM80-NOF16-EMPTY:
16561626
; CHECK-SM80-NOF16-NEXT: // %bb.0:
16571627
; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [minimumnum_v2half_param_0];
1658-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1659-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
16601628
; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [minimumnum_v2half_param_1];
1661-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1662-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1663-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1664-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1665-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1666-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1667-
; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p3, %r2, %r4;
1668-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1669-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs5, -32768;
1670-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1671-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p5, %rs6, -32768;
1672-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1673-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1674-
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1675-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1676-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1677-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1678-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1679-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1680-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1681-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1682-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1683-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1684-
; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p9, %r7, %r9;
1685-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1686-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p10, %rs11, -32768;
1687-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1688-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p11, %rs12, -32768;
1689-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1690-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1691-
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1692-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1693-
; CHECK-SM80-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1629+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1630+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1631+
; CHECK-SM80-NOF16-NEXT: min.f32 %r3, %r2, %r1;
1632+
; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1633+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1634+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1635+
; CHECK-SM80-NOF16-NEXT: min.f32 %r6, %r5, %r4;
1636+
; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1637+
; CHECK-SM80-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1638+
; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
16941639
; CHECK-SM80-NOF16-NEXT: ret;
16951640
%x = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %a, <2 x half> %b)
16961641
ret <2 x half> %x
@@ -1788,54 +1733,25 @@ define double @maximumnum_double(double %a, double %b) {
17881733
ret double %x
17891734
}
17901735

1791-
; TODO Improve the "Expand" path for maximumnum vectors on targets where
1792-
; f16 is not supported. Ideally it should use two f32 maximumnums first instead of
1793-
; fully expanding the maximumnum instruction into compare/select instructions.
17941736
define <2 x half> @maximumnum_v2half(<2 x half> %a, <2 x half> %b) {
17951737
; CHECK-NOF16-LABEL: maximumnum_v2half(
17961738
; CHECK-NOF16: {
1797-
; CHECK-NOF16-NEXT: .reg .pred %p<13>;
1798-
; CHECK-NOF16-NEXT: .reg .b16 %rs<17>;
1799-
; CHECK-NOF16-NEXT: .reg .b32 %r<11>;
1739+
; CHECK-NOF16-NEXT: .reg .b16 %rs<7>;
1740+
; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
18001741
; CHECK-NOF16-EMPTY:
18011742
; CHECK-NOF16-NEXT: // %bb.0:
18021743
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [maximumnum_v2half_param_0];
1803-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1804-
; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
18051744
; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [maximumnum_v2half_param_1];
1806-
; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1807-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1808-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1809-
; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1810-
; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1811-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1812-
; CHECK-NOF16-NEXT: setp.gt.f32 %p3, %r2, %r4;
1813-
; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1814-
; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs5, 0;
1815-
; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1816-
; CHECK-NOF16-NEXT: setp.eq.b16 %p5, %rs6, 0;
1817-
; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1818-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1819-
; CHECK-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1820-
; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1821-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1822-
; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1823-
; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1824-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1825-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1826-
; CHECK-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1827-
; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1828-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1829-
; CHECK-NOF16-NEXT: setp.gt.f32 %p9, %r7, %r9;
1830-
; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1831-
; CHECK-NOF16-NEXT: setp.eq.b16 %p10, %rs11, 0;
1832-
; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1833-
; CHECK-NOF16-NEXT: setp.eq.b16 %p11, %rs12, 0;
1834-
; CHECK-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1835-
; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1836-
; CHECK-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1837-
; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1838-
; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1745+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1746+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1747+
; CHECK-NOF16-NEXT: max.f32 %r3, %r2, %r1;
1748+
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1749+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1750+
; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1751+
; CHECK-NOF16-NEXT: max.f32 %r6, %r5, %r4;
1752+
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1753+
; CHECK-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1754+
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
18391755
; CHECK-NOF16-NEXT: ret;
18401756
;
18411757
; CHECK-F16-LABEL: maximumnum_v2half(
@@ -1851,48 +1767,22 @@ define <2 x half> @maximumnum_v2half(<2 x half> %a, <2 x half> %b) {
18511767
;
18521768
; CHECK-SM80-NOF16-LABEL: maximumnum_v2half(
18531769
; CHECK-SM80-NOF16: {
1854-
; CHECK-SM80-NOF16-NEXT: .reg .pred %p<13>;
1855-
; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<17>;
1856-
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<11>;
1770+
; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>;
1771+
; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<8>;
18571772
; CHECK-SM80-NOF16-EMPTY:
18581773
; CHECK-SM80-NOF16-NEXT: // %bb.0:
18591774
; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [maximumnum_v2half_param_0];
1860-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1861-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
18621775
; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [maximumnum_v2half_param_1];
1863-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1864-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1865-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1866-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1867-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1868-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1869-
; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p3, %r2, %r4;
1870-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1871-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs5, 0;
1872-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1873-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p5, %rs6, 0;
1874-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1875-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1876-
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1877-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1878-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1879-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1880-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1881-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1882-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1883-
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1884-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1885-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1886-
; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p9, %r7, %r9;
1887-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1888-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p10, %rs11, 0;
1889-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1890-
; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p11, %rs12, 0;
1891-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1892-
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1893-
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1894-
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1895-
; CHECK-SM80-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1776+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1777+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1778+
; CHECK-SM80-NOF16-NEXT: max.f32 %r3, %r2, %r1;
1779+
; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1780+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1781+
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1782+
; CHECK-SM80-NOF16-NEXT: max.f32 %r6, %r5, %r4;
1783+
; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1784+
; CHECK-SM80-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1785+
; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
18961786
; CHECK-SM80-NOF16-NEXT: ret;
18971787
%x = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
18981788
ret <2 x half> %x

0 commit comments

Comments
 (0)