@@ -1586,54 +1586,25 @@ define double @minimumnum_double(double %a, double %b) {
15861586 ret double %x
15871587}
15881588
1589- ; TODO Improve the "Expand" path for minimumnum vectors on targets where
1590- ; f16 is not supported. Ideally it should use two f32 minimumnums first instead of
1591- ; fully expanding the minimumnum instruction into compare/select instructions.
15921589define <2 x half > @minimumnum_v2half (<2 x half > %a , <2 x half > %b ) {
15931590; CHECK-NOF16-LABEL: minimumnum_v2half(
15941591; CHECK-NOF16: {
1595- ; CHECK-NOF16-NEXT: .reg .pred %p<13>;
1596- ; CHECK-NOF16-NEXT: .reg .b16 %rs<17>;
1597- ; CHECK-NOF16-NEXT: .reg .b32 %r<11>;
1592+ ; CHECK-NOF16-NEXT: .reg .b16 %rs<7>;
1593+ ; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
15981594; CHECK-NOF16-EMPTY:
15991595; CHECK-NOF16-NEXT: // %bb.0:
16001596; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [minimumnum_v2half_param_0];
1601- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1602- ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
16031597; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [minimumnum_v2half_param_1];
1604- ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1605- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1606- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1607- ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1608- ; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1609- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1610- ; CHECK-NOF16-NEXT: setp.lt.f32 %p3, %r2, %r4;
1611- ; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1612- ; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs5, -32768;
1613- ; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1614- ; CHECK-NOF16-NEXT: setp.eq.b16 %p5, %rs6, -32768;
1615- ; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1616- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1617- ; CHECK-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1618- ; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1619- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1620- ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1621- ; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1622- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1623- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1624- ; CHECK-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1625- ; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1626- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1627- ; CHECK-NOF16-NEXT: setp.lt.f32 %p9, %r7, %r9;
1628- ; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1629- ; CHECK-NOF16-NEXT: setp.eq.b16 %p10, %rs11, -32768;
1630- ; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1631- ; CHECK-NOF16-NEXT: setp.eq.b16 %p11, %rs12, -32768;
1632- ; CHECK-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1633- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1634- ; CHECK-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1635- ; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1636- ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1598+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1599+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1600+ ; CHECK-NOF16-NEXT: min.f32 %r3, %r2, %r1;
1601+ ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1602+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1603+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1604+ ; CHECK-NOF16-NEXT: min.f32 %r6, %r5, %r4;
1605+ ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1606+ ; CHECK-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1607+ ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
16371608; CHECK-NOF16-NEXT: ret;
16381609;
16391610; CHECK-F16-LABEL: minimumnum_v2half(
@@ -1649,48 +1620,22 @@ define <2 x half> @minimumnum_v2half(<2 x half> %a, <2 x half> %b) {
16491620;
16501621; CHECK-SM80-NOF16-LABEL: minimumnum_v2half(
16511622; CHECK-SM80-NOF16: {
1652- ; CHECK-SM80-NOF16-NEXT: .reg .pred %p<13>;
1653- ; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<17>;
1654- ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<11>;
1623+ ; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>;
1624+ ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<8>;
16551625; CHECK-SM80-NOF16-EMPTY:
16561626; CHECK-SM80-NOF16-NEXT: // %bb.0:
16571627; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [minimumnum_v2half_param_0];
1658- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1659- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
16601628; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [minimumnum_v2half_param_1];
1661- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1662- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1663- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1664- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1665- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1666- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1667- ; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p3, %r2, %r4;
1668- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1669- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs5, -32768;
1670- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1671- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p5, %rs6, -32768;
1672- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1673- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1674- ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1675- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1676- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1677- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1678- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1679- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1680- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1681- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1682- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1683- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1684- ; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p9, %r7, %r9;
1685- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1686- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p10, %rs11, -32768;
1687- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1688- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p11, %rs12, -32768;
1689- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1690- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1691- ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1692- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1693- ; CHECK-SM80-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1629+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1630+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1631+ ; CHECK-SM80-NOF16-NEXT: min.f32 %r3, %r2, %r1;
1632+ ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1633+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1634+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1635+ ; CHECK-SM80-NOF16-NEXT: min.f32 %r6, %r5, %r4;
1636+ ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1637+ ; CHECK-SM80-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1638+ ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
16941639; CHECK-SM80-NOF16-NEXT: ret;
16951640 %x = call <2 x half > @llvm.minimumnum.v2f16 (<2 x half > %a , <2 x half > %b )
16961641 ret <2 x half > %x
@@ -1788,54 +1733,25 @@ define double @maximumnum_double(double %a, double %b) {
17881733 ret double %x
17891734}
17901735
1791- ; TODO Improve the "Expand" path for maximumnum vectors on targets where
1792- ; f16 is not supported. Ideally it should use two f32 maximumnums first instead of
1793- ; fully expanding the maximumnum instruction into compare/select instructions.
17941736define <2 x half > @maximumnum_v2half (<2 x half > %a , <2 x half > %b ) {
17951737; CHECK-NOF16-LABEL: maximumnum_v2half(
17961738; CHECK-NOF16: {
1797- ; CHECK-NOF16-NEXT: .reg .pred %p<13>;
1798- ; CHECK-NOF16-NEXT: .reg .b16 %rs<17>;
1799- ; CHECK-NOF16-NEXT: .reg .b32 %r<11>;
1739+ ; CHECK-NOF16-NEXT: .reg .b16 %rs<7>;
1740+ ; CHECK-NOF16-NEXT: .reg .b32 %r<8>;
18001741; CHECK-NOF16-EMPTY:
18011742; CHECK-NOF16-NEXT: // %bb.0:
18021743; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [maximumnum_v2half_param_0];
1803- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1804- ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
18051744; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [maximumnum_v2half_param_1];
1806- ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1807- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1808- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1809- ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1810- ; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1811- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1812- ; CHECK-NOF16-NEXT: setp.gt.f32 %p3, %r2, %r4;
1813- ; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1814- ; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs5, 0;
1815- ; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1816- ; CHECK-NOF16-NEXT: setp.eq.b16 %p5, %rs6, 0;
1817- ; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1818- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1819- ; CHECK-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1820- ; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1821- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1822- ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1823- ; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1824- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1825- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1826- ; CHECK-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1827- ; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1828- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1829- ; CHECK-NOF16-NEXT: setp.gt.f32 %p9, %r7, %r9;
1830- ; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1831- ; CHECK-NOF16-NEXT: setp.eq.b16 %p10, %rs11, 0;
1832- ; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1833- ; CHECK-NOF16-NEXT: setp.eq.b16 %p11, %rs12, 0;
1834- ; CHECK-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1835- ; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1836- ; CHECK-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1837- ; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1838- ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1745+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1746+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1747+ ; CHECK-NOF16-NEXT: max.f32 %r3, %r2, %r1;
1748+ ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1749+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1750+ ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1751+ ; CHECK-NOF16-NEXT: max.f32 %r6, %r5, %r4;
1752+ ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1753+ ; CHECK-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1754+ ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
18391755; CHECK-NOF16-NEXT: ret;
18401756;
18411757; CHECK-F16-LABEL: maximumnum_v2half(
@@ -1851,48 +1767,22 @@ define <2 x half> @maximumnum_v2half(<2 x half> %a, <2 x half> %b) {
18511767;
18521768; CHECK-SM80-NOF16-LABEL: maximumnum_v2half(
18531769; CHECK-SM80-NOF16: {
1854- ; CHECK-SM80-NOF16-NEXT: .reg .pred %p<13>;
1855- ; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<17>;
1856- ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<11>;
1770+ ; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>;
1771+ ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<8>;
18571772; CHECK-SM80-NOF16-EMPTY:
18581773; CHECK-SM80-NOF16-NEXT: // %bb.0:
18591774; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [maximumnum_v2half_param_0];
1860- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2;
1861- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
18621775; CHECK-SM80-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [maximumnum_v2half_param_1];
1863- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1864- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs5;
1865- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
1866- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r3, %r3;
1867- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs5, %rs4, %p2;
1868- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs6;
1869- ; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p3, %r2, %r4;
1870- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs5, %rs6, %p3;
1871- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs5, 0;
1872- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs5, %rs7, %p4;
1873- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p5, %rs6, 0;
1874- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs6, %rs8, %p5;
1875- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs7;
1876- ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p6, %r5, 0f00000000;
1877- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs9, %rs7, %p6;
1878- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1;
1879- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r6, %r6;
1880- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p7;
1881- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs11;
1882- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs3;
1883- ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p8, %r8, %r8;
1884- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs11, %rs3, %p8;
1885- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r9, %rs12;
1886- ; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p9, %r7, %r9;
1887- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs11, %rs12, %p9;
1888- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p10, %rs11, 0;
1889- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p10;
1890- ; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p11, %rs12, 0;
1891- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, %rs12, %rs14, %p11;
1892- ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r10, %rs13;
1893- ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p12, %r10, 0f00000000;
1894- ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs15, %rs13, %p12;
1895- ; CHECK-SM80-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs16, %rs10};
1776+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs4;
1777+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs2;
1778+ ; CHECK-SM80-NOF16-NEXT: max.f32 %r3, %r2, %r1;
1779+ ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r3;
1780+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs3;
1781+ ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs1;
1782+ ; CHECK-SM80-NOF16-NEXT: max.f32 %r6, %r5, %r4;
1783+ ; CHECK-SM80-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r6;
1784+ ; CHECK-SM80-NOF16-NEXT: mov.b32 %r7, {%rs6, %rs5};
1785+ ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r7;
18961786; CHECK-SM80-NOF16-NEXT: ret;
18971787 %x = call <2 x half > @llvm.maximumnum.v2f16 (<2 x half > %a , <2 x half > %b )
18981788 ret <2 x half > %x
0 commit comments