@@ -1564,7 +1564,7 @@ pub unsafe fn vclezd_f64(a: f64) -> u64 {
15641564/// Compare signed less than zero
15651565#[inline]
15661566#[target_feature(enable = "neon")]
1567- #[cfg_attr(test, assert_instr(sshr ))]
1567+ #[cfg_attr(test, assert_instr(cmlt ))]
15681568#[stable(feature = "neon_intrinsics", since = "1.59.0")]
15691569pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
15701570 let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -1574,7 +1574,7 @@ pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
15741574/// Compare signed less than zero
15751575#[inline]
15761576#[target_feature(enable = "neon")]
1577- #[cfg_attr(test, assert_instr(sshr ))]
1577+ #[cfg_attr(test, assert_instr(cmlt ))]
15781578#[stable(feature = "neon_intrinsics", since = "1.59.0")]
15791579pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
15801580 let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
@@ -1584,7 +1584,7 @@ pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
15841584/// Compare signed less than zero
15851585#[inline]
15861586#[target_feature(enable = "neon")]
1587- #[cfg_attr(test, assert_instr(sshr ))]
1587+ #[cfg_attr(test, assert_instr(cmlt ))]
15881588#[stable(feature = "neon_intrinsics", since = "1.59.0")]
15891589pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
15901590 let b: i16x4 = i16x4::new(0, 0, 0, 0);
@@ -1594,7 +1594,7 @@ pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
15941594/// Compare signed less than zero
15951595#[inline]
15961596#[target_feature(enable = "neon")]
1597- #[cfg_attr(test, assert_instr(sshr ))]
1597+ #[cfg_attr(test, assert_instr(cmlt ))]
15981598#[stable(feature = "neon_intrinsics", since = "1.59.0")]
15991599pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
16001600 let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -1604,7 +1604,7 @@ pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
16041604/// Compare signed less than zero
16051605#[inline]
16061606#[target_feature(enable = "neon")]
1607- #[cfg_attr(test, assert_instr(sshr ))]
1607+ #[cfg_attr(test, assert_instr(cmlt ))]
16081608#[stable(feature = "neon_intrinsics", since = "1.59.0")]
16091609pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
16101610 let b: i32x2 = i32x2::new(0, 0);
@@ -1614,7 +1614,7 @@ pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
16141614/// Compare signed less than zero
16151615#[inline]
16161616#[target_feature(enable = "neon")]
1617- #[cfg_attr(test, assert_instr(sshr ))]
1617+ #[cfg_attr(test, assert_instr(cmlt ))]
16181618#[stable(feature = "neon_intrinsics", since = "1.59.0")]
16191619pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
16201620 let b: i32x4 = i32x4::new(0, 0, 0, 0);
@@ -1624,7 +1624,7 @@ pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
16241624/// Compare signed less than zero
16251625#[inline]
16261626#[target_feature(enable = "neon")]
1627- #[cfg_attr(test, assert_instr(sshr ))]
1627+ #[cfg_attr(test, assert_instr(cmlt ))]
16281628#[stable(feature = "neon_intrinsics", since = "1.59.0")]
16291629pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
16301630 let b: i64x1 = i64x1::new(0);
@@ -1634,7 +1634,7 @@ pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
16341634/// Compare signed less than zero
16351635#[inline]
16361636#[target_feature(enable = "neon")]
1637- #[cfg_attr(test, assert_instr(sshr ))]
1637+ #[cfg_attr(test, assert_instr(cmlt ))]
16381638#[stable(feature = "neon_intrinsics", since = "1.59.0")]
16391639pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t {
16401640 let b: i64x2 = i64x2::new(0, 0);
@@ -2714,7 +2714,7 @@ pub unsafe fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(a: poly16x8_t,
27142714/// Insert vector element from another vector element
27152715#[inline]
27162716#[target_feature(enable = "neon")]
2717- #[cfg_attr(test, assert_instr(zip1 , LANE1 = 1, LANE2 = 0))]
2717+ #[cfg_attr(test, assert_instr(mov , LANE1 = 1, LANE2 = 0))]
27182718#[rustc_legacy_const_generics(1, 3)]
27192719#[stable(feature = "neon_intrinsics", since = "1.59.0")]
27202720pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t, b: int64x1_t) -> int64x2_t {
@@ -2731,7 +2731,7 @@ pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t,
27312731/// Insert vector element from another vector element
27322732#[inline]
27332733#[target_feature(enable = "neon")]
2734- #[cfg_attr(test, assert_instr(zip1 , LANE1 = 1, LANE2 = 0))]
2734+ #[cfg_attr(test, assert_instr(mov , LANE1 = 1, LANE2 = 0))]
27352735#[rustc_legacy_const_generics(1, 3)]
27362736#[stable(feature = "neon_intrinsics", since = "1.59.0")]
27372737pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t {
@@ -2748,7 +2748,7 @@ pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t,
27482748/// Insert vector element from another vector element
27492749#[inline]
27502750#[target_feature(enable = "neon")]
2751- #[cfg_attr(test, assert_instr(zip1 , LANE1 = 1, LANE2 = 0))]
2751+ #[cfg_attr(test, assert_instr(mov , LANE1 = 1, LANE2 = 0))]
27522752#[rustc_legacy_const_generics(1, 3)]
27532753#[stable(feature = "neon_intrinsics", since = "1.59.0")]
27542754pub unsafe fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t {
@@ -2784,7 +2784,7 @@ pub unsafe fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(a: float32x4_t
27842784/// Insert vector element from another vector element
27852785#[inline]
27862786#[target_feature(enable = "neon")]
2787- #[cfg_attr(test, assert_instr(zip1 , LANE1 = 1, LANE2 = 0))]
2787+ #[cfg_attr(test, assert_instr(mov , LANE1 = 1, LANE2 = 0))]
27882788#[rustc_legacy_const_generics(1, 3)]
27892789#[stable(feature = "neon_intrinsics", since = "1.59.0")]
27902790pub unsafe fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
@@ -9183,7 +9183,7 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
91839183 vmaxq_f64_(a, b)
91849184}
91859185
9186- /// Floating-point Maximun Number (vector)
9186+ /// Floating-point Maximum Number (vector)
91879187#[inline]
91889188#[target_feature(enable = "neon")]
91899189#[cfg_attr(test, assert_instr(fmaxnm))]
@@ -9197,7 +9197,7 @@ pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
91979197 vmaxnm_f64_(a, b)
91989198}
91999199
9200- /// Floating-point Maximun Number (vector)
9200+ /// Floating-point Maximum Number (vector)
92019201#[inline]
92029202#[target_feature(enable = "neon")]
92039203#[cfg_attr(test, assert_instr(fmaxnm))]
@@ -9379,7 +9379,7 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
93799379 vminq_f64_(a, b)
93809380}
93819381
9382- /// Floating-point Minimun Number (vector)
9382+ /// Floating-point Minimum Number (vector)
93839383#[inline]
93849384#[target_feature(enable = "neon")]
93859385#[cfg_attr(test, assert_instr(fminnm))]
@@ -9393,7 +9393,7 @@ pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
93939393 vminnm_f64_(a, b)
93949394}
93959395
9396- /// Floating-point Minimun Number (vector)
9396+ /// Floating-point Minimum Number (vector)
93979397#[inline]
93989398#[target_feature(enable = "neon")]
93999399#[cfg_attr(test, assert_instr(fminnm))]
@@ -10535,31 +10535,51 @@ pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32
1053510535#[target_feature(enable = "rdm")]
1053610536#[cfg_attr(test, assert_instr(sqrdmlah))]
1053710537pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
// Diff note: the removed (`-`) line computed the result by composing two other
// intrinsics, `vqadd_s16(a, vqrdmulh_s16(b, c))`.
10538- vqadd_s16(a, vqrdmulh_s16(b, c))
// The added (`+`) lines replace that with a locally declared foreign function
// which, on aarch64, is linked to `llvm.aarch64.neon.sqrdmlah.v4i16`, and then
// forward all three arguments to it unchanged.
// NOTE(review): the `improper_ctypes` allow is presumably needed because the
// vector types are flagged by that lint in extern blocks — confirm.
10538+ #[allow(improper_ctypes)]
10539+ extern "unadjusted" {
10540+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i16")]
10541+ fn vqrdmlah_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
10542+ }
10543+ vqrdmlah_s16_(a, b, c)
1053910544}
1054010545
1054110546/// Signed saturating rounding doubling multiply accumulate returning high half
1054210547#[inline]
1054310548#[target_feature(enable = "rdm")]
1054410549#[cfg_attr(test, assert_instr(sqrdmlah))]
1054510550pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
// Diff note: the removed (`-`) line computed the result by composing two other
// intrinsics, `vqaddq_s16(a, vqrdmulhq_s16(b, c))`.
10546- vqaddq_s16(a, vqrdmulhq_s16(b, c))
// The added (`+`) lines replace that with a locally declared foreign function
// which, on aarch64, is linked to `llvm.aarch64.neon.sqrdmlah.v8i16`, and then
// forward all three arguments to it unchanged.
// NOTE(review): the `improper_ctypes` allow is presumably needed because the
// vector types are flagged by that lint in extern blocks — confirm.
10551+ #[allow(improper_ctypes)]
10552+ extern "unadjusted" {
10553+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v8i16")]
10554+ fn vqrdmlahq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
10555+ }
10556+ vqrdmlahq_s16_(a, b, c)
1054710557}
1054810558
1054910559/// Signed saturating rounding doubling multiply accumulate returning high half
1055010560#[inline]
1055110561#[target_feature(enable = "rdm")]
1055210562#[cfg_attr(test, assert_instr(sqrdmlah))]
1055310563pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
// Diff note: the removed (`-`) line computed the result by composing two other
// intrinsics, `vqadd_s32(a, vqrdmulh_s32(b, c))`.
10554- vqadd_s32(a, vqrdmulh_s32(b, c))
// The added (`+`) lines replace that with a locally declared foreign function
// which, on aarch64, is linked to `llvm.aarch64.neon.sqrdmlah.v2i32`, and then
// forward all three arguments to it unchanged.
// NOTE(review): the `improper_ctypes` allow is presumably needed because the
// vector types are flagged by that lint in extern blocks — confirm.
10564+ #[allow(improper_ctypes)]
10565+ extern "unadjusted" {
10566+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v2i32")]
10567+ fn vqrdmlah_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
10568+ }
10569+ vqrdmlah_s32_(a, b, c)
1055510570}
1055610571
1055710572/// Signed saturating rounding doubling multiply accumulate returning high half
1055810573#[inline]
1055910574#[target_feature(enable = "rdm")]
1056010575#[cfg_attr(test, assert_instr(sqrdmlah))]
1056110576pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
// Diff note: the removed (`-`) line computed the result by composing two other
// intrinsics, `vqaddq_s32(a, vqrdmulhq_s32(b, c))`.
10562- vqaddq_s32(a, vqrdmulhq_s32(b, c))
// The added (`+`) lines replace that with a locally declared foreign function
// which, on aarch64, is linked to `llvm.aarch64.neon.sqrdmlah.v4i32`, and then
// forward all three arguments to it unchanged.
// NOTE(review): the `improper_ctypes` allow is presumably needed because the
// vector types are flagged by that lint in extern blocks — confirm.
10577+ #[allow(improper_ctypes)]
10578+ extern "unadjusted" {
10579+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i32")]
10580+ fn vqrdmlahq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
10581+ }
10582+ vqrdmlahq_s32_(a, b, c)
1056310583}
1056410584
1056510585/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10591,7 +10611,8 @@ pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
1059110611#[rustc_legacy_const_generics(3)]
1059210612pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 2-bit immediate (0..=3) — confirm
// against the macro definition.
1059310613 static_assert_imm2!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqadd_s16`) to the result of
// the lane-indexed `vqrdmulh_lane_s16::<LANE>(b, c)`.
10594- vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
// The added (`+`) lines shadow `c` with a 4-lane vector whose shuffle indices are
// all `LANE` (i.e. element LANE of the original `c` repeated in every lane), then
// delegate to the non-lane form `vqrdmlah_s16`.
10614+ let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
10615+ vqrdmlah_s16(a, b, c)
1059510616}
1059610617
1059710618/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10601,7 +10622,8 @@ pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c:
1060110622#[rustc_legacy_const_generics(3)]
1060210623pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 3-bit immediate (0..=7) — confirm
// against the macro definition.
1060310624 static_assert_imm3!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqadd_s16`) to the result of
// the lane-indexed `vqrdmulh_laneq_s16::<LANE>(b, c)`.
10604- vqadd_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c))
// The added (`+`) lines shadow the 8-lane `c` with a 4-lane vector whose shuffle
// indices are all `LANE` (element LANE of the original `c` repeated in every
// lane), then delegate to the non-lane form `vqrdmlah_s16`.
10625+ let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
10626+ vqrdmlah_s16(a, b, c)
1060510627}
1060610628
1060710629/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10611,7 +10633,8 @@ pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c:
1061110633#[rustc_legacy_const_generics(3)]
1061210634pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 2-bit immediate (0..=3) — confirm
// against the macro definition.
1061310635 static_assert_imm2!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqaddq_s16`) to the result of
// the lane-indexed `vqrdmulhq_lane_s16::<LANE>(b, c)`.
10614- vqaddq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c))
// The added (`+`) lines shadow the 4-lane `c` with an 8-lane vector whose shuffle
// indices are all `LANE` (element LANE of the original `c` repeated in every
// lane), then delegate to the non-lane form `vqrdmlahq_s16`.
10636+ let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
10637+ vqrdmlahq_s16(a, b, c)
1061510638}
1061610639
1061710640/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10621,7 +10644,8 @@ pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c:
1062110644#[rustc_legacy_const_generics(3)]
1062210645pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 3-bit immediate (0..=7) — confirm
// against the macro definition.
1062310646 static_assert_imm3!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqaddq_s16`) to the result of
// the lane-indexed `vqrdmulhq_laneq_s16::<LANE>(b, c)`.
10624- vqaddq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c))
// The added (`+`) lines shadow `c` with an 8-lane vector whose shuffle indices
// are all `LANE` (element LANE of the original `c` repeated in every lane), then
// delegate to the non-lane form `vqrdmlahq_s16`.
10647+ let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
10648+ vqrdmlahq_s16(a, b, c)
1062510649}
1062610650
1062710651/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10631,7 +10655,8 @@ pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c
1063110655#[rustc_legacy_const_generics(3)]
1063210656pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 1-bit immediate (0..=1) — confirm
// against the macro definition.
1063310657 static_assert_imm1!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqadd_s32`) to the result of
// the lane-indexed `vqrdmulh_lane_s32::<LANE>(b, c)`.
10634- vqadd_s32(a, vqrdmulh_lane_s32::<LANE>(b, c))
// The added (`+`) lines shadow `c` with a 2-lane vector whose shuffle indices are
// both `LANE` (element LANE of the original `c` repeated in every lane), then
// delegate to the non-lane form `vqrdmlah_s32`.
10658+ let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
10659+ vqrdmlah_s32(a, b, c)
1063510660}
1063610661
1063710662/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10641,7 +10666,8 @@ pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c:
1064110666#[rustc_legacy_const_generics(3)]
1064210667pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 2-bit immediate (0..=3) — confirm
// against the macro definition.
1064310668 static_assert_imm2!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqadd_s32`) to the result of
// the lane-indexed `vqrdmulh_laneq_s32::<LANE>(b, c)`.
10644- vqadd_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c))
// The added (`+`) lines shadow the 4-lane `c` with a 2-lane vector whose shuffle
// indices are both `LANE` (element LANE of the original `c` repeated in every
// lane), then delegate to the non-lane form `vqrdmlah_s32`.
10669+ let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
10670+ vqrdmlah_s32(a, b, c)
1064510671}
1064610672
1064710673/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10651,7 +10677,8 @@ pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c:
1065110677#[rustc_legacy_const_generics(3)]
1065210678pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 1-bit immediate (0..=1) — confirm
// against the macro definition.
1065310679 static_assert_imm1!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqaddq_s32`) to the result of
// the lane-indexed `vqrdmulhq_lane_s32::<LANE>(b, c)`.
10654- vqaddq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c))
// The added (`+`) lines shadow the 2-lane `c` with a 4-lane vector whose shuffle
// indices are all `LANE` (element LANE of the original `c` repeated in every
// lane), then delegate to the non-lane form `vqrdmlahq_s32`.
10680+ let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
10681+ vqrdmlahq_s32(a, b, c)
1065510682}
1065610683
1065710684/// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10661,7 +10688,8 @@ pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c:
1066110688#[rustc_legacy_const_generics(3)]
1066210689pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
// Compile-time check on LANE.
// NOTE(review): presumably restricts LANE to a 2-bit immediate (0..=3) — confirm
// against the macro definition.
1066310690 static_assert_imm2!(LANE);
// Diff note: the removed (`-`) line added `a` (via `vqaddq_s32`) to the result of
// the lane-indexed `vqrdmulhq_laneq_s32::<LANE>(b, c)`.
10664- vqaddq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c))
// The added (`+`) lines shadow `c` with a 4-lane vector whose shuffle indices are
// all `LANE` (element LANE of the original `c` repeated in every lane), then
// delegate to the non-lane form `vqrdmlahq_s32`.
10691+ let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
10692+ vqrdmlahq_s32(a, b, c)
1066510693}
1066610694
1066710695/// Signed saturating rounding doubling multiply accumulate returning high half
0 commit comments