Skip to content
4 changes: 4 additions & 0 deletions crates/core_arch/src/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,7 @@ simd_ty!(i32x16[i32]:
simd_ty!(i64x8[i64]:
i64, i64, i64, i64, i64, i64, i64, i64
| x0, x1, x2, x3, x4, x5, x6, x7);

// 512-bit vector of eight unsigned 64-bit lanes, used by the AVX-512 epu64 intrinsics.
simd_ty!(u64x8[u64]:
u64, u64, u64, u64, u64, u64, u64, u64
| x0, x1, x2, x3, x4, x5, x6, x7);
126 changes: 126 additions & 0 deletions crates/core_arch/src/x86/avx512f.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,132 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
transmute(i64x8::splat(a))
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in a mask vector.
///
/// Bit `i` of the returned mask is set when lane `i` of `a` is (unsigned) less than lane `i` of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    // Lane-wise unsigned `<` produces an all-ones/all-zeros lane mask vector …
    let lt: __m512i = simd_lt(a.as_u64x8(), b.as_u64x8());
    // … which is then packed into one bit per lane.
    simd_bitmask::<__m512i, __mmask8>(lt)
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`
/// using zeromask `m` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epu64_mask(a, b) & m
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`
/// using zeromask `m` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epu64_mask(a, b) & m
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k`
/// using zeromask `m` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epu64_mask(a, b) & m
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in `a` and `b` for less-than, and store the results in mask vector `k`
/// using zeromask `m` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epi64_mask(a, b) & m
}

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k`
/// using zeromask `m` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epi64_mask(a, b) & m
}

/// Compare packed signed 64-bit integers in `a` and `b` for equality, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in `a` and `b` for equality, and store the results in mask vector `k`
/// using zeromask `m` (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epi64_mask(a, b) & m
}

#[cfg(test)]
mod tests {
use std;
Expand Down
14 changes: 14 additions & 0 deletions crates/core_arch/src/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,10 @@ types! {
#[allow(non_camel_case_types)]
pub type __mmask16 = i16;

/// The `__mmask8` type used in AVX-512 intrinsics, an 8-bit unsigned integer
/// in which each bit holds one lane's comparison result.
#[allow(non_camel_case_types)]
pub type __mmask8 = u8;

#[cfg(test)]
mod test;
#[cfg(test)]
Expand Down Expand Up @@ -509,6 +513,16 @@ pub(crate) trait m512iExt: Sized {
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
unsafe { transmute(self.as_m512i()) }
}

/// Reinterpret the 512 bits of this `__m512i` as eight unsigned 64-bit lanes.
#[inline]
fn as_u64x8(self) -> crate::core_arch::simd::u64x8 {
unsafe { transmute(self.as_m512i()) }
}

/// Reinterpret the 512 bits of this `__m512i` as eight signed 64-bit lanes.
#[inline]
fn as_i64x8(self) -> crate::core_arch::simd::i64x8 {
unsafe { transmute(self.as_m512i()) }
}
}

impl m512iExt for __m512i {
Expand Down
165 changes: 165 additions & 0 deletions crates/core_arch/src/x86_64/avx512f.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
use crate::{
core_arch::{simd::*, x86::*},
mem::transmute,
};

/// Sets packed 64-bit integers in `dst` with the supplied values.
///
/// Parameter names follow Intel's signature `_mm512_set_epi64(e7, ..., e0)`:
/// the FIRST argument is stored in the highest lane (index 7) and the LAST
/// argument in lane 0. (The original names `e0..e7` were reversed relative to
/// the lanes they populate; only the names changed — argument positions and
/// behavior are identical.)
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi64(
    e7: i64,
    e6: i64,
    e5: i64,
    e4: i64,
    e3: i64,
    e2: i64,
    e1: i64,
    e0: i64,
) -> __m512i {
    // Delegate to the reversed-order constructor, whose first argument is lane 0.
    _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 64-bit integers in `dst` with the supplied values in
/// reverse order: the first argument `e0` is stored in lane 0 (the lowest
/// lane) and `e7` in lane 7.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi64(
e0: i64,
e1: i64,
e2: i64,
e3: i64,
e4: i64,
e5: i64,
e6: i64,
e7: i64,
) -> __m512i {
let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
transmute(r)
}

#[cfg(test)]
mod tests {
// NOTE: `_mm512_set_epi64` stores its FIRST argument in the highest lane
// (it forwards its arguments to `_mm512_setr_epi64` in reverse order — see
// its definition above), so mask bit 0 corresponds to the LAST argument.
use std;
use stdarch_test::simd_test;

use crate::core_arch::x86::*;
use crate::core_arch::x86_64::*;

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epu64_mask() {
// Unsigned view: negative i64 values reinterpret as very large u64 values.
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmplt_epu64_mask(a, b);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01111010;
// Result is the full compare mask AND-ed with `mask`.
let r = _mm512_mask_cmplt_epu64_mask(mask, a, b);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epu64_mask() {
// Operands are swapped relative to the `cmplt` test, so the expected mask is the same.
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmpgt_epu64_mask(b, a);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01111010;
let r = _mm512_mask_cmpgt_epu64_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let m = _mm512_cmpeq_epu64_mask(b, a);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let mask = 0b01111010;
let r = _mm512_mask_cmpeq_epu64_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epi64_mask() {
// Signed view: only i64::MIN and -100 are below -1 (lanes 2 and 0).
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmplt_epi64_mask(a, b);
assert_eq!(m, 0b00000101);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01100110;
let r = _mm512_mask_cmplt_epi64_mask(mask, a, b);
assert_eq!(r, 0b00000100);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let m = _mm512_cmpgt_epi64_mask(b, a);
assert_eq!(m, 0b00000101);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set1_epi64(-1);
let mask = 0b01100110;
let r = _mm512_mask_cmpgt_epi64_mask(mask, b, a);
assert_eq!(r, 0b00000100);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let m = _mm512_cmpeq_epi64_mask(b, a);
assert_eq!(m, 0b11001111);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
let mask = 0b01111010;
let r = _mm512_mask_cmpeq_epi64_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi64() {
// `set` and `setr` with mirrored argument lists must produce the same vector.
let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_epi64() {
let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
}
}
3 changes: 3 additions & 0 deletions crates/core_arch/src/x86_64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ pub use self::bmi2::*;
mod avx2;
pub use self::avx2::*;

mod avx512f;
pub use self::avx512f::*;

mod bswap;
pub use self::bswap::*;

Expand Down
1 change: 1 addition & 0 deletions crates/stdarch-verify/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"__m512" => quote! { &M512 },
"__m512d" => quote! { &M512D },
"__m512i" => quote! { &M512I },
"__mmask8" => quote! { &MMASK8 },
"__mmask16" => quote! { &MMASK16 },
"__m64" => quote! { &M64 },
"bool" => quote! { &BOOL },
Expand Down
3 changes: 3 additions & 0 deletions crates/stdarch-verify/tests/x86-intel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ static M256D: Type = Type::M256D;
static M512: Type = Type::M512;
static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
static MMASK8: Type = Type::MMASK8;
static MMASK16: Type = Type::MMASK16;

static TUPLE: Type = Type::Tuple;
Expand All @@ -76,6 +77,7 @@ enum Type {
M512,
M512D,
M512I,
MMASK8,
MMASK16,
Tuple,
CpuidResult,
Expand Down Expand Up @@ -653,6 +655,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
(&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}

(&Type::MMASK8, "__mmask8") => {}
(&Type::MMASK16, "__mmask16") => {}

// This is a macro (?) in C which seems to mutate its arguments, but
Expand Down