@@ -12,6 +12,16 @@ use stdsimd_test::assert_instr;
1212
1313#[ allow( improper_ctypes) ]
1414extern "C" {
15+ #[ link_name = "llvm.x86.sse.cvtpi2ps" ]
16+ fn cvtpi2ps ( a : f32x4 , b : __m64 ) -> f32x4 ;
17+ #[ link_name = "llvm.x86.mmx.pextr.w" ]
18+ fn pextrw ( a : __m64 , imm8 : i32 ) -> i32 ;
19+ #[ link_name = "llvm.x86.mmx.pinsr.w" ]
20+ fn pinsrw ( a : __m64 , d : i32 , imm8 : i32 ) -> __m64 ;
21+ #[ link_name = "llvm.x86.mmx.pmovmskb" ]
22+ fn pmovmskb ( a : __m64 ) -> i32 ;
23+ #[ link_name = "llvm.x86.sse.pshuf.w" ]
24+ fn pshufw ( a : __m64 , imm8 : i8 ) -> __m64 ;
1525 #[ link_name = "llvm.x86.mmx.pmaxs.w" ]
1626 fn pmaxsw ( a : __m64 , b : __m64 ) -> __m64 ;
1727 #[ link_name = "llvm.x86.mmx.pmaxu.b" ]
@@ -98,6 +108,64 @@ pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
98108 _mm_min_pu8 ( a, b)
99109}
100110
111+ /// Converts two elements of a 64-bit vector of [2 x i32] into two
112+ /// floating point values and writes them to the lower 64-bits of the
113+ /// destination. The remaining higher order elements of the destination are
114+ /// copied from the corresponding elements in the first operand.
115+ #[ inline( always) ]
116+ #[ target_feature = "+sse" ]
117+ #[ cfg_attr( test, assert_instr( cvtpi2ps) ) ]
118+ pub unsafe fn _mm_cvt_pi2ps ( a : f32x4 , b : i32x2 ) -> f32x4 {
119+ cvtpi2ps ( a, mem:: transmute ( b) )
120+ }
121+
122+ /// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
123+ /// returns it, as specified by the immediate integer operand.
124+ #[ inline( always) ]
125+ #[ target_feature = "+sse" ]
126+ #[ cfg_attr( test, assert_instr( pextrw, imm2 = 0 ) ) ]
127+ pub unsafe fn _mm_extract_pi16 ( a : i16x4 , imm2 : i32 ) -> i16 {
128+ macro_rules! call {
129+ ( $imm2: expr) => { pextrw( mem:: transmute( a) , $imm2) as i16 }
130+ }
131+ constify_imm2 ! ( imm2, call)
132+ }
133+
134+ /// Copies data from the 64-bit vector of [4 x i16] to the destination,
135+ /// and inserts the lower 16-bits of an integer operand at the 16-bit offset
136+ /// specified by the immediate operand `n`.
137+ #[ inline( always) ]
138+ #[ target_feature = "+sse" ]
139+ #[ cfg_attr( test, assert_instr( pinsrw, imm2 = 0 ) ) ]
140+ pub unsafe fn _mm_insert_pi16 ( a : i16x4 , d : i32 , imm2 : i32 ) -> i16x4 {
141+ macro_rules! call {
142+ ( $imm2: expr) => { mem:: transmute( pinsrw( mem:: transmute( a) , d, $imm2) ) }
143+ }
144+ constify_imm2 ! ( imm2, call)
145+ }
146+
147+ /// Takes the most significant bit from each 8-bit element in a 64-bit
148+ /// integer vector to create a 16-bit mask value. Zero-extends the value to
149+ /// 32-bit integer and writes it to the destination.
150+ #[ inline( always) ]
151+ #[ target_feature = "+sse" ]
152+ #[ cfg_attr( test, assert_instr( pmovmskb) ) ]
153+ pub unsafe fn _mm_movemask_pi8 ( a : i16x4 ) -> i32 {
154+ pmovmskb ( mem:: transmute ( a) )
155+ }
156+
157+ /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
158+ /// destination, as specified by the immediate value operand.
159+ #[ inline( always) ]
160+ #[ target_feature = "+sse" ]
161+ #[ cfg_attr( test, assert_instr( pshufw, imm8 = 0 ) ) ]
162+ pub unsafe fn _mm_shuffle_pi16 ( a : i16x4 , imm8 : i8 ) -> i16x4 {
163+ macro_rules! call {
164+ ( $imm8: expr) => { mem:: transmute( pshufw( mem:: transmute( a) , $imm8) ) }
165+ }
166+ constify_imm8 ! ( imm8, call)
167+ }
168+
101169/// Convert the two lower packed single-precision (32-bit) floating-point
102170/// elements in `a` to packed 32-bit integers with truncation.
103171#[ inline( always) ]
@@ -205,6 +273,50 @@ mod tests {
205273 assert_eq ! ( r, sse:: _m_pminub( a, b) ) ;
206274 }
207275
276+ #[ simd_test = "sse" ]
277+ unsafe fn _mm_cvt_pi2ps ( ) {
278+ let a = f32x4:: new ( 0. , 0. , 3. , 4. ) ;
279+ let b = i32x2:: new ( 1 , 2 ) ;
280+ let expected = f32x4:: new ( 1. , 2. , 3. , 4. ) ;
281+ let r = sse:: _mm_cvt_pi2ps ( a, b) ;
282+ assert_eq ! ( r, expected) ;
283+ }
284+
285+ #[ simd_test = "sse" ]
286+ unsafe fn _mm_extract_pi16 ( ) {
287+ let a = i16x4:: new ( 1 , 2 , 3 , 4 ) ;
288+ let r = sse:: _mm_extract_pi16 ( a, 0 ) ;
289+ assert_eq ! ( r, 1 ) ;
290+ let r = sse:: _mm_extract_pi16 ( a, 1 ) ;
291+ assert_eq ! ( r, 2 ) ;
292+ }
293+
294+ #[ simd_test = "sse" ]
295+ unsafe fn _mm_insert_pi16 ( ) {
296+ let a = i16x4:: new ( 1 , 2 , 3 , 4 ) ;
297+ let r = sse:: _mm_insert_pi16 ( a, 0 , 0b0 ) ;
298+ let expected = i16x4:: new ( 0 , 2 , 3 , 4 ) ;
299+ assert_eq ! ( r, expected) ;
300+ let r = sse:: _mm_insert_pi16 ( a, 0 , 0b10 ) ;
301+ let expected = i16x4:: new ( 1 , 2 , 0 , 4 ) ;
302+ assert_eq ! ( r, expected) ;
303+ }
304+
305+ #[ simd_test = "sse" ]
306+ unsafe fn _mm_movemask_pi8 ( ) {
307+ let a = i16x4:: new ( 0b1000_0000 , 0b0100_0000 , 0b1000_0000 , 0b0100_0000 ) ;
308+ let r = sse:: _mm_movemask_pi8 ( a) ;
309+ assert_eq ! ( r, 0b10001 ) ;
310+ }
311+
312+ #[ simd_test = "sse" ]
313+ unsafe fn _mm_shuffle_pi16 ( ) {
314+ let a = i16x4:: new ( 1 , 2 , 3 , 4 ) ;
315+ let r = sse:: _mm_shuffle_pi16 ( a, 0b00_01_01_11 ) ;
316+ let expected = i16x4:: new ( 4 , 2 , 2 , 1 ) ;
317+ assert_eq ! ( r, expected) ;
318+ }
319+
208320 #[ simd_test = "sse" ]
209321 unsafe fn _mm_cvtps_pi32 ( ) {
210322 let a = f32x4:: new ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
0 commit comments