@@ -41,7 +41,6 @@ public class TestMulAddS2I {
4141
4242 static short [] sArr1 = new short [RANGE ];
4343 static short [] sArr2 = new short [RANGE ];
44- static int [] ioutArr = new int [RANGE ];
4544 static final int [] GOLDEN_A ;
4645 static final int [] GOLDEN_B ;
4746 static final int [] GOLDEN_C ;
@@ -50,6 +49,10 @@ public class TestMulAddS2I {
5049 static final int [] GOLDEN_F ;
5150 static final int [] GOLDEN_G ;
5251 static final int [] GOLDEN_H ;
52+ static final int [] GOLDEN_I ;
53+ static final int [] GOLDEN_J ;
54+ static final int [] GOLDEN_K ;
55+ static final int [] GOLDEN_L ;
5356
5457 static {
5558 for (int i = 0 ; i < RANGE ; i ++) {
@@ -58,12 +61,16 @@ public class TestMulAddS2I {
5861 }
5962 GOLDEN_A = testa ();
6063 GOLDEN_B = testb ();
61- GOLDEN_C = testc ();
62- GOLDEN_D = testd ();
63- GOLDEN_E = teste ();
64- GOLDEN_F = testf ();
65- GOLDEN_G = testg ();
66- GOLDEN_H = testh ();
64+ GOLDEN_C = testc (new int [ITER ]);
65+ GOLDEN_D = testd (new int [ITER ]);
66+ GOLDEN_E = teste (new int [ITER ]);
67+ GOLDEN_F = testf (new int [ITER ]);
68+ GOLDEN_G = testg (new int [ITER ]);
69+ GOLDEN_H = testh (new int [ITER ]);
70+ GOLDEN_I = testi (new int [ITER ]);
71+ GOLDEN_J = testj (new int [ITER ]);
72+ GOLDEN_K = testk (new int [ITER ]);
73+ GOLDEN_L = testl (new int [ITER ]);
6774 }
6875
6976
@@ -72,17 +79,22 @@ public static void main(String[] args) {
7279 TestFramework .runWithFlags ("-XX:-AlignVector" );
7380 }
7481
75- @ Run (test = {"testa" , "testb" , "testc" , "testd" , "teste" , "testf" , "testg" , "testh" })
82+ @ Run (test = {"testa" , "testb" , "testc" , "testd" , "teste" , "testf" , "testg" , "testh" ,
83+ "testi" , "testj" , "testk" , "testl" })
7684 @ Warmup (0 )
7785 public static void run () {
7886 compare (testa (), GOLDEN_A , "testa" );
7987 compare (testb (), GOLDEN_B , "testb" );
80- compare (testc (), GOLDEN_C , "testc" );
81- compare (testd (), GOLDEN_D , "testd" );
82- compare (teste (), GOLDEN_E , "teste" );
83- compare (testf (), GOLDEN_F , "testf" );
84- compare (testg (), GOLDEN_G , "testg" );
85- compare (testh (), GOLDEN_H , "testh" );
88+ compare (testc (new int [ITER ]), GOLDEN_C , "testc" );
89+ compare (testd (new int [ITER ]), GOLDEN_D , "testd" );
90+ compare (teste (new int [ITER ]), GOLDEN_E , "teste" );
91+ compare (testf (new int [ITER ]), GOLDEN_F , "testf" );
92+ compare (testg (new int [ITER ]), GOLDEN_G , "testg" );
93+ compare (testh (new int [ITER ]), GOLDEN_H , "testh" );
94+ compare (testi (new int [ITER ]), GOLDEN_I , "testi" );
95+ compare (testj (new int [ITER ]), GOLDEN_J , "testj" );
96+ compare (testk (new int [ITER ]), GOLDEN_K , "testk" );
97+ compare (testl (new int [ITER ]), GOLDEN_L , "testl" );
8698 }
8799
88100 public static void compare (int [] out , int [] golden , String name ) {
@@ -138,8 +150,7 @@ public static int[] testb() {
138150 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI , "> 0" })
139151 @ IR (applyIfCPUFeature = {"avx512_vnni" , "true" },
140152 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI_VNNI , "> 0" })
141- public static int [] testc () {
142- int [] out = new int [ITER ];
153+ public static int [] testc (int [] out ) {
143154 for (int i = 0 ; i < ITER ; i ++) {
144155 out [i ] += ((sArr1 [2 *i ] * sArr2 [2 *i ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ]));
145156 }
@@ -155,8 +166,7 @@ public static int[] testc() {
155166 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI , "> 0" })
156167 @ IR (applyIfCPUFeature = {"avx512_vnni" , "true" },
157168 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI_VNNI , "> 0" })
158- public static int [] testd () {
159- int [] out = ioutArr ;
169+ public static int [] testd (int [] out ) {
160170 for (int i = 0 ; i < ITER -2 ; i +=2 ) {
161171 // Unrolled, with the same structure.
162172 out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +0 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ]));
@@ -174,8 +184,7 @@ public static int[] testd() {
174184 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI , "> 0" })
175185 @ IR (applyIfCPUFeature = {"avx512_vnni" , "true" },
176186 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI_VNNI , "> 0" })
177- public static int [] teste () {
178- int [] out = ioutArr ;
187+ public static int [] teste (int [] out ) {
179188 for (int i = 0 ; i < ITER -2 ; i +=2 ) {
180189 // Unrolled, with some swaps.
181190 out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +0 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ]));
@@ -193,8 +202,7 @@ public static int[] teste() {
193202 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI , "> 0" })
194203 @ IR (applyIfCPUFeature = {"avx512_vnni" , "true" },
195204 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI_VNNI , "> 0" })
196- public static int [] testf () {
197- int [] out = ioutArr ;
205+ public static int [] testf (int [] out ) {
198206 for (int i = 0 ; i < ITER -2 ; i +=2 ) {
199207 // Unrolled, with some swaps.
200208 out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +0 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ]));
@@ -212,8 +220,7 @@ public static int[] testf() {
212220 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI , "> 0" })
213221 @ IR (applyIfCPUFeature = {"avx512_vnni" , "true" },
214222 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI_VNNI , "> 0" })
215- public static int [] testg () {
216- int [] out = ioutArr ;
223+ public static int [] testg (int [] out ) {
217224 for (int i = 0 ; i < ITER -2 ; i +=2 ) {
218225 // Unrolled, with some swaps.
219226 out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +0 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ]));
@@ -231,13 +238,65 @@ public static int[] testg() {
231238 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI , "> 0" })
232239 @ IR (applyIfCPUFeature = {"avx512_vnni" , "true" },
233240 counts = {IRNode .MUL_ADD_S2I , "> 0" , IRNode .MUL_ADD_VS2VI_VNNI , "> 0" })
234- public static int [] testh () {
235- int [] out = ioutArr ;
241+ public static int [] testh (int [] out ) {
236242 for (int i = 0 ; i < ITER -2 ; i +=2 ) {
237243 // Unrolled, with some swaps.
238244 out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +0 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ]));
239245 out [i +1 ] += ((sArr2 [2 *i +3 ] * sArr1 [2 *i +3 ]) + (sArr2 [2 *i +2 ] * sArr1 [2 *i +2 ])); // swap(1 4), swap(2 3)
240246 }
241247 return out ;
242248 }
249+
250+ @ Test
251+ @ IR (counts = {IRNode .MUL_ADD_S2I , "> 0" },
252+ applyIfCPUFeatureOr = {"sse2" , "true" , "asimd" , "true" })
253+ @ IR (counts = {IRNode .MUL_ADD_VS2VI , "= 0" })
254+ public static int [] testi (int [] out ) {
255+ for (int i = 0 ; i < ITER -2 ; i +=2 ) {
256+ // Unrolled by two, with a crosswise pairing in the second statement that prevents vectorization: the @IR rules above expect MUL_ADD_S2I to be present (on sse2/asimd) and MUL_ADD_VS2VI to be absent.
257+ out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +0 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ])); // ok: same-index products (0*0 + 1*1)
258+ out [i +1 ] += ((sArr1 [2 *i +2 ] * sArr2 [2 *i +3 ]) + (sArr1 [2 *i +3 ] * sArr2 [2 *i +2 ])); // bad: crosswise products (2*3 + 3*2)
259+ }
260+ return out ;
261+ }
262+
263+ @ Test
264+ @ IR (counts = {IRNode .MUL_ADD_S2I , "> 0" },
265+ applyIfCPUFeatureOr = {"sse2" , "true" , "asimd" , "true" })
266+ @ IR (counts = {IRNode .MUL_ADD_VS2VI , "= 0" })
267+ public static int [] testj (int [] out ) {
268+ for (int i = 0 ; i < ITER -2 ; i +=2 ) {
269+ // Unrolled by two, with crosswise pairings in both statements that prevent vectorization: the @IR rules above expect MUL_ADD_S2I to be present (on sse2/asimd) and MUL_ADD_VS2VI to be absent.
270+ out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +1 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +0 ])); // bad: crosswise products (0*1 + 1*0)
271+ out [i +1 ] += ((sArr1 [2 *i +2 ] * sArr2 [2 *i +3 ]) + (sArr1 [2 *i +3 ] * sArr2 [2 *i +2 ])); // bad: crosswise products (2*3 + 3*2)
272+ }
273+ return out ;
274+ }
275+
276+ @ Test
277+ @ IR (counts = {IRNode .MUL_ADD_S2I , "> 0" },
278+ applyIfCPUFeatureOr = {"sse2" , "true" , "asimd" , "true" })
279+ @ IR (counts = {IRNode .MUL_ADD_VS2VI , "= 0" })
280+ public static int [] testk (int [] out ) {
281+ for (int i = 0 ; i < ITER -2 ; i +=2 ) {
282+ // Unrolled by two, with a crosswise pairing in the first statement that prevents vectorization: the @IR rules above expect MUL_ADD_S2I to be present (on sse2/asimd) and MUL_ADD_VS2VI to be absent.
283+ out [i +0 ] += ((sArr1 [2 *i +0 ] * sArr2 [2 *i +1 ]) + (sArr1 [2 *i +1 ] * sArr2 [2 *i +0 ])); // bad: crosswise products (0*1 + 1*0)
284+ out [i +1 ] += ((sArr1 [2 *i +2 ] * sArr2 [2 *i +2 ]) + (sArr1 [2 *i +3 ] * sArr2 [2 *i +3 ])); // ok: same-index products (2*2 + 3*3)
285+ }
286+ return out ;
287+ }
288+
289+ @ Test
290+ @ IR (counts = {IRNode .MUL_ADD_S2I , "> 0" },
291+ applyIfCPUFeatureOr = {"sse2" , "true" , "asimd" , "true" })
292+ @ IR (counts = {IRNode .MUL_ADD_VS2VI , "= 0" })
293+ public static int [] testl (int [] out ) {
294+ for (int i = 0 ; i < ITER -2 ; i +=2 ) {
295+ // Unrolled by two, with a crosswise pairing in the second statement that prevents vectorization: the @IR rules above expect MUL_ADD_S2I to be present (on sse2/asimd) and MUL_ADD_VS2VI to be absent.
296+ out [i +0 ] += ((sArr1 [2 *i +1 ] * sArr2 [2 *i +1 ]) + (sArr1 [2 *i +0 ] * sArr2 [2 *i +0 ])); // ok: same-index products, only the two addends are swapped (1*1 + 0*0)
297+ out [i +1 ] += ((sArr1 [2 *i +2 ] * sArr2 [2 *i +3 ]) + (sArr1 [2 *i +3 ] * sArr2 [2 *i +2 ])); // bad: crosswise products (2*3 + 3*2)
298+ }
299+ return out ;
300+ }
301+
243302}
0 commit comments