@@ -243,6 +243,106 @@ func.func @vecdim_reduction_ori(%in: memref<256x512xi32>, %out: memref<256xi32>)
// CHECK: affine.store %[[final_red]], %{{.*}} : memref<256xi32>
// CHECK: }

// -----

// Row-wise XOR reduction over a 256x512 i32 buffer. For each of the 256 rows
// the inner loop folds all 512 elements with arith.xori through an iter_arg,
// and the folded value is stored into the matching slot of the 256-element
// output buffer.
//
// The scalar accumulator is seeded with 0. The expectations that follow show
// the vectorized loop seeded with dense<0> and the result of vector.reduction
// stored directly, with no extra scalar combine step.
func.func @vecdim_reduction_xori(%in: memref<256x512xi32>, %out: memref<256xi32>) {
  %init = arith.constant 0 : i32
  affine.for %row = 0 to 256 {
    // Inner loop carries the running XOR of the current row.
    %row_xor = affine.for %col = 0 to 512 iter_args(%acc = %init) -> (i32) {
      %elem = affine.load %in[%row, %col] : memref<256x512xi32>
      %next = arith.xori %acc, %elem : i32
      affine.yield %next : i32
    }
    affine.store %row_xor, %out[%row] : memref<256xi32>
  }
  return
}

// CHECK-LABEL: func.func @vecdim_reduction_xori(
// CHECK-SAME: %[[input:.*]]: memref<256x512xi32>,
// CHECK-SAME: %[[output:.*]]: memref<256xi32>) {
// CHECK: %[[cst:.*]] = arith.constant 0 : i32
// CHECK: affine.for %{{.*}} = 0 to 256 {
// CHECK: %[[vzero:.*]] = arith.constant dense<0> : vector<128xi32>
// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xi32>) {
// CHECK: %[[poison:.*]] = ub.poison : i32
// CHECK: %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xi32>, vector<128xi32>
// CHECK: %[[xor:.*]] = arith.xori %[[red_iter]], %[[ld]] : vector<128xi32>
// CHECK: affine.yield %[[xor]] : vector<128xi32>
// CHECK: }
// CHECK: %[[final_red:.*]] = vector.reduction <xor>, %[[vred]] : vector<128xi32> into i32
// CHECK: affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xi32>
// CHECK: }
// CHECK: return
// CHECK: }

// -----

// Row-wise arith.minnumf reduction over a 256x512 f32 buffer. For each of the
// 256 rows the inner loop folds all 512 elements through an iter_arg, and the
// folded value is stored into the matching slot of the 256-element output
// buffer.
//
// The scalar accumulator is seeded with 0xFF800000 (the f32 bit pattern of
// negative infinity). The expectations that follow show the vectorized loop
// seeded with dense<0x7FC00000> instead, and a trailing scalar arith.minnumf
// that combines the vector.reduction result with the original seed before
// the store.
func.func @vecdim_reduction_minnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
  %init = arith.constant 0xFF800000 : f32
  affine.for %row = 0 to 256 {
    // Inner loop carries the running minimum of the current row.
    %row_min = affine.for %col = 0 to 512 iter_args(%acc = %init) -> (f32) {
      %elem = affine.load %in[%row, %col] : memref<256x512xf32>
      %next = arith.minnumf %acc, %elem : f32
      affine.yield %next : f32
    }
    affine.store %row_min, %out[%row] : memref<256xf32>
  }
  return
}

// CHECK-LABEL: func.func @vecdim_reduction_minnumf(
// CHECK-SAME: %[[input:.*]]: memref<256x512xf32>,
// CHECK-SAME: %[[output:.*]]: memref<256xf32>) {
// CHECK: %[[cst:.*]] = arith.constant 0xFF800000 : f32
// CHECK: affine.for %{{.*}} = 0 to 256 {
// CHECK: %[[vzero:.*]] = arith.constant dense<0x7FC00000> : vector<128xf32>
// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xf32>) {
// CHECK: %[[poison:.*]] = ub.poison : f32
// CHECK: %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xf32>, vector<128xf32>
// CHECK: %[[min:.*]] = arith.minnumf %[[red_iter]], %[[ld]] : vector<128xf32>
// CHECK: affine.yield %[[min]] : vector<128xf32>
// CHECK: }
// CHECK: %[[red_scalar:.*]] = vector.reduction <minnumf>, %[[vred]] : vector<128xf32> into f32
// CHECK: %[[final_red:.*]] = arith.minnumf %[[red_scalar]], %[[cst]] : f32
// CHECK: affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xf32>
// CHECK: }
// CHECK: return
// CHECK: }

// -----

// Row-wise arith.maxnumf reduction over a 256x512 f32 buffer. For each of the
// 256 rows the inner loop folds all 512 elements through an iter_arg, and the
// folded value is stored into the matching slot of the 256-element output
// buffer.
//
// The scalar accumulator is seeded with 0xFF800000 (the f32 bit pattern of
// negative infinity). The expectations that follow show the vectorized loop
// seeded with dense<0xFFC00000> instead, and a trailing scalar arith.maxnumf
// that combines the vector.reduction result with the original seed before
// the store.
func.func @vecdim_reduction_maxnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
  %init = arith.constant 0xFF800000 : f32
  affine.for %row = 0 to 256 {
    // Inner loop carries the running maximum of the current row.
    %row_max = affine.for %col = 0 to 512 iter_args(%acc = %init) -> (f32) {
      %elem = affine.load %in[%row, %col] : memref<256x512xf32>
      %next = arith.maxnumf %acc, %elem : f32
      affine.yield %next : f32
    }
    affine.store %row_max, %out[%row] : memref<256xf32>
  }
  return
}

// CHECK-LABEL: func.func @vecdim_reduction_maxnumf(
// CHECK-SAME: %[[input:.*]]: memref<256x512xf32>,
// CHECK-SAME: %[[output:.*]]: memref<256xf32>) {
// CHECK: %[[cst:.*]] = arith.constant 0xFF800000 : f32
// CHECK: affine.for %{{.*}} = 0 to 256 {
// CHECK: %[[vzero:.*]] = arith.constant dense<0xFFC00000> : vector<128xf32>
// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xf32>) {
// CHECK: %[[poison:.*]] = ub.poison : f32
// CHECK: %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xf32>, vector<128xf32>
// CHECK: %[[max:.*]] = arith.maxnumf %[[red_iter]], %[[ld]] : vector<128xf32>
// CHECK: affine.yield %[[max]] : vector<128xf32>
// CHECK: }
// CHECK: %[[red_scalar:.*]] = vector.reduction <maxnumf>, %[[vred]] : vector<128xf32> into f32
// CHECK: %[[final_red:.*]] = arith.maxnumf %[[red_scalar]], %[[cst]] : f32
// CHECK: affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xf32>
// CHECK: }
// CHECK: return
// CHECK: }

// -----

0 commit comments