@@ -189,6 +189,61 @@ func.func @vecdim_reduction_maxui(%in: memref<256x512xi32>, %out: memref<256xi32
189189// CHECK: affine.store %[[final_max]], %{{.*}} : memref<256xi32>
190190// CHECK: }
191191
192+ // -----
193+ // Bitwise-AND reduction over the inner '%j' loop. The init value -1 (all bits set in i32) is the identity of 'andi', matching the dense<-1> vector init below.
194+ func.func @vecdim_reduction_andi(%in: memref<256x512xi32>, %out: memref<256xi32>) {
195+   %cst = arith.constant -1 : i32
196+   affine.for %i = 0 to 256 {
197+     %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
198+       %ld = affine.load %in[%i, %j] : memref<256x512xi32>
199+       %and = arith.andi %red_iter, %ld : i32
200+       affine.yield %and : i32
201+     }
202+     affine.store %final_red, %out[%i] : memref<256xi32>
203+   }
204+   return
205+ }
206+
207+ // CHECK-LABEL: @vecdim_reduction_andi
208+ // CHECK: affine.for %{{.*}} = 0 to 256 {
209+ // CHECK: %[[vallone:.*]] = arith.constant dense<-1> : vector<128xi32>
210+ // CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vallone]]) -> (vector<128xi32>) {
211+ // CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xi32>, vector<128xi32>
212+ // CHECK: %[[and:.*]] = arith.andi %[[red_iter]], %[[ld]] : vector<128xi32>
213+ // CHECK: affine.yield %[[and]] : vector<128xi32>
214+ // CHECK: }
215+ // CHECK: %[[final_red:.*]] = vector.reduction <and>, %[[vred]] : vector<128xi32> into i32
216+ // CHECK: affine.store %[[final_red]], %{{.*}} : memref<256xi32>
217+ // CHECK: }
218+
219+ // -----
220+ // Bitwise-OR reduction over the inner '%j' loop. The init value 0 is the identity of 'ori', matching the dense<0> vector init below.
221+ func.func @vecdim_reduction_ori(%in: memref<256x512xi32>, %out: memref<256xi32>) {
222+   %cst = arith.constant 0 : i32
223+   affine.for %i = 0 to 256 {
224+     %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
225+       %ld = affine.load %in[%i, %j] : memref<256x512xi32>
226+       %or = arith.ori %red_iter, %ld : i32
227+       affine.yield %or : i32
228+     }
229+     affine.store %final_red, %out[%i] : memref<256xi32>
230+   }
231+   return
232+ }
233+
234+ // CHECK-LABEL: @vecdim_reduction_ori
235+ // CHECK: affine.for %{{.*}} = 0 to 256 {
236+ // CHECK: %[[vzero:.*]] = arith.constant dense<0> : vector<128xi32>
237+ // CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xi32>) {
238+ // CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xi32>, vector<128xi32>
239+ // CHECK: %[[or:.*]] = arith.ori %[[red_iter]], %[[ld]] : vector<128xi32>
240+ // CHECK: affine.yield %[[or]] : vector<128xi32>
241+ // CHECK: }
242+ // CHECK: %[[final_red:.*]] = vector.reduction <or>, %[[vred]] : vector<128xi32> into i32
243+ // CHECK: affine.store %[[final_red]], %{{.*}} : memref<256xi32>
244+ // CHECK: }
245+
246+
192247// -----
193248
194249// The inner reduction loop '%j' is vectorized. (The order of addf's operands is
0 commit comments