@@ -354,6 +354,150 @@ define <2 x i64> @fshl_select_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %sham
354354 ret <2 x i64 > %r
355355}
356356
357+ ; Convert an 'or concat' to fshl if the opposite 'or concat' (the same two values joined in the other order) already exists.
358+
; Positive test: %xy holds %x in bits 24..31 and %y in bits 0..23, and is kept
; alive by the store. %yx holds the same two values in the opposite order
; (%y in bits 8..31, %x in bits 0..7). The CHECK lines expect %yx to be
; rewritten as fshl(%xy, %xy, 8), i.e. a rotate-left of %xy by 8.
359+ define i32 @fshl_concat_i8_i24 (i8 %x , i24 %y , ptr %addr ) {
360+ ; CHECK-LABEL: @fshl_concat_i8_i24(
361+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
362+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw i32 [[ZEXT_X]], 24
363+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i24 [[Y:%.*]] to i32
364+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
365+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
366+ ; CHECK-NEXT: [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 8)
367+ ; CHECK-NEXT: ret i32 [[YX]]
368+ ;
369+ %zext.x = zext i8 %x to i32
370+ %slx = shl i32 %zext.x , 24
371+ %zext.y = zext i24 %y to i32
372+ %xy = or i32 %zext.y , %slx
373+ store i32 %xy , ptr %addr , align 4
374+ %sly = shl i32 %zext.y , 8
375+ %yx = or i32 %zext.x , %sly
376+ ret i32 %yx
377+ }
378+
; Positive test with two i8 sources that do not fill the whole i32. The shift
; amounts 13 and 19 add up to the 32-bit width: %xy has %x in bits 13..20 and
; %y in bits 0..7, while %yx has %y in bits 19..26 and %x in bits 0..7.
; The CHECK lines expect %yx to become fshl(%xy, %xy, 19).
379+ define i32 @fshl_concat_i8_i8 (i8 %x , i8 %y , ptr %addr ) {
380+ ; CHECK-LABEL: @fshl_concat_i8_i8(
381+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
382+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 13
383+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
384+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
385+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
386+ ; CHECK-NEXT: [[YX:%.*]] = call i32 @llvm.fshl.i32(i32 [[XY]], i32 [[XY]], i32 19)
387+ ; CHECK-NEXT: ret i32 [[YX]]
388+ ;
389+ %zext.x = zext i8 %x to i32
390+ %slx = shl i32 %zext.x , 13
391+ %zext.y = zext i8 %y to i32
392+ %xy = or i32 %zext.y , %slx
393+ store i32 %xy , ptr %addr , align 4
394+ %sly = shl i32 %zext.y , 19
395+ %yx = or i32 %zext.x , %sly
396+ ret i32 %yx
397+ }
398+
; Negative test: the shift amounts 25 and 7 add up to 32, but the two values
; are not cleanly concatenated. The CHECK lines expect both shl/or pairs to
; remain, with no fshl call.
399+ define i32 @fshl_concat_i8_i8_overlap (i8 %x , i8 %y , ptr %addr ) {
400+ ; CHECK-LABEL: @fshl_concat_i8_i8_overlap(
401+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
402+ ; CHECK-NEXT: [[SLX:%.*]] = shl i32 [[ZEXT_X]], 25
403+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
404+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
405+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
406+ ; CHECK-NEXT: [[SLY:%.*]] = shl nuw nsw i32 [[ZEXT_Y]], 7
407+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
408+ ; CHECK-NEXT: ret i32 [[YX]]
409+ ;
410+ ; Test sly overlap: %sly covers bits 7..14, which collides with %zext.x (bits 0..7) at bit 7.
411+ %zext.x = zext i8 %x to i32
412+ %slx = shl i32 %zext.x , 25
413+ %zext.y = zext i8 %y to i32
414+ %xy = or i32 %zext.y , %slx
415+ store i32 %xy , ptr %addr , align 4
416+ %sly = shl i32 %zext.y , 7
417+ %yx = or i32 %zext.x , %sly
418+ ret i32 %yx
419+ }
420+
; Negative test: the shift amounts 7 and 25 add up to 32, but %sly loses data.
; The CHECK lines expect both shl/or pairs to remain, with no fshl call.
421+ define i32 @fshl_concat_i8_i8_drop (i8 %x , i8 %y , ptr %addr ) {
422+ ; CHECK-LABEL: @fshl_concat_i8_i8_drop(
423+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
424+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 7
425+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
426+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
427+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
428+ ; CHECK-NEXT: [[SLY:%.*]] = shl i32 [[ZEXT_Y]], 25
429+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
430+ ; CHECK-NEXT: ret i32 [[YX]]
431+ ;
432+ ; Test sly drop: shifting the 8-bit %y left by 25 pushes its top bit out of the i32.
433+ %zext.x = zext i8 %x to i32
434+ %slx = shl i32 %zext.x , 7
435+ %zext.y = zext i8 %y to i32
436+ %xy = or i32 %zext.y , %slx
437+ store i32 %xy , ptr %addr , align 4
438+ %sly = shl i32 %zext.y , 25
439+ %yx = or i32 %zext.x , %sly
440+ ret i32 %yx
441+ }
442+
; Negative test: the shift amounts 9 and 22 add up to 31, not the 32-bit width,
; so %yx is not a rotation of %xy. The CHECK lines expect both shl/or pairs to
; remain, with no fshl call.
443+ define i32 @fshl_concat_i8_i8_different_slot (i8 %x , i8 %y , ptr %addr ) {
444+ ; CHECK-LABEL: @fshl_concat_i8_i8_different_slot(
445+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i8 [[X:%.*]] to i32
446+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw nsw i32 [[ZEXT_X]], 9
447+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i8 [[Y:%.*]] to i32
448+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y]]
449+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
450+ ; CHECK-NEXT: [[SLY:%.*]] = shl nuw nsw i32 [[ZEXT_Y]], 22
451+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
452+ ; CHECK-NEXT: ret i32 [[YX]]
453+ ;
454+ %zext.x = zext i8 %x to i32
455+ %slx = shl i32 %zext.x , 9
456+ %zext.y = zext i8 %y to i32
457+ %xy = or i32 %zext.y , %slx
458+ store i32 %xy , ptr %addr , align 4
459+ %sly = shl i32 %zext.y , 22
460+ %yx = or i32 %zext.x , %sly
461+ ret i32 %yx
462+ }
463+
; Negative test: same shl/or shape with shift amounts 16 + 16 == 32, but the
; operands are plain i32 arguments rather than zext results (the parameters
; only reuse the %zext.* names). The CHECK lines expect no fshl call.
; NOTE(review): presumably the fold is skipped because, without a zext, nothing
; bounds which bits of the operands may be set - confirm against the
; InstCombine change this test accompanies.
464+ define i32 @fshl_concat_unknown_source (i32 %zext.x , i32 %zext.y , ptr %addr ) {
465+ ; CHECK-LABEL: @fshl_concat_unknown_source(
466+ ; CHECK-NEXT: [[SLX:%.*]] = shl i32 [[ZEXT_X:%.*]], 16
467+ ; CHECK-NEXT: [[XY:%.*]] = or i32 [[SLX]], [[ZEXT_Y:%.*]]
468+ ; CHECK-NEXT: store i32 [[XY]], ptr [[ADDR:%.*]], align 4
469+ ; CHECK-NEXT: [[SLY:%.*]] = shl i32 [[ZEXT_Y]], 16
470+ ; CHECK-NEXT: [[YX:%.*]] = or i32 [[SLY]], [[ZEXT_X]]
471+ ; CHECK-NEXT: ret i32 [[YX]]
472+ ;
473+ %slx = shl i32 %zext.x , 16
474+ %xy = or i32 %zext.y , %slx
475+ store i32 %xy , ptr %addr , align 4
476+ %sly = shl i32 %zext.y , 16
477+ %yx = or i32 %zext.x , %sly
478+ ret i32 %yx
479+ }
480+
; Vector positive test: the <2 x i8> / <2 x i24> analogue of the i8/i24 scalar
; case, using splat shift amounts 24 and 8. Both 'or' instructions take the
; shifted value as their first operand here. The CHECK lines expect %yx to
; become fshl(%xy, %xy, <8, 8>).
481+ define <2 x i32 > @fshl_concat_vector (<2 x i8 > %x , <2 x i24 > %y , ptr %addr ) {
482+ ; CHECK-LABEL: @fshl_concat_vector(
483+ ; CHECK-NEXT: [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
484+ ; CHECK-NEXT: [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], <i32 24, i32 24>
485+ ; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32>
486+ ; CHECK-NEXT: [[XY:%.*]] = or <2 x i32> [[SLX]], [[ZEXT_Y]]
487+ ; CHECK-NEXT: store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4
488+ ; CHECK-NEXT: [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> <i32 8, i32 8>)
489+ ; CHECK-NEXT: ret <2 x i32> [[YX]]
490+ ;
491+ %zext.x = zext <2 x i8 > %x to <2 x i32 >
492+ %slx = shl <2 x i32 > %zext.x , <i32 24 , i32 24 >
493+ %zext.y = zext <2 x i24 > %y to <2 x i32 >
494+ %xy = or <2 x i32 > %slx , %zext.y
495+ store <2 x i32 > %xy , ptr %addr , align 4
496+ %sly = shl <2 x i32 > %zext.y , <i32 8 , i32 8 >
497+ %yx = or <2 x i32 > %sly , %zext.x
498+ ret <2 x i32 > %yx
499+ }
500+
357501; Negative test - an oversized shift in the narrow type would produce the wrong value.
358502
359503define i8 @unmasked_shlop_unmasked_shift_amount (i32 %x , i32 %y , i32 %shamt ) {
0 commit comments