@@ -8,9 +8,8 @@ declare void @use(<4 x i32>)

 define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ins0_ins0_add(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 0
-; CHECK-NEXT: [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
-; CHECK-NEXT: [[R:%.*]] = add <16 x i8> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <16 x i8> undef, i8 [[R_SCALAR]], i64 0
 ; CHECK-NEXT: ret <16 x i8> [[R]]
 ;
   %i0 = insertelement <16 x i8> undef, i8 %x, i32 0
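
For orientation, here is the first test as a whole - a sketch pieced together
from the hunk and its CHECK lines, since the diff cuts off mid-function. The
transform replaces a vector binop of two single-lane inserts with one scalar
op plus one insert:

    ; before (reconstructed): two inserts into undef at lane 0, then a vector add
    define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {
      %i0 = insertelement <16 x i8> undef, i8 %x, i32 0
      %i1 = insertelement <16 x i8> undef, i8 %y, i32 0
      %r = add <16 x i8> %i0, %i1
      ret <16 x i8> %r
    }

    ; after, per the new CHECK lines: one scalar add, one insert
    ;   %r.scalar = add i8 %x, %y
    ;   %r = insertelement <16 x i8> undef, i8 %r.scalar, i64 0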
@@ -23,9 +22,8 @@ define <16 x i8> @ins0_ins0_add(i8 %x, i8 %y) {

 define <8 x i16> @ins0_ins0_sub_flags(i16 %x, i16 %y) {
 ; CHECK-LABEL: @ins0_ins0_sub_flags(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i8 5
-; CHECK-NEXT: [[I1:%.*]] = insertelement <8 x i16> undef, i16 [[Y:%.*]], i32 5
-; CHECK-NEXT: [[R:%.*]] = sub nuw nsw <8 x i16> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = sub nuw nsw i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[R_SCALAR]], i64 5
 ; CHECK-NEXT: ret <8 x i16> [[R]]
 ;
   %i0 = insertelement <8 x i16> undef, i16 %x, i8 5
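
Two details are visible in this test's CHECK lines: the nuw/nsw flags travel
from the vector sub to the scalar sub, and the mismatched insert index types
(i8 5 vs. i32 5) are accepted because only the index value has to match - the
rewritten insert uses a canonical i64 index. A sketch:

    %i0 = insertelement <8 x i16> undef, i16 %x, i8 5    ; index type i8
    %i1 = insertelement <8 x i16> undef, i16 %y, i32 5   ; index type i32
    %r  = sub nuw nsw <8 x i16> %i0, %i1
    ; folds to:
    ;   %r.scalar = sub nuw nsw i16 %x, %y
    ;   %r = insertelement <8 x i16> undef, i16 %r.scalar, i64 5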
@@ -34,11 +32,13 @@ define <8 x i16> @ins0_ins0_sub_flags(i16 %x, i16 %y) {
   ret <8 x i16> %r
 }

+; The new vector constant is calculated by constant folding.
+; This is conservatively created as zero rather than undef for 'undef ^ undef'.
+
 define <2 x i64> @ins1_ins1_xor(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_xor(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i64 1
-; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> undef, i64 [[Y:%.*]], i32 1
-; CHECK-NEXT: [[R:%.*]] = xor <2 x i64> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT: ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> undef, i64 %x, i64 1
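
A worked view of the comment above (my reading, not text from the patch): the
lane not covered by the insert is folded from the operands' base constants,
and the folder makes a conservative choice for 'undef ^ undef':

    ; lane 0 of the folded base: xor i64 undef, undef
    ; This may legally fold to undef, but the folder picks 0 instead, so the
    ; base vector in the output is zeroinitializer:
    ;   %r = insertelement <2 x i64> zeroinitializer, i64 %r.scalar, i64 1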
@@ -51,9 +51,8 @@ define <2 x i64> @ins1_ins1_xor(i64 %x, i64 %y) {

 define <2 x double> @ins0_ins0_fadd(double %x, double %y) {
 ; CHECK-LABEL: @ins0_ins0_fadd(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
-; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x double> undef, double [[Y:%.*]], i32 0
-; CHECK-NEXT: [[R:%.*]] = fadd reassoc nsz <2 x double> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = fadd reassoc nsz double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> undef, double [[R_SCALAR]], i64 0
 ; CHECK-NEXT: ret <2 x double> [[R]]
 ;
   %i0 = insertelement <2 x double> undef, double %x, i32 0
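
Floating-point ops scalarize the same way, with the fast-math flags carried
over to the scalar op - a restatement of the CHECK lines above:

    %r = fadd reassoc nsz <2 x double> %i0, %i1
    ; becomes:
    ;   %r.scalar = fadd reassoc nsz double %x, %y
    ;   %r = insertelement <2 x double> undef, double %r.scalar, i64 0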
@@ -62,6 +61,8 @@ define <2 x double> @ins0_ins0_fadd(double %x, double %y) {
   ret <2 x double> %r
 }

+; Negative test - mismatched indexes (but could fold this).
+
 define <16 x i8> @ins1_ins0_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @ins1_ins0_add(
 ; CHECK-NEXT: [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 1
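
A sketch of why this test stays unchanged - the second insert is outside the
hunk, so its index 0 is assumed from the function name: %x lands in lane 1 and
%y in lane 0, so one scalar add plus one insert cannot express the result.
The "(but could fold this)" note presumably refers to each lane adding a
value to undef, which could itself simplify:

    %i0 = insertelement <16 x i8> undef, i8 %x, i32 1   ; x in lane 1
    %i1 = insertelement <16 x i8> undef, i8 %y, i32 0   ; y in lane 0 (assumed)
    %r  = add <16 x i8> %i0, %i1                        ; lanes mix x and y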
@@ -75,11 +76,12 @@ define <16 x i8> @ins1_ins0_add(i8 %x, i8 %y) {
   ret <16 x i8> %r
 }

+; Base vector does not have to be undef.
+
 define <4 x i32> @ins0_ins0_mul(i32 %x, i32 %y) {
 ; CHECK-LABEL: @ins0_ins0_mul(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
-; CHECK-NEXT: [[R:%.*]] = mul <4 x i32> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
 ; CHECK-NEXT: ret <4 x i32> [[R]]
 ;
   %i0 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
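
Here the folded lanes come from 'mul i32 0, undef' (zeroinitializer base
times undef base); multiplying by zero gives zero for any value of undef, so
the non-inserted lanes of the result stay zero, matching the CHECK line:

    ; lanes 1-3: mul i32 0, undef  -->  0
    ;   %r.scalar = mul i32 %x, %y
    ;   %r = insertelement <4 x i32> zeroinitializer, i32 %r.scalar, i64 0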
@@ -88,11 +90,12 @@ define <4 x i32> @ins0_ins0_mul(i32 %x, i32 %y) {
   ret <4 x i32> %r
 }

+; It is safe to scalarize any binop (no extra UB/poison danger).
+
 define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_sdiv(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> <i64 42, i64 -42>, i64 [[X:%.*]], i64 1
-; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> <i64 -7, i64 128>, i64 [[Y:%.*]], i32 1
-; CHECK-NEXT: [[R:%.*]] = sdiv <2 x i64> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = sdiv i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> <i64 -6, i64 0>, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT: ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> <i64 42, i64 -42>, i64 %x, i64 1
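
The scalar sdiv executes exactly when the vector sdiv did, so no UB is added,
and the folded base constant checks out by hand (my arithmetic, matching the
CHECK line):

    ; lane 0: sdiv i64 42, -7   = -6
    ; lane 1: sdiv i64 -42, 128 =  0   (overwritten by the insert anyway)
    ;   %r = insertelement <2 x i64> <i64 -6, i64 0>, i64 %r.scalar, i64 1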
@@ -101,11 +104,12 @@ define <2 x i64> @ins1_ins1_sdiv(i64 %x, i64 %y) {
   ret <2 x i64> %r
 }

+; Constant folding deals with undef per element - the entire value does not become undef.
+
 define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_udiv(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> <i64 42, i64 undef>, i64 [[X:%.*]], i32 1
-; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> <i64 7, i64 undef>, i64 [[Y:%.*]], i32 1
-; CHECK-NEXT: [[R:%.*]] = udiv <2 x i64> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = udiv i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> <i64 6, i64 undef>, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT: ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i32 1
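
Per-element folding at work (my reading of the comment above): the lane with
real constants keeps its value, and only the all-undef lane folds to undef:

    ; lane 0: udiv i64 42, 7        -->  6
    ; lane 1: udiv i64 undef, undef -->  undef (overwritten by the insert)
    ;   %r = insertelement <2 x i64> <i64 6, i64 undef>, i64 %r.scalar, i64 1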
@@ -114,11 +118,13 @@ define <2 x i64> @ins1_ins1_udiv(i64 %x, i64 %y) {
   ret <2 x i64> %r
 }

+; This could be simplified - the original code creates immediate UB because
+; the divisor has an undef element, but that UB is hidden after the transform.
+
 define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
 ; CHECK-LABEL: @ins1_ins1_urem(
-; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> <i64 42, i64 undef>, i64 [[X:%.*]], i64 1
-; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> <i64 undef, i64 128>, i64 [[Y:%.*]], i32 1
-; CHECK-NEXT: [[R:%.*]] = urem <2 x i64> [[I0]], [[I1]]
+; CHECK-NEXT: [[R_SCALAR:%.*]] = urem i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[R_SCALAR]], i64 1
 ; CHECK-NEXT: ret <2 x i64> [[R]]
 ;
   %i0 = insertelement <2 x i64> <i64 42, i64 undef>, i64 %x, i64 1
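
A worked view of the UB note above: in the original code, lane 0 divides by
an undef element, and division by undef is immediate UB since undef may be 0.
The folder instead emits undef for that lane, so the UB disappears after the
transform:

    ; lane 0: urem i64 42, undef  -->  immediate UB before; folded to undef
    ; lane 1: urem i64 undef, 128 -->  0 (overwritten by the insert)
    ;   %r = insertelement <2 x i64> <i64 undef, i64 0>, i64 %r.scalar, i64 1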
@@ -127,6 +133,9 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
   ret <2 x i64> %r
 }

+; Negative test
+; TODO: The extra use could be accounted for in the cost calculation.
+
 define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
 ; CHECK-LABEL: @ins0_ins0_xor(
 ; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0