@@ -527,15 +527,6 @@ define void @rt_stride_1_with_reordering(ptr %pl, i64 %stride, ptr %ps) {
527527 ret void
528528}
529529
530- ; TODO: We want to generate this code:
531- ; define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
532- ; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
533- ; %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
534- ; %strided_load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 8, <4 x i1> splat (i1 true), i32 4)
535- ; %bitcast_ = bitcast <4 x i32> %strided_load to <16 x i8>
536- ; store <16 x i8> %bitcast_, ptr %gep_s0, align 1
537- ; ret void
538- ; }
539530define void @constant_stride_widen_no_reordering (ptr %pl , i64 %stride , ptr %ps ) {
540531; CHECK-LABEL: define void @constant_stride_widen_no_reordering(
541532; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
@@ -545,7 +536,6 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
545536; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <28 x i8> [[TMP1]], <28 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
546537; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
547538; CHECK-NEXT: ret void
548- ;
549539 %gep_l0 = getelementptr inbounds i8 , ptr %pl , i64 0
550540 %gep_l1 = getelementptr inbounds i8 , ptr %pl , i64 1
551541 %gep_l2 = getelementptr inbounds i8 , ptr %pl , i64 2
@@ -617,6 +607,107 @@ define void @constant_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps)
617607 ret void
618608}
619609
; Widened constant-stride load, stride of 100 bytes, no reordering.
;
; The input reads four groups of four contiguous i8 values starting at byte
; offsets 0, 100, 200 and 300 from %pl, and stores the 16 bytes contiguously,
; in load order, to %ps[0..15]. The %stride argument is not used by the body.
;
; NOTE: this function needs a name distinct from the stride-8 variant
; @constant_stride_widen_no_reordering defined earlier in this file; textual
; IR does not allow two definitions of the same function name, and FileCheck
; needs a unique CHECK-LABEL per function.
;
; TODO: We want to generate this code:
; define void @constant_stride_widen_no_reordering_stride_100(ptr %pl, i64 %stride, ptr %ps) #0 {
;   %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
;   %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
;   %1 = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 %gep_l0, i64 100, <4 x i1> splat (i1 true), i32 4)
;   %2 = bitcast <4 x i32> %1 to <16 x i8>
;   store <16 x i8> %2, ptr %gep_s0, align 1
;   ret void
; }
define void @constant_stride_widen_no_reordering_stride_100(ptr %pl, i64 %stride, ptr %ps) {
; CHECK-LABEL: define void @constant_stride_widen_no_reordering_stride_100(
; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[GEP_L0:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 0
; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 100
; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 200
; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 300
; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[GEP_L0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_L4]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_L8]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[TMP7]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[GEP_S0]], align 1
; CHECK-NEXT: ret void
;
  ; Load addresses: four runs of four consecutive bytes, with run starts
  ; 100 bytes apart (0, 100, 200, 300).
  %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
  %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
  %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
  %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
  %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 100
  %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 101
  %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 102
  %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 103
  %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 200
  %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 201
  %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 202
  %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 203
  %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 300
  %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 301
  %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 302
  %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 303

  %load0 = load i8, ptr %gep_l0, align 1
  %load1 = load i8, ptr %gep_l1, align 1
  %load2 = load i8, ptr %gep_l2, align 1
  %load3 = load i8, ptr %gep_l3, align 1
  %load4 = load i8, ptr %gep_l4, align 1
  %load5 = load i8, ptr %gep_l5, align 1
  %load6 = load i8, ptr %gep_l6, align 1
  %load7 = load i8, ptr %gep_l7, align 1
  %load8 = load i8, ptr %gep_l8, align 1
  %load9 = load i8, ptr %gep_l9, align 1
  %load10 = load i8, ptr %gep_l10, align 1
  %load11 = load i8, ptr %gep_l11, align 1
  %load12 = load i8, ptr %gep_l12, align 1
  %load13 = load i8, ptr %gep_l13, align 1
  %load14 = load i8, ptr %gep_l14, align 1
  %load15 = load i8, ptr %gep_l15, align 1

  ; Store addresses: 16 consecutive bytes starting at %ps.
  %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
  %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
  %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
  %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
  %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
  %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
  %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
  %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
  %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
  %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
  %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
  %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
  %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
  %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
  %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
  %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15

  ; Element i of the load sequence goes to store slot i (no reordering).
  store i8 %load0, ptr %gep_s0, align 1
  store i8 %load1, ptr %gep_s1, align 1
  store i8 %load2, ptr %gep_s2, align 1
  store i8 %load3, ptr %gep_s3, align 1
  store i8 %load4, ptr %gep_s4, align 1
  store i8 %load5, ptr %gep_s5, align 1
  store i8 %load6, ptr %gep_s6, align 1
  store i8 %load7, ptr %gep_s7, align 1
  store i8 %load8, ptr %gep_s8, align 1
  store i8 %load9, ptr %gep_s9, align 1
  store i8 %load10, ptr %gep_s10, align 1
  store i8 %load11, ptr %gep_s11, align 1
  store i8 %load12, ptr %gep_s12, align 1
  store i8 %load13, ptr %gep_s13, align 1
  store i8 %load14, ptr %gep_s14, align 1
  store i8 %load15, ptr %gep_s15, align 1

  ret void
}
620711; TODO: We want to generate this code:
621712; define void @rt_stride_widen_no_reordering(ptr %pl, i64 %stride, ptr %ps) {
622713; %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 %offset0
0 commit comments