diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst index f007dfe549990..2349e51477b7d 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/return-const-ref-from-parameter.rst @@ -12,6 +12,15 @@ after the call. When the function returns such a parameter also as constant reference then the returned reference can be used after the object it refers to has been destroyed. +This issue can be resolved by declaring an overload of the problematic function +where the ``const &`` parameter is instead declared as ``&&``. The developer has +to ensure that the implementation of that function does not produce a +use-after-free, the exact error that this check warns against. +Marking such an ``&&`` overload as ``deleted`` will silence the warning as +well. When different ``const &`` parameters may be returned depending +on the control flow of the function, an overload in which all problematic +``const &`` parameters have been declared as ``&&`` will resolve the issue. + Example ------- diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 847bf4baf7488..c124fefc78611 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2571,8 +2571,8 @@ with the `offsetof` macro. .. _alpha-core-StackAddressAsyncEscape: -alpha.core.StackAddressAsyncEscape (C) -"""""""""""""""""""""""""""""""""""""" +alpha.core.StackAddressAsyncEscape (ObjC) +""""""""""""""""""""""""""""""""""""""""" Check that addresses to stack memory do not escape the function that involves dispatch_after or dispatch_async. This checker is a part of ``core.StackAddressEscape``, but is temporarily disabled until some false positives are fixed.
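An editorial aside: the remedy the new clang-tidy documentation describes is easiest to see in code. Below is a minimal C++ sketch (the function ``longestOf`` and its body are hypothetical, not part of this patch) showing the dangling-reference hazard and the deleted ``&&`` overloads that both reject it at compile time and silence the check:

```cpp
#include <string>

// Returns one of its const & parameters. If a caller binds either parameter
// to a temporary, the returned reference dangles once that temporary is
// destroyed at the end of the full expression.
const std::string &longestOf(const std::string &a, const std::string &b) {
  return a.size() > b.size() ? a : b;
}

// The remedy sketched in the documentation above: && overloads cover the
// problematic parameters. Marking them deleted rejects rvalue arguments at
// compile time, and the check no longer warns on the const & overload.
const std::string &longestOf(std::string &&, const std::string &) = delete;
const std::string &longestOf(const std::string &, std::string &&) = delete;
const std::string &longestOf(std::string &&, std::string &&) = delete;

int main() {
  std::string x = "abc", y = "de";
  const std::string &ok = longestOf(x, y); // fine: x and y outlive the call
  // const std::string &bad = longestOf(x, "tmp"); // ill-formed: deleted overload
  return static_cast<int>(ok.size());
}
```

With the deleted overloads in place, a call that passes a temporary fails to compile instead of returning a reference into a destroyed object.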
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 62c382b67ad14..5448bd841959f 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -728,6 +728,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_ZNVER4: defineCPUMacros(Builder, "znver4"); break; + case CK_ZNVER5: + defineCPUMacros(Builder, "znver5"); + break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; @@ -1626,6 +1629,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const { case CK_ZNVER2: case CK_ZNVER3: case CK_ZNVER4: + case CK_ZNVER5: // Deprecated case CK_x86_64: case CK_x86_64_v2: diff --git a/clang/lib/Headers/module.modulemap b/clang/lib/Headers/module.modulemap index 9ffc249c8d1a2..dcaf09e8f2c55 100644 --- a/clang/lib/Headers/module.modulemap +++ b/clang/lib/Headers/module.modulemap @@ -66,6 +66,8 @@ module _Builtin_intrinsics [system] [extern_c] { textual header "__wmmintrin_aes.h" textual header "__wmmintrin_pclmul.h" + textual header "mm3dnow.h" + explicit module mm_malloc { requires !freestanding header "mm_malloc.h" @@ -122,10 +124,6 @@ module _Builtin_intrinsics [system] [extern_c] { header "popcntintrin.h" } - explicit module mm3dnow { - header "mm3dnow.h" - } - explicit module aes_pclmul { header "wmmintrin.h" export aes diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c index 471a31a8c5eac..8a2bc93dd6cd0 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c @@ -3,6 +3,8 @@ // RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE // RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \ // RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-BE +// RUN: %clang_cc1 -O0 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \ +// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-LE-NOOPT // CHECK-LE-LABEL: @test1( // CHECK-LE-NEXT: entry: @@ -16,6 +18,42 @@ // CHECK-BE-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]] // CHECK-BE-NEXT: ret void // +// CHECK-LE-NOOPT-LABEL: @test1( +// CHECK-LE-NOOPT-NEXT: entry: +// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-LE-NOOPT-NEXT: [[VC3_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-LE-NOOPT-NEXT: [[VC4_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64 +// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32 +// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <512 x i1>, align 64 +// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC3:%.*]], ptr [[VC3_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC4:%.*]], ptr [[VC4_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]],
align 8 +// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64 +// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64 +// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32 +// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32 +// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC3_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC4_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]], <16 x i8> [[TMP5]], <16 x i8> [[TMP4]]) +// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP8]], ptr [[RES]], align 64 +// CHECK-LE-NOOPT-NEXT: [[TMP9:%.*]] = load <512 x i1>, ptr [[RES]], align 64 +// CHECK-LE-NOOPT-NEXT: [[TMP10:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP9]], ptr [[TMP10]], align 64 +// CHECK-LE-NOOPT-NEXT: ret void +// void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vector unsigned char vc2, vector unsigned char vc3, vector unsigned char vc4, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); @@ -37,6 +75,36 @@ void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vec // CHECK-BE-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]] // CHECK-BE-NEXT: ret void // +// CHECK-LE-NOOPT-LABEL: @test2( +// CHECK-LE-NOOPT-NEXT: entry: +// CHECK-LE-NOOPT-NEXT: [[VQP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-LE-NOOPT-NEXT: [[VPP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-LE-NOOPT-NEXT: [[VC1_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-LE-NOOPT-NEXT: [[VC2_ADDR:%.*]] = alloca <16 x i8>, align 16 +// CHECK-LE-NOOPT-NEXT: [[RESP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-LE-NOOPT-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64 +// CHECK-LE-NOOPT-NEXT: [[VP:%.*]] = alloca <256 x i1>, align 32 +// CHECK-LE-NOOPT-NEXT: [[RES:%.*]] = alloca <256 x i1>, align 32 +// CHECK-LE-NOOPT-NEXT: store ptr [[VQP:%.*]], ptr [[VQP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store ptr [[VPP:%.*]], ptr [[VPP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC1:%.*]], ptr [[VC1_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store <16 x i8> [[VC2:%.*]], ptr [[VC2_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: store ptr [[RESP:%.*]], ptr [[RESP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VQP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: [[TMP1:%.*]] = load <512 x i1>, ptr [[TMP0]], align 64 +// CHECK-LE-NOOPT-NEXT: store <512 x i1> [[TMP1]], ptr [[VQ]], align 64 +// CHECK-LE-NOOPT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32 +// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP3]], ptr [[VP]], align 32 +// CHECK-LE-NOOPT-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC1_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC2_ADDR]], align 16 +// CHECK-LE-NOOPT-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP5]], <16 x i8> [[TMP4]]) +// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP6]], ptr [[RES]], align 64 +// CHECK-LE-NOOPT-NEXT: [[TMP7:%.*]] = load <256 x i1>, ptr 
[[RES]], align 32 +// CHECK-LE-NOOPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[RESP_ADDR]], align 8 +// CHECK-LE-NOOPT-NEXT: store <256 x i1> [[TMP7]], ptr [[TMP8]], align 32 +// CHECK-LE-NOOPT-NEXT: ret void +// void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc1, vector unsigned char vc2, unsigned char *resp) { __vector_quad vq = *((__vector_quad *)vqp); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c index a414a2827b2c4..39c040967dc0c 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma-types.c @@ -16,18 +16,18 @@ // CHECK-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 // CHECK-NEXT: store ptr [[TMP0]], ptr [[VQP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VQP]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, ptr [[TMP2]], align 64 -// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ1]], align 64 -// CHECK-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz() -// CHECK-NEXT: store <512 x i1> [[TMP4]], ptr [[VQ2]], align 64 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VQP]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64 +// CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[VQ1]], align 64 +// CHECK-NEXT: [[TMP3:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz() +// CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ2]], align 64 +// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 // CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]]) -// CHECK-NEXT: store <512 x i1> [[TMP7]], ptr [[VQ3]], align 64 -// CHECK-NEXT: [[TMP8:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VQP]], align 8 -// CHECK-NEXT: store <512 x i1> [[TMP8]], ptr [[TMP9]], align 64 +// CHECK-NEXT: [[TMP6:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) +// CHECK-NEXT: store <512 x i1> [[TMP6]], ptr [[VQ3]], align 64 +// CHECK-NEXT: [[TMP7:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VQP]], align 8 +// CHECK-NEXT: store <512 x i1> [[TMP7]], ptr [[TMP8]], align 64 // CHECK-NEXT: ret void // // CHECK-BE-LABEL: @testVQLocal( @@ -42,18 +42,18 @@ // CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16 // CHECK-BE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 // CHECK-BE-NEXT: store ptr [[TMP0]], ptr [[VQP]], align 8 -// CHECK-BE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VQP]], align 8 -// CHECK-BE-NEXT: [[TMP3:%.*]] = load <512 x i1>, ptr [[TMP2]], align 64 -// CHECK-BE-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ1]], align 64 -// CHECK-BE-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz() -// CHECK-BE-NEXT: store <512 x i1> [[TMP4]], ptr [[VQ2]], align 64 +// CHECK-BE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VQP]], align 8 +// CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64 +// CHECK-BE-NEXT: store <512 x i1> [[TMP2]], ptr [[VQ1]], align 64 +// CHECK-BE-NEXT: [[TMP3:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz() +// CHECK-BE-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ2]], align 64 +// CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, 
ptr [[VC_ADDR]], align 16 // CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]]) -// CHECK-BE-NEXT: store <512 x i1> [[TMP7]], ptr [[VQ3]], align 64 -// CHECK-BE-NEXT: [[TMP8:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64 -// CHECK-BE-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VQP]], align 8 -// CHECK-BE-NEXT: store <512 x i1> [[TMP8]], ptr [[TMP9]], align 64 +// CHECK-BE-NEXT: [[TMP6:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) +// CHECK-BE-NEXT: store <512 x i1> [[TMP6]], ptr [[VQ3]], align 64 +// CHECK-BE-NEXT: [[TMP7:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64 +// CHECK-BE-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VQP]], align 8 +// CHECK-BE-NEXT: store <512 x i1> [[TMP7]], ptr [[TMP8]], align 64 // CHECK-BE-NEXT: ret void // void testVQLocal(int *ptr, vector unsigned char vc) { @@ -79,24 +79,24 @@ void testVQLocal(int *ptr, vector unsigned char vc) { // CHECK-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 // CHECK-NEXT: store ptr [[TMP0]], ptr [[VPP]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32 -// CHECK-NEXT: store <256 x i1> [[TMP3]], ptr [[VP1]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VPP]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load <256 x i1>, ptr [[TMP1]], align 32 +// CHECK-NEXT: store <256 x i1> [[TMP2]], ptr [[VP1]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) -// CHECK-NEXT: store <256 x i1> [[TMP6]], ptr [[VP2]], align 64 +// CHECK-NEXT: [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]]) +// CHECK-NEXT: store <256 x i1> [[TMP5]], ptr [[VP2]], align 64 +// CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 // CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP8]], <16 x i8> [[TMP7]]) -// CHECK-NEXT: store <256 x i1> [[TMP9]], ptr [[VP2]], align 64 -// CHECK-NEXT: [[TMP10:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 -// CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]]) -// CHECK-NEXT: store <512 x i1> [[TMP12]], ptr [[VQ]], align 64 -// CHECK-NEXT: [[TMP13:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 -// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[VPP]], align 8 -// CHECK-NEXT: store <256 x i1> [[TMP13]], ptr [[TMP14]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP6]]) +// CHECK-NEXT: store <256 x i1> [[TMP8]], ptr [[VP2]], align 64 +// CHECK-NEXT: [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 +// CHECK-NEXT: [[TMP11:%.*]] 
= call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]]) +// CHECK-NEXT: store <512 x i1> [[TMP11]], ptr [[VQ]], align 64 +// CHECK-NEXT: [[TMP12:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VPP]], align 8 +// CHECK-NEXT: store <256 x i1> [[TMP12]], ptr [[TMP13]], align 32 // CHECK-NEXT: ret void // // CHECK-BE-LABEL: @testVPLocal( @@ -112,24 +112,24 @@ void testVQLocal(int *ptr, vector unsigned char vc) { // CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16 // CHECK-BE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 // CHECK-BE-NEXT: store ptr [[TMP0]], ptr [[VPP]], align 8 -// CHECK-BE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP]], align 8 -// CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32 -// CHECK-BE-NEXT: store <256 x i1> [[TMP3]], ptr [[VP1]], align 32 +// CHECK-BE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VPP]], align 8 +// CHECK-BE-NEXT: [[TMP2:%.*]] = load <256 x i1>, ptr [[TMP1]], align 32 +// CHECK-BE-NEXT: store <256 x i1> [[TMP2]], ptr [[VP1]], align 32 +// CHECK-BE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 // CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]]) -// CHECK-BE-NEXT: store <256 x i1> [[TMP6]], ptr [[VP2]], align 64 +// CHECK-BE-NEXT: [[TMP5:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]]) +// CHECK-BE-NEXT: store <256 x i1> [[TMP5]], ptr [[VP2]], align 64 +// CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 // CHECK-BE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) -// CHECK-BE-NEXT: store <256 x i1> [[TMP9]], ptr [[VP2]], align 64 -// CHECK-BE-NEXT: [[TMP10:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 -// CHECK-BE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 -// CHECK-BE-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]]) -// CHECK-BE-NEXT: store <512 x i1> [[TMP12]], ptr [[VQ]], align 64 -// CHECK-BE-NEXT: [[TMP13:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 -// CHECK-BE-NEXT: [[TMP14:%.*]] = load ptr, ptr [[VPP]], align 8 -// CHECK-BE-NEXT: store <256 x i1> [[TMP13]], ptr [[TMP14]], align 32 +// CHECK-BE-NEXT: [[TMP8:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]]) +// CHECK-BE-NEXT: store <256 x i1> [[TMP8]], ptr [[VP2]], align 64 +// CHECK-BE-NEXT: [[TMP9:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 +// CHECK-BE-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16 +// CHECK-BE-NEXT: [[TMP11:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP9]], <16 x i8> [[TMP10]]) +// CHECK-BE-NEXT: store <512 x i1> [[TMP11]], ptr [[VQ]], align 64 +// CHECK-BE-NEXT: [[TMP12:%.*]] = load <256 x i1>, ptr [[VP3]], align 32 +// CHECK-BE-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VPP]], align 8 +// CHECK-BE-NEXT: store <256 x i1> [[TMP12]], ptr [[TMP13]], align 32 // CHECK-BE-NEXT: ret void // void testVPLocal(int *ptr, vector unsigned char vc) { @@ -154,18 +154,18 @@ void testVPLocal(int *ptr, vector unsigned char vc) { // 
CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 -// CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 -// CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 -// CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 -// CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 +// CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 +// CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 +// CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 +// CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16 // CHECK-NEXT: ret void // // CHECK-BE-LABEL: @testRestrictQualifiedPointer2( @@ -178,18 +178,18 @@ void testVPLocal(int *ptr, vector unsigned char vc) { // CHECK-BE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8 // CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64 // CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 -// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16 -// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 -// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16 -// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 -// CHECK-BE-NEXT: store <16 x i8> 
[[TMP9]], ptr [[TMP10]], align 16 -// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 -// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16 +// CHECK-BE-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 +// CHECK-BE-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 +// CHECK-BE-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16 +// CHECK-BE-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 +// CHECK-BE-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 +// CHECK-BE-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16 +// CHECK-BE-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 +// CHECK-BE-NEXT: [[TMP9:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 +// CHECK-BE-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16 +// CHECK-BE-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 +// CHECK-BE-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 +// CHECK-BE-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16 // CHECK-BE-NEXT: ret void // void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) { @@ -207,18 +207,18 @@ void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) { // CHECK-NEXT: [[TMP1:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 -// CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16 -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 -// CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 -// CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16 -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 -// CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 +// CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 +// CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 +// CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 +// CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16 // CHECK-NEXT: ret void // // CHECK-BE-LABEL: @testVolatileQualifiedPointer2( @@ -231,18 +231,18 @@ void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) { // CHECK-BE-NEXT: [[TMP1:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8 // CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64 // CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]]) -// CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 -// CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 -// CHECK-BE-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16 -// CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 -// CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 -// CHECK-BE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16 -// CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 -// CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 -// CHECK-BE-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16 -// CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 -// CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 -// CHECK-BE-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16 +// CHECK-BE-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0 +// CHECK-BE-NEXT: [[TMP5:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0 +// CHECK-BE-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 16 +// CHECK-BE-NEXT: [[TMP6:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1 +// CHECK-BE-NEXT: [[TMP7:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1 +// CHECK-BE-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP7]], align 16 +// CHECK-BE-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2 +// CHECK-BE-NEXT: [[TMP9:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2 +// CHECK-BE-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP9]], align 16 +// CHECK-BE-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3 +// CHECK-BE-NEXT: [[TMP11:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3 +// CHECK-BE-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP11]], align 16 // CHECK-BE-NEXT: ret void // void testVolatileQualifiedPointer2(__vector_quad *__volatile acc) { diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index 14024e3953182..2a05074d7c2b6 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -207,4 +207,5 @@ void verifycpustrings(void) { (void)__builtin_cpu_is("znver2"); (void)__builtin_cpu_is("znver3"); (void)__builtin_cpu_is("znver4"); + (void)__builtin_cpu_is("znver5"); } diff --git 
a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c index cc993b53937c1..3bc2a82ae778d 100644 --- a/clang/test/Driver/x86-march.c +++ b/clang/test/Driver/x86-march.c @@ -242,6 +242,10 @@ // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver4 2>&1 \ // RUN: | FileCheck %s -check-prefix=znver4 // znver4: "-target-cpu" "znver4" +// +// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver5 2>&1 \ +// RUN: | FileCheck %s -check-prefix=znver5 +// znver5: "-target-cpu" "znver5" // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64 // x86-64: "-target-cpu" "x86-64" diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c index 6c8502ac2c21e..f2885a040c370 100644 --- a/clang/test/Frontend/x86-target-cpu.c +++ b/clang/test/Frontend/x86-target-cpu.c @@ -38,5 +38,6 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver4 -verify %s +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver5 -verify %s // // expected-no-diagnostics diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c index 607192a5409ba..7879676040af4 100644 --- a/clang/test/Misc/target-invalid-cpu-note/x86.c +++ b/clang/test/Misc/target-invalid-cpu-note/x86.c @@ -99,6 +99,7 @@ // X86-SAME: {{^}}, znver2 // X86-SAME: {{^}}, znver3 // X86-SAME: {{^}}, znver4 +// X86-SAME: {{^}}, znver5 // X86-SAME: {{^}}, x86-64 // X86-SAME: {{^}}, x86-64-v2 // X86-SAME: {{^}}, x86-64-v3 @@ -175,6 +176,7 @@ // X86_64-SAME: {{^}}, znver2 // X86_64-SAME: {{^}}, znver3 // X86_64-SAME: {{^}}, znver4 +// X86_64-SAME: {{^}}, znver5 // X86_64-SAME: {{^}}, x86-64 // X86_64-SAME: {{^}}, x86-64-v2 // X86_64-SAME: {{^}}, x86-64-v3 @@ -278,6 +280,7 @@ // TUNE_X86-SAME: {{^}}, znver2 // TUNE_X86-SAME: {{^}}, znver3 // TUNE_X86-SAME: {{^}}, znver4 +// TUNE_X86-SAME: {{^}}, znver5 // TUNE_X86-SAME: {{^}}, x86-64 // TUNE_X86-SAME: {{^}}, geode // TUNE_X86-SAME: {{$}} @@ -379,6 +382,7 @@ // TUNE_X86_64-SAME: {{^}}, znver2 // TUNE_X86_64-SAME: {{^}}, znver3 // TUNE_X86_64-SAME: {{^}}, znver4 +// TUNE_X86_64-SAME: {{^}}, znver5 // TUNE_X86_64-SAME: {{^}}, x86-64 // TUNE_X86_64-SAME: {{^}}, geode // TUNE_X86_64-SAME: {{$}} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 49646d94d920c..a149c69ee0cdb 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -3923,6 +3923,148 @@ // CHECK_ZNVER4_M64: #define __znver4 1 // CHECK_ZNVER4_M64: #define __znver4__ 1 +// RUN: %clang -march=znver5 -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER5_M32 +// CHECK_ZNVER5_M32-NOT: #define __3dNOW_A__ 1 +// CHECK_ZNVER5_M32-NOT: #define __3dNOW__ 1 +// CHECK_ZNVER5_M32: #define __ADX__ 1 +// CHECK_ZNVER5_M32: #define __AES__ 1 +// CHECK_ZNVER5_M32: #define __AVX2__ 1 +// CHECK_ZNVER5_M32: #define __AVX512BF16__ 1 +// CHECK_ZNVER5_M32: #define __AVX512BITALG__ 1 +// CHECK_ZNVER5_M32: #define __AVX512BW__ 1 +// CHECK_ZNVER5_M32: #define __AVX512CD__ 1 +// CHECK_ZNVER5_M32: #define __AVX512DQ__ 1 +// CHECK_ZNVER5_M32: #define __AVX512F__ 1 +// CHECK_ZNVER5_M32: #define __AVX512IFMA__ 1 +// CHECK_ZNVER5_M32: #define __AVX512VBMI2__ 
1 +// CHECK_ZNVER5_M32: #define __AVX512VBMI__ 1 +// CHECK_ZNVER5_M32: #define __AVX512VL__ 1 +// CHECK_ZNVER5_M32: #define __AVX512VNNI__ 1 +// CHECK_ZNVER5_M32: #define __AVX512VP2INTERSECT__ 1 +// CHECK_ZNVER5_M32: #define __AVX512VPOPCNTDQ__ 1 +// CHECK_ZNVER5_M32: #define __AVXVNNI__ 1 +// CHECK_ZNVER5_M32: #define __AVX__ 1 +// CHECK_ZNVER5_M32: #define __BMI2__ 1 +// CHECK_ZNVER5_M32: #define __BMI__ 1 +// CHECK_ZNVER5_M32: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER5_M32: #define __CLWB__ 1 +// CHECK_ZNVER5_M32: #define __CLZERO__ 1 +// CHECK_ZNVER5_M32: #define __F16C__ 1 +// CHECK_ZNVER5_M32-NOT: #define __FMA4__ 1 +// CHECK_ZNVER5_M32: #define __FMA__ 1 +// CHECK_ZNVER5_M32: #define __FSGSBASE__ 1 +// CHECK_ZNVER5_M32: #define __GFNI__ 1 +// CHECK_ZNVER5_M32: #define __LZCNT__ 1 +// CHECK_ZNVER5_M32: #define __MMX__ 1 +// CHECK_ZNVER5_M32: #define __MOVDIR64B__ 1 +// CHECK_ZNVER5_M32: #define __MOVDIRI__ 1 +// CHECK_ZNVER5_M32: #define __PCLMUL__ 1 +// CHECK_ZNVER5_M32: #define __PKU__ 1 +// CHECK_ZNVER5_M32: #define __POPCNT__ 1 +// CHECK_ZNVER5_M32: #define __PREFETCHI__ 1 +// CHECK_ZNVER5_M32: #define __PRFCHW__ 1 +// CHECK_ZNVER5_M32: #define __RDPID__ 1 +// CHECK_ZNVER5_M32: #define __RDPRU__ 1 +// CHECK_ZNVER5_M32: #define __RDRND__ 1 +// CHECK_ZNVER5_M32: #define __RDSEED__ 1 +// CHECK_ZNVER5_M32: #define __SHA__ 1 +// CHECK_ZNVER5_M32: #define __SSE2_MATH__ 1 +// CHECK_ZNVER5_M32: #define __SSE2__ 1 +// CHECK_ZNVER5_M32: #define __SSE3__ 1 +// CHECK_ZNVER5_M32: #define __SSE4A__ 1 +// CHECK_ZNVER5_M32: #define __SSE4_1__ 1 +// CHECK_ZNVER5_M32: #define __SSE4_2__ 1 +// CHECK_ZNVER5_M32: #define __SSE_MATH__ 1 +// CHECK_ZNVER5_M32: #define __SSE__ 1 +// CHECK_ZNVER5_M32: #define __SSSE3__ 1 +// CHECK_ZNVER5_M32-NOT: #define __TBM__ 1 +// CHECK_ZNVER5_M32: #define __WBNOINVD__ 1 +// CHECK_ZNVER5_M32-NOT: #define __XOP__ 1 +// CHECK_ZNVER5_M32: #define __XSAVEC__ 1 +// CHECK_ZNVER5_M32: #define __XSAVEOPT__ 1 +// CHECK_ZNVER5_M32: #define __XSAVES__ 1 +// CHECK_ZNVER5_M32: #define __XSAVE__ 1 +// CHECK_ZNVER5_M32: #define __i386 1 +// CHECK_ZNVER5_M32: #define __i386__ 1 +// CHECK_ZNVER5_M32: #define __tune_znver5__ 1 +// CHECK_ZNVER5_M32: #define __znver5 1 +// CHECK_ZNVER5_M32: #define __znver5__ 1 + +// RUN: %clang -march=znver5 -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER5_M64 +// CHECK_ZNVER5_M64-NOT: #define __3dNOW_A__ 1 +// CHECK_ZNVER5_M64-NOT: #define __3dNOW__ 1 +// CHECK_ZNVER5_M64: #define __ADX__ 1 +// CHECK_ZNVER5_M64: #define __AES__ 1 +// CHECK_ZNVER5_M64: #define __AVX2__ 1 +// CHECK_ZNVER5_M64: #define __AVX512BF16__ 1 +// CHECK_ZNVER5_M64: #define __AVX512BITALG__ 1 +// CHECK_ZNVER5_M64: #define __AVX512BW__ 1 +// CHECK_ZNVER5_M64: #define __AVX512CD__ 1 +// CHECK_ZNVER5_M64: #define __AVX512DQ__ 1 +// CHECK_ZNVER5_M64: #define __AVX512F__ 1 +// CHECK_ZNVER5_M64: #define __AVX512IFMA__ 1 +// CHECK_ZNVER5_M64: #define __AVX512VBMI2__ 1 +// CHECK_ZNVER5_M64: #define __AVX512VBMI__ 1 +// CHECK_ZNVER5_M64: #define __AVX512VL__ 1 +// CHECK_ZNVER5_M64: #define __AVX512VNNI__ 1 +// CHECK_ZNVER5_M64: #define __AVX512VP2INTERSECT__ 1 +// CHECK_ZNVER5_M64: #define __AVX512VPOPCNTDQ__ 1 +// CHECK_ZNVER5_M64: #define __AVXVNNI__ 1 +// CHECK_ZNVER5_M64: #define __AVX__ 1 +// CHECK_ZNVER5_M64: #define __BMI2__ 1 +// CHECK_ZNVER5_M64: #define __BMI__ 1 +// CHECK_ZNVER5_M64: #define __CLFLUSHOPT__ 1 +// CHECK_ZNVER5_M64: #define __CLWB__ 1 +// CHECK_ZNVER5_M64: #define __CLZERO__ 1 +// 
CHECK_ZNVER5_M64: #define __F16C__ 1 +// CHECK_ZNVER5_M64-NOT: #define __FMA4__ 1 +// CHECK_ZNVER5_M64: #define __FMA__ 1 +// CHECK_ZNVER5_M64: #define __FSGSBASE__ 1 +// CHECK_ZNVER5_M64: #define __GFNI__ 1 +// CHECK_ZNVER5_M64: #define __LZCNT__ 1 +// CHECK_ZNVER5_M64: #define __MMX__ 1 +// CHECK_ZNVER5_M64: #define __MOVDIR64B__ 1 +// CHECK_ZNVER5_M64: #define __MOVDIRI__ 1 +// CHECK_ZNVER5_M64: #define __PCLMUL__ 1 +// CHECK_ZNVER5_M64: #define __PKU__ 1 +// CHECK_ZNVER5_M64: #define __POPCNT__ 1 +// CHECK_ZNVER5_M64: #define __PREFETCHI__ 1 +// CHECK_ZNVER5_M64: #define __PRFCHW__ 1 +// CHECK_ZNVER5_M64: #define __RDPID__ 1 +// CHECK_ZNVER5_M64: #define __RDPRU__ 1 +// CHECK_ZNVER5_M64: #define __RDRND__ 1 +// CHECK_ZNVER5_M64: #define __RDSEED__ 1 +// CHECK_ZNVER5_M64: #define __SHA__ 1 +// CHECK_ZNVER5_M64: #define __SSE2_MATH__ 1 +// CHECK_ZNVER5_M64: #define __SSE2__ 1 +// CHECK_ZNVER5_M64: #define __SSE3__ 1 +// CHECK_ZNVER5_M64: #define __SSE4A__ 1 +// CHECK_ZNVER5_M64: #define __SSE4_1__ 1 +// CHECK_ZNVER5_M64: #define __SSE4_2__ 1 +// CHECK_ZNVER5_M64: #define __SSE_MATH__ 1 +// CHECK_ZNVER5_M64: #define __SSE__ 1 +// CHECK_ZNVER5_M64: #define __SSSE3__ 1 +// CHECK_ZNVER5_M64-NOT: #define __TBM__ 1 +// CHECK_ZNVER5_M64: #define __VAES__ 1 +// CHECK_ZNVER5_M64: #define __VPCLMULQDQ__ 1 +// CHECK_ZNVER5_M64: #define __WBNOINVD__ 1 +// CHECK_ZNVER5_M64-NOT: #define __XOP__ 1 +// CHECK_ZNVER5_M64: #define __XSAVEC__ 1 +// CHECK_ZNVER5_M64: #define __XSAVEOPT__ 1 +// CHECK_ZNVER5_M64: #define __XSAVES__ 1 +// CHECK_ZNVER5_M64: #define __XSAVE__ 1 +// CHECK_ZNVER5_M64: #define __amd64 1 +// CHECK_ZNVER5_M64: #define __amd64__ 1 +// CHECK_ZNVER5_M64: #define __tune_znver5__ 1 +// CHECK_ZNVER5_M64: #define __x86_64 1 +// CHECK_ZNVER5_M64: #define __x86_64__ 1 +// CHECK_ZNVER5_M64: #define __znver5 1 +// CHECK_ZNVER5_M64: #define __znver5__ 1 + // End X86/GCC/Linux tests ------------------ // Begin PPC/GCC/Linux tests ---------------- diff --git a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c new file mode 100644 index 0000000000000..8c7e46c6eca9c --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c @@ -0,0 +1,8 @@ +void foo() { + // expected-error@+1{{use of undeclared identifier 'a'}} + a = 2; a = 2; + b = 2; b = 2; + // expected-error@+1 3{{use of undeclared identifier 'c'}} + c = 2; c = 2; + // expected-error 2{{asdf}} +} diff --git a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected new file mode 100644 index 0000000000000..6214ff382f449 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected @@ -0,0 +1,8 @@ +void foo() { + // expected-error@+1 2{{use of undeclared identifier 'a'}} + a = 2; a = 2; + // expected-error@+1 2{{use of undeclared identifier 'b'}} + b = 2; b = 2; + // expected-error@+1 2{{use of undeclared identifier 'c'}} + c = 2; c = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c new file mode 100644 index 0000000000000..0210ac35fd5cd --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c @@ -0,0 +1,8 @@ +void foo() { + // expected-error@+1 2 {{use of undeclared identifier 'a'}} + a = 2; a = 2; b = 2; b = 2; c = 2; + // expected-error@+1 2 {{asdf}} + d = 2; + e = 2; f = 2; // expected-error 2 {{use of undeclared 
identifier 'e'}} +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected new file mode 100644 index 0000000000000..5c5aaeeef97ac --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected @@ -0,0 +1,11 @@ +void foo() { + // expected-error@+3 {{use of undeclared identifier 'c'}} + // expected-error@+2 2 {{use of undeclared identifier 'b'}} + // expected-error@+1 2 {{use of undeclared identifier 'a'}} + a = 2; a = 2; b = 2; b = 2; c = 2; + // expected-error@+1 {{use of undeclared identifier 'd'}} + d = 2; + // expected-error@+1 {{use of undeclared identifier 'f'}} + e = 2; f = 2; // expected-error {{use of undeclared identifier 'e'}} +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c new file mode 100644 index 0000000000000..1aa8d088e9727 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c @@ -0,0 +1,11 @@ +void foo() { + a = 2; + // expected-error@-1{{use of undeclared identifier 'a'}} + b = 2;// expected-error{{use of undeclared identifier 'b'}} + c = 2; + // expected-error@5{{use of undeclared identifier 'c'}} + d = 2; // expected-error-re{{use of {{.*}} identifier 'd'}} + + e = 2; // error to trigger mismatch +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected new file mode 100644 index 0000000000000..6b621061bbfbb --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected @@ -0,0 +1,12 @@ +void foo() { + a = 2; + // expected-error@-1{{use of undeclared identifier 'a'}} + b = 2;// expected-error{{use of undeclared identifier 'b'}} + c = 2; + // expected-error@5{{use of undeclared identifier 'c'}} + d = 2; // expected-error-re{{use of {{.*}} identifier 'd'}} + + // expected-error@+1{{use of undeclared identifier 'e'}} + e = 2; // error to trigger mismatch +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c new file mode 100644 index 0000000000000..e230e0a337bf4 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c @@ -0,0 +1,6 @@ +void foo() { + a = 2; + b = 2; + + c = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected new file mode 100644 index 0000000000000..27dc1f30a26fa --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected @@ -0,0 +1,9 @@ +void foo() { + // expected-error@+1{{use of undeclared identifier 'a'}} + a = 2; + // expected-error@+1{{use of undeclared identifier 'b'}} + b = 2; + + // expected-error@+1{{use of undeclared identifier 'c'}} + c = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c new file mode 100644 index 0000000000000..03f723d44bbe8 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c @@ -0,0 +1,8 @@ +void foo() { + a = 2; b = 2; c = 2; +} + +void bar() { + x = 2; y = 2; z = 2; + // expected-error@-1{{use of undeclared identifier 'x'}} +} diff --git 
a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected new file mode 100644 index 0000000000000..24b57f4353d95 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected @@ -0,0 +1,13 @@ +void foo() { + // expected-error@+3{{use of undeclared identifier 'c'}} + // expected-error@+2{{use of undeclared identifier 'b'}} + // expected-error@+1{{use of undeclared identifier 'a'}} + a = 2; b = 2; c = 2; +} + +void bar() { + x = 2; y = 2; z = 2; + // expected-error@-1{{use of undeclared identifier 'x'}} + // expected-error@-2{{use of undeclared identifier 'y'}} + // expected-error@-3{{use of undeclared identifier 'z'}} +} diff --git a/clang/test/utils/update-verify-tests/Inputs/no-checks.c b/clang/test/utils/update-verify-tests/Inputs/no-checks.c new file mode 100644 index 0000000000000..8fd1f7cd33370 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/no-checks.c @@ -0,0 +1,3 @@ +void foo() { + bar = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected new file mode 100644 index 0000000000000..e80548fbe50f2 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected @@ -0,0 +1,4 @@ +void foo() { + // expected-error@+1{{use of undeclared identifier 'bar'}} + bar = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/no-diags.c b/clang/test/utils/update-verify-tests/Inputs/no-diags.c new file mode 100644 index 0000000000000..66d169be43940 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/no-diags.c @@ -0,0 +1,5 @@ +void foo() { + // expected-error@+1{{asdf}} + int a = 2; +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected new file mode 100644 index 0000000000000..0523028494570 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected @@ -0,0 +1,5 @@ +// expected-no-diagnostics +void foo() { + int a = 2; +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c new file mode 100644 index 0000000000000..78b72e1357da7 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c @@ -0,0 +1,4 @@ +// expected-no-diagnostics +void foo() { + a = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected new file mode 100644 index 0000000000000..d948ffce56189 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected @@ -0,0 +1,4 @@ +void foo() { + // expected-error@+1{{use of undeclared identifier 'a'}} + a = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c new file mode 100644 index 0000000000000..3d63eaf0f1b87 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c @@ -0,0 +1,5 @@ +void foo() { + a = 2; // check-error{{asdf}} + // expected-error@-1{ignored}} +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected new file mode 100644 index 
0000000000000..a877f86922123 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected @@ -0,0 +1,5 @@ +void foo() { + a = 2; // check-error{{use of undeclared identifier 'a'}} + // expected-error@-1{ignored}} +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c new file mode 100644 index 0000000000000..5278ce0c57c31 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c @@ -0,0 +1,4 @@ +void foo() { + bar = 2; // expected-error {{asdf}} +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected new file mode 100644 index 0000000000000..8ba47f788319b --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected @@ -0,0 +1,4 @@ +void foo() { + bar = 2; // expected-error {{use of undeclared identifier 'bar'}} +} + diff --git a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c new file mode 100644 index 0000000000000..20b011bfc3d77 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c @@ -0,0 +1,4 @@ +void foo() { + // expected-error@+1{{asdf}} + bar = 2; +} diff --git a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected new file mode 100644 index 0000000000000..e80548fbe50f2 --- /dev/null +++ b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected @@ -0,0 +1,4 @@ +void foo() { + // expected-error@+1{{use of undeclared identifier 'bar'}} + bar = 2; +} diff --git a/clang/test/utils/update-verify-tests/duplicate-diag.test b/clang/test/utils/update-verify-tests/duplicate-diag.test new file mode 100644 index 0000000000000..3163ce46199c3 --- /dev/null +++ b/clang/test/utils/update-verify-tests/duplicate-diag.test @@ -0,0 +1,4 @@ +# RUN: cp %S/Inputs/duplicate-diag.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/duplicate-diag.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c + diff --git a/clang/test/utils/update-verify-tests/infer-indentation.test b/clang/test/utils/update-verify-tests/infer-indentation.test new file mode 100644 index 0000000000000..6ba2f5d9d505b --- /dev/null +++ b/clang/test/utils/update-verify-tests/infer-indentation.test @@ -0,0 +1,3 @@ +# RUN: cp %S/Inputs/infer-indentation.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/infer-indentation.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c diff --git a/clang/test/utils/update-verify-tests/leave-existing-diags.test b/clang/test/utils/update-verify-tests/leave-existing-diags.test new file mode 100644 index 0000000000000..cde690ef715a6 --- /dev/null +++ b/clang/test/utils/update-verify-tests/leave-existing-diags.test @@ -0,0 +1,4 @@ +# RUN: cp %S/Inputs/leave-existing-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/leave-existing-diags.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c + diff --git a/clang/test/utils/update-verify-tests/lit.local.cfg b/clang/test/utils/update-verify-tests/lit.local.cfg new file mode 100644 index 0000000000000..a0b6afccc2501 --- /dev/null +++ b/clang/test/utils/update-verify-tests/lit.local.cfg @@ -0,0 +1,25 @@ +import lit.util + +# python 2.7 
backwards compatibility +try: + from shlex import quote as shell_quote +except ImportError: + from pipes import quote as shell_quote + +if config.standalone_build: + # These tests require the update-verify-tests.py script from the clang + # source tree, so skip these tests if we are doing standalone builds. + config.unsupported = True +else: + config.suffixes = [".test"] + + script_path = os.path.join( + config.clang_src_dir, "utils", "update-verify-tests.py" + ) + python = shell_quote(config.python_executable) + config.substitutions.append( + ( + "%update-verify-tests", + "%s %s" % (python, shell_quote(script_path)), + ) + ) diff --git a/clang/test/utils/update-verify-tests/multiple-errors.test b/clang/test/utils/update-verify-tests/multiple-errors.test new file mode 100644 index 0000000000000..1332ef365dc86 --- /dev/null +++ b/clang/test/utils/update-verify-tests/multiple-errors.test @@ -0,0 +1,3 @@ +# RUN: cp %S/Inputs/multiple-errors.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/multiple-errors.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c diff --git a/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test b/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test new file mode 100644 index 0000000000000..a9c21cd77e192 --- /dev/null +++ b/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test @@ -0,0 +1,3 @@ +# RUN: cp %S/Inputs/multiple-missing-errors-same-line.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/multiple-missing-errors-same-line.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c diff --git a/clang/test/utils/update-verify-tests/no-checks.test b/clang/test/utils/update-verify-tests/no-checks.test new file mode 100644 index 0000000000000..f6ea91fa552be --- /dev/null +++ b/clang/test/utils/update-verify-tests/no-checks.test @@ -0,0 +1,3 @@ +# RUN: cp %S/Inputs/no-checks.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/no-checks.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c diff --git a/clang/test/utils/update-verify-tests/no-diags.test b/clang/test/utils/update-verify-tests/no-diags.test new file mode 100644 index 0000000000000..464fe8894253b --- /dev/null +++ b/clang/test/utils/update-verify-tests/no-diags.test @@ -0,0 +1,4 @@ +# RUN: cp %S/Inputs/no-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/no-diags.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c + diff --git a/clang/test/utils/update-verify-tests/no-expected-diags.test b/clang/test/utils/update-verify-tests/no-expected-diags.test new file mode 100644 index 0000000000000..75235f17a64a2 --- /dev/null +++ b/clang/test/utils/update-verify-tests/no-expected-diags.test @@ -0,0 +1,4 @@ +# RUN: cp %S/Inputs/no-expected-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/no-expected-diags.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c + diff --git a/clang/test/utils/update-verify-tests/non-default-prefix.test b/clang/test/utils/update-verify-tests/non-default-prefix.test new file mode 100644 index 0000000000000..e581755a6e603 --- /dev/null +++ b/clang/test/utils/update-verify-tests/non-default-prefix.test @@ -0,0 +1,4 @@ +# RUN: cp %S/Inputs/non-default-prefix.c %t.c && not %clang_cc1 -verify=check %t.c 2>&1 | %update-verify-tests --prefix check +# RUN: diff -u %S/Inputs/non-default-prefix.c.expected %t.c +# RUN: %clang_cc1 -verify=check 
%t.c + diff --git a/clang/test/utils/update-verify-tests/update-same-line.test b/clang/test/utils/update-verify-tests/update-same-line.test new file mode 100644 index 0000000000000..324768eae5faa --- /dev/null +++ b/clang/test/utils/update-verify-tests/update-same-line.test @@ -0,0 +1,4 @@ +# RUN: cp %S/Inputs/update-same-line.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/update-same-line.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c + diff --git a/clang/test/utils/update-verify-tests/update-single-check.test b/clang/test/utils/update-verify-tests/update-single-check.test new file mode 100644 index 0000000000000..2cb1ae3bcbd3b --- /dev/null +++ b/clang/test/utils/update-verify-tests/update-single-check.test @@ -0,0 +1,3 @@ +# RUN: cp %S/Inputs/update-single-check.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests +# RUN: diff -u %S/Inputs/update-single-check.c.expected %t.c +# RUN: %clang_cc1 -verify %t.c diff --git a/clang/utils/TableGen/ClangSACheckersEmitter.cpp b/clang/utils/TableGen/ClangSACheckersEmitter.cpp index 2a2e466ae1979..44c2d8b31655d 100644 --- a/clang/utils/TableGen/ClangSACheckersEmitter.cpp +++ b/clang/utils/TableGen/ClangSACheckersEmitter.cpp @@ -174,9 +174,11 @@ static void printOption(llvm::raw_ostream &OS, StringRef FullName, OS << "true"; } -void clang::EmitClangSACheckers(RecordKeeper &Records, raw_ostream &OS) { - std::vector<Record *> checkers = Records.getAllDerivedDefinitions("Checker"); - std::vector<Record *> packages = Records.getAllDerivedDefinitions("Package"); +void clang::EmitClangSACheckers(const RecordKeeper &Records, raw_ostream &OS) { + ArrayRef<const Record *> checkers = + Records.getAllDerivedDefinitions("Checker"); + ArrayRef<const Record *> packages = + Records.getAllDerivedDefinitions("Package"); using SortedRecords = llvm::StringMap<const Record *>; diff --git a/clang/utils/TableGen/ClangSyntaxEmitter.cpp b/clang/utils/TableGen/ClangSyntaxEmitter.cpp index 2a69e4c353b6b..66b27be88f56f 100644 --- a/clang/utils/TableGen/ClangSyntaxEmitter.cpp +++ b/clang/utils/TableGen/ClangSyntaxEmitter.cpp @@ -41,11 +41,12 @@ using llvm::formatv; // stable and useful way, where abstract Node subclasses correspond to ranges. 
class Hierarchy { public: - Hierarchy(llvm::RecordKeeper &Records) { - for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType")) + Hierarchy(const llvm::RecordKeeper &Records) { + for (const llvm::Record *T : Records.getAllDerivedDefinitions("NodeType")) add(T); - for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType")) - if (llvm::Record *Base = Derived->getValueAsOptionalDef("base")) + for (const llvm::Record *Derived : + Records.getAllDerivedDefinitions("NodeType")) + if (const llvm::Record *Base = Derived->getValueAsOptionalDef("base")) link(Derived, Base); for (NodeType &N : AllTypes) { llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) { @@ -127,7 +128,7 @@ struct SyntaxConstraint { } // namespace -void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records, +void clang::EmitClangSyntaxNodeList(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS) { llvm::emitSourceFileHeader("Syntax tree node list", OS, Records); Hierarchy H(Records); @@ -186,7 +187,7 @@ static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) { } } -void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records, +void clang::EmitClangSyntaxNodeClasses(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS) { llvm::emitSourceFileHeader("Syntax tree node list", OS, Records); Hierarchy H(Records); diff --git a/clang/utils/TableGen/ClangTypeNodesEmitter.cpp b/clang/utils/TableGen/ClangTypeNodesEmitter.cpp index 66bdf5e67602b..41a2d0cd066fe 100644 --- a/clang/utils/TableGen/ClangTypeNodesEmitter.cpp +++ b/clang/utils/TableGen/ClangTypeNodesEmitter.cpp @@ -74,16 +74,15 @@ using namespace clang::tblgen; namespace { class TypeNodeEmitter { - RecordKeeper &Records; + const RecordKeeper &Records; raw_ostream &Out; - const std::vector Types; + ArrayRef Types; std::vector MacrosToUndef; public: - TypeNodeEmitter(RecordKeeper &records, raw_ostream &out) - : Records(records), Out(out), - Types(Records.getAllDerivedDefinitions(TypeNodeClassName)) { - } + TypeNodeEmitter(const RecordKeeper &records, raw_ostream &out) + : Records(records), Out(out), + Types(Records.getAllDerivedDefinitions(TypeNodeClassName)) {} void emit(); @@ -203,6 +202,6 @@ void TypeNodeEmitter::emitUndefs() { } } -void clang::EmitClangTypeNodes(RecordKeeper &records, raw_ostream &out) { +void clang::EmitClangTypeNodes(const RecordKeeper &records, raw_ostream &out) { TypeNodeEmitter(records, out).emit(); } diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index bb4f091604f5e..6cfaa891241fa 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -958,7 +958,7 @@ class ACLEIntrinsic { ";\n"; } - ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param); + ACLEIntrinsic(EmitterBase &ME, const Record *R, const Type *Param); }; // ----------------------------------------------------------------------------- @@ -988,7 +988,7 @@ class EmitterBase { const ScalarType *getScalarType(StringRef Name) { return ScalarTypes[std::string(Name)].get(); } - const ScalarType *getScalarType(Record *R) { + const ScalarType *getScalarType(const Record *R) { return getScalarType(R->getName()); } const VectorType *getVectorType(const ScalarType *ST, unsigned Lanes) { @@ -1028,7 +1028,7 @@ class EmitterBase { // the Params list in the Tablegen record for the intrinsic), which is used // to expand Tablegen classes like 'Vector' which mean something different in // each member of a parametric family. 
- const Type *getType(Record *R, const Type *Param); + const Type *getType(const Record *R, const Type *Param); const Type *getType(DagInit *D, const Type *Param); const Type *getType(Init *I, const Type *Param); @@ -1046,7 +1046,7 @@ class EmitterBase { // Constructor and top-level functions. - EmitterBase(RecordKeeper &Records); + EmitterBase(const RecordKeeper &Records); virtual ~EmitterBase() = default; virtual void EmitHeader(raw_ostream &OS) = 0; @@ -1065,7 +1065,7 @@ const Type *EmitterBase::getType(Init *I, const Type *Param) { PrintFatalError("Could not convert this value into a type"); } -const Type *EmitterBase::getType(Record *R, const Type *Param) { +const Type *EmitterBase::getType(const Record *R, const Type *Param) { // Pass to a subfield of any wrapper records. We don't expect more than one // of these: immediate operands are used as plain numbers rather than as // llvm::Value, so it's meaningless to promote their type anyway. @@ -1088,7 +1088,7 @@ const Type *EmitterBase::getType(DagInit *D, const Type *Param) { // The meat of the getType system: types in the Tablegen are represented by a // dag whose operators select sub-cases of this function. - Record *Op = cast(D->getOperator())->getDef(); + const Record *Op = cast(D->getOperator())->getDef(); if (!Op->isSubClassOf("ComplexTypeOp")) PrintFatalError( "Expected ComplexTypeOp as dag operator in type expression"); @@ -1154,7 +1154,7 @@ const Type *EmitterBase::getType(DagInit *D, const Type *Param) { Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope, const Type *Param) { - Record *Op = cast(D->getOperator())->getDef(); + const Record *Op = cast(D->getOperator())->getDef(); if (Op->getName() == "seq") { Result::Scope SubScope = Scope; @@ -1211,7 +1211,7 @@ Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope, } else if (Op->getName() == "unsignedflag") { if (D->getNumArgs() != 1) PrintFatalError("unsignedflag should have exactly one argument"); - Record *TypeRec = cast(D->getArg(0))->getDef(); + const Record *TypeRec = cast(D->getArg(0))->getDef(); if (!TypeRec->isSubClassOf("Type")) PrintFatalError("unsignedflag's argument should be a type"); if (const auto *ST = dyn_cast(getType(TypeRec, Param))) { @@ -1223,7 +1223,7 @@ Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope, } else if (Op->getName() == "bitsize") { if (D->getNumArgs() != 1) PrintFatalError("bitsize should have exactly one argument"); - Record *TypeRec = cast(D->getArg(0))->getDef(); + const Record *TypeRec = cast(D->getArg(0))->getDef(); if (!TypeRec->isSubClassOf("Type")) PrintFatalError("bitsize's argument should be a type"); if (const auto *ST = dyn_cast(getType(TypeRec, Param))) { @@ -1239,7 +1239,7 @@ Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope, if (Op->isSubClassOf("IRBuilderBase")) { std::set AddressArgs; std::map IntegerArgs; - for (Record *sp : Op->getValueAsListOfDefs("special_params")) { + for (const Record *sp : Op->getValueAsListOfDefs("special_params")) { unsigned Index = sp->getValueAsInt("index"); if (sp->isSubClassOf("IRBuilderAddrParam")) { AddressArgs.insert(Index); @@ -1251,7 +1251,7 @@ Result::Ptr EmitterBase::getCodeForDag(DagInit *D, const Result::Scope &Scope, Args, AddressArgs, IntegerArgs); } else if (Op->isSubClassOf("IRIntBase")) { std::vector ParamTypes; - for (Record *RParam : Op->getValueAsListOfDefs("params")) + for (const Record *RParam : Op->getValueAsListOfDefs("params")) ParamTypes.push_back(getType(RParam, 
Param)); std::string IntName = std::string(Op->getValueAsString("intname")); if (Op->getValueAsBit("appendKind")) @@ -1294,7 +1294,7 @@ Result::Ptr EmitterBase::getCodeForDagArg(DagInit *D, unsigned ArgNum, return getCodeForDag(DI, Scope, Param); if (auto *DI = dyn_cast(Arg)) { - Record *Rec = DI->getDef(); + const Record *Rec = DI->getDef(); if (Rec->isSubClassOf("Type")) { const Type *T = getType(Rec, Param); return std::make_shared(T); @@ -1328,7 +1328,8 @@ Result::Ptr EmitterBase::getCodeForArg(unsigned ArgNum, const Type *ArgType, return V; } -ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param) +ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, const Record *R, + const Type *Param) : ReturnType(ME.getType(R->getValueAsDef("ret"), Param)) { // Derive the intrinsic's full name, by taking the name of the // Tablegen record (or override) and appending the suffix from its @@ -1346,7 +1347,7 @@ ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param) // full name as specified by its 'pnt' member ('polymorphic name type'), // which indicates how many type suffixes to remove, and any other piece of // the name that should be removed. - Record *PolymorphicNameType = R->getValueAsDef("pnt"); + const Record *PolymorphicNameType = R->getValueAsDef("pnt"); SmallVector NameParts; StringRef(FullName).split(NameParts, '_'); for (unsigned i = 0, e = PolymorphicNameType->getValueAsInt( @@ -1393,11 +1394,11 @@ ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param) // what values it can take, for Sema checking. bool Immediate = false; if (auto TypeDI = dyn_cast(TypeInit)) { - Record *TypeRec = TypeDI->getDef(); + const Record *TypeRec = TypeDI->getDef(); if (TypeRec->isSubClassOf("Immediate")) { Immediate = true; - Record *Bounds = TypeRec->getValueAsDef("bounds"); + const Record *Bounds = TypeRec->getValueAsDef("bounds"); ImmediateArg &IA = ImmediateArgs[i]; if (Bounds->isSubClassOf("IB_ConstRange")) { IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; @@ -1440,7 +1441,7 @@ ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param) // Finally, go through the codegen dag and translate it into a Result object // (with an arbitrary DAG of depended-on Results hanging off it). DagInit *CodeDag = R->getValueAsDag("codegen"); - Record *MainOp = cast(CodeDag->getOperator())->getDef(); + const Record *MainOp = cast(CodeDag->getOperator())->getDef(); if (MainOp->isSubClassOf("CustomCodegen")) { // Or, if it's the special case of CustomCodegen, just accumulate // a list of parameters we're going to assign to variables before @@ -1464,7 +1465,7 @@ ACLEIntrinsic::ACLEIntrinsic(EmitterBase &ME, Record *R, const Type *Param) } } -EmitterBase::EmitterBase(RecordKeeper &Records) { +EmitterBase::EmitterBase(const RecordKeeper &Records) { // Construct the whole EmitterBase. // First, look up all the instances of PrimitiveType. This gives us the list @@ -1472,13 +1473,13 @@ EmitterBase::EmitterBase(RecordKeeper &Records) { // collect all the useful ScalarType instances into a big list so that we can // use it for operations such as 'find the unsigned version of this signed // integer type'. - for (Record *R : Records.getAllDerivedDefinitions("PrimitiveType")) + for (const Record *R : Records.getAllDerivedDefinitions("PrimitiveType")) ScalarTypes[std::string(R->getName())] = std::make_unique(R); // Now go through the instances of Intrinsic, and for each one, iterate // through its list of type parameters making an ACLEIntrinsic for each one. 
- for (Record *R : Records.getAllDerivedDefinitions("Intrinsic")) { - for (Record *RParam : R->getValueAsListOfDefs("params")) { + for (const Record *R : Records.getAllDerivedDefinitions("Intrinsic")) { + for (const Record *RParam : R->getValueAsListOfDefs("params")) { const Type *Param = getType(RParam, getVoidType()); auto Intrinsic = std::make_unique(*this, R, Param); ACLEIntrinsics[Intrinsic->fullName()] = std::move(Intrinsic); @@ -1752,7 +1753,7 @@ void EmitterBase::GroupSemaChecks( class MveEmitter : public EmitterBase { public: - MveEmitter(RecordKeeper &Records) : EmitterBase(Records){}; + MveEmitter(const RecordKeeper &Records) : EmitterBase(Records) {} void EmitHeader(raw_ostream &OS) override; void EmitBuiltinDef(raw_ostream &OS) override; void EmitBuiltinSema(raw_ostream &OS) override; @@ -2010,14 +2011,14 @@ class CdeEmitter : public EmitterBase { std::map FunctionMacros; public: - CdeEmitter(RecordKeeper &Records); + CdeEmitter(const RecordKeeper &Records); void EmitHeader(raw_ostream &OS) override; void EmitBuiltinDef(raw_ostream &OS) override; void EmitBuiltinSema(raw_ostream &OS) override; }; -CdeEmitter::CdeEmitter(RecordKeeper &Records) : EmitterBase(Records) { - for (Record *R : Records.getAllDerivedDefinitions("FunctionMacro")) +CdeEmitter::CdeEmitter(const RecordKeeper &Records) : EmitterBase(Records) { + for (const Record *R : Records.getAllDerivedDefinitions("FunctionMacro")) FunctionMacros.emplace(R->getName(), FunctionMacro(*R)); } @@ -2179,45 +2180,45 @@ namespace clang { // MVE -void EmitMveHeader(RecordKeeper &Records, raw_ostream &OS) { +void EmitMveHeader(const RecordKeeper &Records, raw_ostream &OS) { MveEmitter(Records).EmitHeader(OS); } -void EmitMveBuiltinDef(RecordKeeper &Records, raw_ostream &OS) { +void EmitMveBuiltinDef(const RecordKeeper &Records, raw_ostream &OS) { MveEmitter(Records).EmitBuiltinDef(OS); } -void EmitMveBuiltinSema(RecordKeeper &Records, raw_ostream &OS) { +void EmitMveBuiltinSema(const RecordKeeper &Records, raw_ostream &OS) { MveEmitter(Records).EmitBuiltinSema(OS); } -void EmitMveBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { +void EmitMveBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { MveEmitter(Records).EmitBuiltinCG(OS); } -void EmitMveBuiltinAliases(RecordKeeper &Records, raw_ostream &OS) { +void EmitMveBuiltinAliases(const RecordKeeper &Records, raw_ostream &OS) { MveEmitter(Records).EmitBuiltinAliases(OS); } // CDE -void EmitCdeHeader(RecordKeeper &Records, raw_ostream &OS) { +void EmitCdeHeader(const RecordKeeper &Records, raw_ostream &OS) { CdeEmitter(Records).EmitHeader(OS); } -void EmitCdeBuiltinDef(RecordKeeper &Records, raw_ostream &OS) { +void EmitCdeBuiltinDef(const RecordKeeper &Records, raw_ostream &OS) { CdeEmitter(Records).EmitBuiltinDef(OS); } -void EmitCdeBuiltinSema(RecordKeeper &Records, raw_ostream &OS) { +void EmitCdeBuiltinSema(const RecordKeeper &Records, raw_ostream &OS) { CdeEmitter(Records).EmitBuiltinSema(OS); } -void EmitCdeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { +void EmitCdeBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { CdeEmitter(Records).EmitBuiltinCG(OS); } -void EmitCdeBuiltinAliases(RecordKeeper &Records, raw_ostream &OS) { +void EmitCdeBuiltinAliases(const RecordKeeper &Records, raw_ostream &OS) { CdeEmitter(Records).EmitBuiltinAliases(OS); } diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 4707ce1ea3b79..9e5480be20ada 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp 
@@ -59,7 +59,7 @@ namespace { // While globals are generally bad, this one allows us to perform assertions // liberally and somehow still trace them back to the def they indirectly // came from. -static Record *CurrentRecord = nullptr; +static const Record *CurrentRecord = nullptr; static void assert_with_loc(bool Assertion, const std::string &Str) { if (!Assertion) { if (CurrentRecord) @@ -308,7 +308,7 @@ class Variable { /// a particular typespec and prototype. class Intrinsic { /// The Record this intrinsic was created from. - Record *R; + const Record *R; /// The unmangled name. std::string Name; /// The input and output typespecs. InTS == OutTS except when @@ -371,7 +371,7 @@ class Intrinsic { } public: - Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, + Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe) @@ -442,7 +442,7 @@ class Intrinsic { } /// Get the Record that this intrinsic is based off. - Record *getRecord() const { return R; } + const Record *getRecord() const { return R; } /// Get the set of Intrinsics that this intrinsic calls. /// this is the set of immediate dependencies, NOT the /// transitive closure. @@ -576,12 +576,12 @@ class Intrinsic { //===----------------------------------------------------------------------===// class NeonEmitter { - RecordKeeper &Records; - DenseMap ClassMap; + const RecordKeeper &Records; + DenseMap ClassMap; std::map> IntrinsicMap; unsigned UniqueNumber; - void createIntrinsic(Record *R, SmallVectorImpl &Out); + void createIntrinsic(const Record *R, SmallVectorImpl &Out); void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl &Defs); void genStreamingSVECompatibleList(raw_ostream &OS, SmallVectorImpl &Defs); @@ -601,15 +601,15 @@ class NeonEmitter { /// Called by Intrinsic - returns a globally-unique number. 
unsigned getUniqueNumber() { return UniqueNumber++; } - NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) { - Record *SI = R.getClass("SInst"); - Record *II = R.getClass("IInst"); - Record *WI = R.getClass("WInst"); - Record *SOpI = R.getClass("SOpInst"); - Record *IOpI = R.getClass("IOpInst"); - Record *WOpI = R.getClass("WOpInst"); - Record *LOpI = R.getClass("LOpInst"); - Record *NoTestOpI = R.getClass("NoTestOpInst"); + NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) { + const Record *SI = R.getClass("SInst"); + const Record *II = R.getClass("IInst"); + const Record *WI = R.getClass("WInst"); + const Record *SOpI = R.getClass("SOpInst"); + const Record *IOpI = R.getClass("IOpInst"); + const Record *WOpI = R.getClass("WOpInst"); + const Record *LOpI = R.getClass("LOpInst"); + const Record *NoTestOpI = R.getClass("NoTestOpInst"); ClassMap[SI] = ClassS; ClassMap[II] = ClassI; @@ -1979,12 +1979,12 @@ Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef Types, return *GoodVec.front(); } -void NeonEmitter::createIntrinsic(Record *R, +void NeonEmitter::createIntrinsic(const Record *R, SmallVectorImpl &Out) { std::string Name = std::string(R->getValueAsString("Name")); std::string Proto = std::string(R->getValueAsString("Prototype")); std::string Types = std::string(R->getValueAsString("Types")); - Record *OperationRec = R->getValueAsDef("Operation"); + const Record *OperationRec = R->getValueAsDef("Operation"); bool BigEndianSafe = R->getValueAsBit("BigEndianSafe"); std::string ArchGuard = std::string(R->getValueAsString("ArchGuard")); std::string TargetGuard = std::string(R->getValueAsString("TargetGuard")); @@ -2240,10 +2240,8 @@ void NeonEmitter::genIntrinsicRangeCheckCode( /// 2. the SemaChecking code for the type overload checking. /// 3. the SemaChecking code for validation of intrinsic immediate arguments. 
void NeonEmitter::runHeader(raw_ostream &OS) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); - SmallVector Defs; - for (auto *R : RV) + for (const Record *R : Records.getAllDerivedDefinitions("Inst")) createIntrinsic(R, Defs); // Generate shared BuiltinsXXX.def @@ -2402,8 +2400,7 @@ void NeonEmitter::run(raw_ostream &OS) { "__nodebug__))\n\n"; SmallVector Defs; - std::vector RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) + for (const Record *R : Records.getAllDerivedDefinitions("Inst")) createIntrinsic(R, Defs); for (auto *I : Defs) @@ -2510,8 +2507,7 @@ void NeonEmitter::runFP16(raw_ostream &OS) { "__nodebug__))\n\n"; SmallVector Defs; - std::vector RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) + for (const Record *R : Records.getAllDerivedDefinitions("Inst")) createIntrinsic(R, Defs); for (auto *I : Defs) @@ -2619,8 +2615,7 @@ void NeonEmitter::runBF16(raw_ostream &OS) { "__nodebug__))\n\n"; SmallVector Defs; - std::vector RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) + for (const Record *R : Records.getAllDerivedDefinitions("Inst")) createIntrinsic(R, Defs); for (auto *I : Defs) @@ -2674,26 +2669,26 @@ void NeonEmitter::runBF16(raw_ostream &OS) { OS << "#endif\n"; } -void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).run(OS); } -void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runFP16(OS); } -void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runBF16(OS); } -void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runHeader(OS); } -void clang::EmitVectorTypes(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) { NeonEmitter(Records).runVectorTypes(OS); } -void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) { llvm_unreachable("Neon test generation no longer implemented!"); } diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp index d05236bb4e909..4ef83e7b608dc 100644 --- a/clang/utils/TableGen/RISCVVEmitter.cpp +++ b/clang/utils/TableGen/RISCVVEmitter.cpp @@ -95,11 +95,11 @@ class SemaSignatureTable { class RVVEmitter { private: - RecordKeeper &Records; + const RecordKeeper &Records; RVVTypeCache TypeCache; public: - RVVEmitter(RecordKeeper &R) : Records(R) {} + RVVEmitter(const RecordKeeper &R) : Records(R) {} /// Emit riscv_vector.h void createHeader(raw_ostream &o); @@ -554,8 +554,7 @@ void RVVEmitter::createCodeGen(raw_ostream &OS) { void RVVEmitter::createRVVIntrinsics( std::vector> &Out, std::vector *SemaRecords) { - std::vector RV = Records.getAllDerivedDefinitions("RVVBuiltin"); - for (auto *R : RV) { + for (const Record *R : Records.getAllDerivedDefinitions("RVVBuiltin")) { StringRef Name = R->getValueAsString("Name"); StringRef SuffixProto = R->getValueAsString("Suffix"); StringRef OverloadedName = R->getValueAsString("OverloadedName"); @@ -565,10 +564,10 @@ void RVVEmitter::createRVVIntrinsics( bool HasMasked = R->getValueAsBit("HasMasked"); bool HasMaskedOffOperand = 
R->getValueAsBit("HasMaskedOffOperand"); bool HasVL = R->getValueAsBit("HasVL"); - Record *MPSRecord = R->getValueAsDef("MaskedPolicyScheme"); + const Record *MPSRecord = R->getValueAsDef("MaskedPolicyScheme"); auto MaskedPolicyScheme = static_cast(MPSRecord->getValueAsInt("Value")); - Record *UMPSRecord = R->getValueAsDef("UnMaskedPolicyScheme"); + const Record *UMPSRecord = R->getValueAsDef("UnMaskedPolicyScheme"); auto UnMaskedPolicyScheme = static_cast(UMPSRecord->getValueAsInt("Value")); std::vector Log2LMULList = R->getValueAsListOfInts("Log2LMUL"); @@ -752,9 +751,7 @@ void RVVEmitter::createRVVIntrinsics( } void RVVEmitter::printHeaderCode(raw_ostream &OS) { - std::vector RVVHeaders = - Records.getAllDerivedDefinitions("RVVHeader"); - for (auto *R : RVVHeaders) { + for (const Record *R : Records.getAllDerivedDefinitions("RVVHeader")) { StringRef HeaderCodeStr = R->getValueAsString("HeaderCode"); OS << HeaderCodeStr.str(); } @@ -822,19 +819,19 @@ void RVVEmitter::createSema(raw_ostream &OS) { } namespace clang { -void EmitRVVHeader(RecordKeeper &Records, raw_ostream &OS) { +void EmitRVVHeader(const RecordKeeper &Records, raw_ostream &OS) { RVVEmitter(Records).createHeader(OS); } -void EmitRVVBuiltins(RecordKeeper &Records, raw_ostream &OS) { +void EmitRVVBuiltins(const RecordKeeper &Records, raw_ostream &OS) { RVVEmitter(Records).createBuiltins(OS); } -void EmitRVVBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { +void EmitRVVBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { RVVEmitter(Records).createCodeGen(OS); } -void EmitRVVBuiltinSema(RecordKeeper &Records, raw_ostream &OS) { +void EmitRVVBuiltinSema(const RecordKeeper &Records, raw_ostream &OS) { RVVEmitter(Records).createSema(OS); } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index b2e2db1a40990..5abf6fc49bc30 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -280,7 +280,7 @@ class SVEEmitter { static const std::array Reinterprets; - RecordKeeper &Records; + const RecordKeeper &Records; llvm::StringMap EltTypes; llvm::StringMap MemEltTypes; llvm::StringMap FlagTypes; @@ -288,7 +288,7 @@ class SVEEmitter { llvm::StringMap ImmCheckTypes; public: - SVEEmitter(RecordKeeper &R) : Records(R) { + SVEEmitter(const RecordKeeper &R) : Records(R) { for (auto *RV : Records.getAllDerivedDefinitions("EltType")) EltTypes[RV->getNameInitAsString()] = RV->getValueAsInt("Value"); for (auto *RV : Records.getAllDerivedDefinitions("MemEltType")) @@ -397,7 +397,7 @@ class SVEEmitter { void createBuiltinZAState(raw_ostream &OS); /// Create intrinsic and add it to \p Out - void createIntrinsic(Record *R, + void createIntrinsic(const Record *R, SmallVectorImpl> &Out); }; @@ -1151,7 +1151,7 @@ uint64_t SVEEmitter::encodeTypeFlags(const SVEType &T) { } void SVEEmitter::createIntrinsic( - Record *R, SmallVectorImpl> &Out) { + const Record *R, SmallVectorImpl> &Out) { StringRef Name = R->getValueAsString("Name"); StringRef Proto = R->getValueAsString("Prototype"); StringRef Types = R->getValueAsString("Types"); @@ -1225,7 +1225,7 @@ void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, SVEEmitter &Emitter, ACLEKind Kind) { SmallVector, 128> Defs; - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); for (auto *R : RV) createIntrinsic(R, Defs); @@ -1427,7 +1427,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) { } void SVEEmitter::createBuiltins(raw_ostream &OS) { - std::vector RV = 
Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) createIntrinsic(R, Defs); @@ -1469,7 +1469,7 @@ void SVEEmitter::createBuiltins(raw_ostream &OS) { } void SVEEmitter::createCodeGenMap(raw_ostream &OS) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) createIntrinsic(R, Defs); @@ -1502,7 +1502,7 @@ void SVEEmitter::createCodeGenMap(raw_ostream &OS) { } void SVEEmitter::createRangeChecks(raw_ostream &OS) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) createIntrinsic(R, Defs); @@ -1634,7 +1634,7 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { } void SVEEmitter::createSMEBuiltins(raw_ostream &OS) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) { createIntrinsic(R, Defs); @@ -1662,7 +1662,7 @@ void SVEEmitter::createSMEBuiltins(raw_ostream &OS) { } void SVEEmitter::createSMECodeGenMap(raw_ostream &OS) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) { createIntrinsic(R, Defs); @@ -1696,7 +1696,7 @@ void SVEEmitter::createSMECodeGenMap(raw_ostream &OS) { } void SVEEmitter::createSMERangeChecks(raw_ostream &OS) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) { createIntrinsic(R, Defs); @@ -1733,7 +1733,7 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) { } void SVEEmitter::createBuiltinZAState(raw_ostream &OS) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) createIntrinsic(R, Defs); @@ -1773,7 +1773,7 @@ void SVEEmitter::createBuiltinZAState(raw_ostream &OS) { } void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { - std::vector RV = Records.getAllDerivedDefinitions("Inst"); + std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) createIntrinsic(R, Defs); @@ -1826,55 +1826,55 @@ void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { } namespace clang { -void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) { +void EmitSveHeader(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createHeader(OS); } -void EmitSveBuiltins(RecordKeeper &Records, raw_ostream &OS) { +void EmitSveBuiltins(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createBuiltins(OS); } -void EmitSveBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { +void EmitSveBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createCodeGenMap(OS); } -void EmitSveRangeChecks(RecordKeeper &Records, raw_ostream &OS) { +void EmitSveRangeChecks(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createRangeChecks(OS); } -void EmitSveTypeFlags(RecordKeeper &Records, raw_ostream &OS) { +void EmitSveTypeFlags(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createTypeFlags(OS); } -void EmitImmCheckTypes(RecordKeeper &Records, raw_ostream &OS) { +void 
EmitImmCheckTypes(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createImmCheckTypes(OS); } -void EmitSveStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) { +void EmitSveStreamingAttrs(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SVE); } -void EmitSmeHeader(RecordKeeper &Records, raw_ostream &OS) { +void EmitSmeHeader(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createSMEHeader(OS); } -void EmitSmeBuiltins(RecordKeeper &Records, raw_ostream &OS) { +void EmitSmeBuiltins(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createSMEBuiltins(OS); } -void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { +void EmitSmeBuiltinCG(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createSMECodeGenMap(OS); } -void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) { +void EmitSmeRangeChecks(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createSMERangeChecks(OS); } -void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) { +void EmitSmeStreamingAttrs(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME); } -void EmitSmeBuiltinZAState(RecordKeeper &Records, raw_ostream &OS) { +void EmitSmeBuiltinZAState(const RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createBuiltinZAState(OS); } } // End namespace clang diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index 01d16d2dc3e5f..f7527ac535a87 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -39,7 +39,8 @@ void EmitClangBasicReader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangBasicWriter(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangTypeNodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangTypeNodes(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangTypeReader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangTypeWriter(const llvm::RecordKeeper &Records, @@ -93,7 +94,8 @@ void EmitClangDiagGroups(const llvm::RecordKeeper &Records, void EmitClangDiagsIndexName(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangSACheckers(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitClangSACheckers(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangCommentHTMLTags(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); @@ -108,49 +110,62 @@ void EmitClangCommentCommandList(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangOpcodes(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangSyntaxNodeList(llvm::RecordKeeper &Records, +void EmitClangSyntaxNodeList(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records, +void EmitClangSyntaxNodeClasses(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitNeon(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitFP16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitBF16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitNeonSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitVectorTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitNeonTest(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); - -void 
EmitImmCheckTypes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSveBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSveTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSveRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSveStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); - -void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitSmeBuiltinZAState(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); - -void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitMveBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitMveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitMveBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); - -void EmitRVVHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitRVVBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitRVVBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitRVVBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); - -void EmitCdeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitCdeBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitCdeBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitCdeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitCdeBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitNeon(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitFP16(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitBF16(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitNeonSema(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitVectorTypes(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitNeonTest(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); + +void EmitImmCheckTypes(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitSveHeader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSveBuiltins(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSveBuiltinCG(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSveTypeFlags(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSveRangeChecks(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitSveStreamingAttrs(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); + +void EmitSmeHeader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltins(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltinCG(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeRangeChecks(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitSmeStreamingAttrs(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void 
EmitSmeBuiltinZAState(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); + +void EmitMveHeader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitMveBuiltinDef(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitMveBuiltinSema(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitMveBuiltinCG(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitMveBuiltinAliases(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); + +void EmitRVVHeader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitRVVBuiltins(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitRVVBuiltinCG(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitRVVBuiltinSema(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); + +void EmitCdeHeader(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitCdeBuiltinDef(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitCdeBuiltinSema(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); +void EmitCdeBuiltinCG(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitCdeBuiltinAliases(const llvm::RecordKeeper &Records, + llvm::raw_ostream &OS); void EmitClangAttrDocs(const llvm::RecordKeeper &Records, llvm::raw_ostream &OS); diff --git a/clang/utils/UpdateVerifyTests/core.py b/clang/utils/UpdateVerifyTests/core.py new file mode 100644 index 0000000000000..d1350cdbb698b --- /dev/null +++ b/clang/utils/UpdateVerifyTests/core.py @@ -0,0 +1,452 @@ +import sys +import re + +DEBUG = False + + +def dprint(*args): + if DEBUG: + print(*args, file=sys.stderr) + + +class KnownException(Exception): + pass + + +def parse_error_category(s, prefix): + if "no expected directives found" in s: + return None + parts = s.split("diagnostics") + diag_category = parts[0] + category_parts = parts[0].strip().strip("'").split("-") + expected = category_parts[0] + if expected != prefix: + raise Exception( + f"expected prefix '{prefix}', but found '{expected}'. Multiple verify prefixes are not supported." 
+ ) + diag_category = category_parts[1] + if "seen but not expected" in parts[1]: + seen = True + elif "expected but not seen" in parts[1]: + seen = False + else: + raise KnownException(f"unexpected category '{parts[1]}'") + return (diag_category, seen) + + +diag_error_re = re.compile(r"File (\S+) Line (\d+): (.+)") +diag_error_re2 = re.compile(r"File \S+ Line \d+ \(directive at (\S+):(\d+)\): (.+)") + + +def parse_diag_error(s): + m = diag_error_re2.match(s) + if not m: + m = diag_error_re.match(s) + if not m: + return None + return (m.group(1), int(m.group(2)), m.group(3)) + + +class Line: + def __init__(self, content, line_n): + self.content = content + self.diag = None + self.line_n = line_n + self.targeting_diags = [] + + def update_line_n(self, n): + self.line_n = n + + def render(self): + if not self.diag: + return self.content + assert "{{DIAG}}" in self.content + res = self.content.replace("{{DIAG}}", self.diag.render()) + if not res.strip(): + return "" + return res + + +class Diag: + def __init__( + self, + prefix, + diag_content, + category, + parsed_target_line_n, + line_is_absolute, + count, + line, + is_re, + whitespace_strings, + is_from_source_file, + ): + self.prefix = prefix + self.diag_content = diag_content + self.category = category + self.parsed_target_line_n = parsed_target_line_n + self.line_is_absolute = line_is_absolute + self.count = count + self.line = line + self.target = None + self.is_re = is_re + self.absolute_target() + self.whitespace_strings = whitespace_strings + self.is_from_source_file = is_from_source_file + + def decrement_count(self): + self.count -= 1 + assert self.count >= 0 + + def increment_count(self): + assert self.count >= 0 + self.count += 1 + + def unset_target(self): + assert self.target is not None + self.target.targeting_diags.remove(self) + self.target = None + + def set_target(self, target): + if self.target: + self.unset_target() + self.target = target + self.target.targeting_diags.append(self) + + def absolute_target(self): + if self.target: + return self.target.line_n + if self.line_is_absolute: + return self.parsed_target_line_n + return self.line.line_n + self.parsed_target_line_n + + def relative_target(self): + return self.absolute_target() - self.line.line_n + + def take(self, other_diag): + assert self.count == 0 + assert other_diag.count > 0 + assert other_diag.target == self.target + assert not other_diag.line_is_absolute + assert not other_diag.is_re and not self.is_re + self.line_is_absolute = False + self.diag_content = other_diag.diag_content + self.count = other_diag.count + self.category = other_diag.category + self.count = other_diag.count + other_diag.count = 0 + + def render(self): + assert self.count >= 0 + if self.count == 0: + return "" + line_location_s = "" + if self.relative_target() != 0: + if self.line_is_absolute: + line_location_s = f"@{self.absolute_target()}" + elif self.relative_target() > 0: + line_location_s = f"@+{self.relative_target()}" + else: + line_location_s = ( + f"@{self.relative_target()}" # the minus sign is implicit + ) + count_s = "" if self.count == 1 else f"{self.count}" + re_s = "-re" if self.is_re else "" + if self.whitespace_strings: + whitespace1_s = self.whitespace_strings[0] + whitespace2_s = self.whitespace_strings[1] + whitespace3_s = self.whitespace_strings[2] + else: + whitespace1_s = " " + whitespace2_s = "" + whitespace3_s = "" + if count_s and not whitespace2_s: + whitespace2_s = " " # required to parse correctly + elif not count_s and whitespace2_s == " ": + """Don't emit 
a weird extra space. + However if the whitespace is something other than the + standard single space, let it be to avoid disrupting manual formatting. + The existence of a non-empty whitespace2_s implies this was parsed with + a count > 1 and then decremented, otherwise this whitespace would have + been parsed as whitespace3_s. + """ + whitespace2_s = "" + return f"//{whitespace1_s}{self.prefix}-{self.category}{re_s}{line_location_s}{whitespace2_s}{count_s}{whitespace3_s}{{{{{self.diag_content}}}}}" + + +expected_diag_re = re.compile( + r"//(\s*)([a-zA-Z]+)-(note|warning|error)(-re)?(@[+-]?\d+)?(?:(\s*)(\d+))?(\s*)\{\{(.*)\}\}" +) + + +def parse_diag(line, filename, lines, prefix): + s = line.content + ms = expected_diag_re.findall(s) + if not ms: + return None + if len(ms) > 1: + raise KnownException( + f"multiple diags on line {filename}:{line.line_n}. Aborting due to missing implementation." + ) + [ + whitespace1_s, + check_prefix, + category_s, + re_s, + target_line_s, + whitespace2_s, + count_s, + whitespace3_s, + diag_s, + ] = ms[0] + if check_prefix != prefix: + return None + if not target_line_s: + target_line_n = 0 + is_absolute = False + elif target_line_s.startswith("@+"): + target_line_n = int(target_line_s[2:]) + is_absolute = False + elif target_line_s.startswith("@-"): + target_line_n = int(target_line_s[1:]) + is_absolute = False + else: + target_line_n = int(target_line_s[1:]) + is_absolute = True + count = int(count_s) if count_s else 1 + line.content = expected_diag_re.sub("{{DIAG}}", s) + + return Diag( + prefix, + diag_s, + category_s, + target_line_n, + is_absolute, + count, + line, + bool(re_s), + [whitespace1_s, whitespace2_s, whitespace3_s], + True, + ) + + +def add_line(new_line, lines): + lines.insert(new_line.line_n - 1, new_line) + for i in range(new_line.line_n, len(lines)): + line = lines[i] + assert line.line_n == i + line.update_line_n(i + 1) + assert all(line.line_n == i + 1 for i, line in enumerate(lines)) + + +def remove_line(old_line, lines): + lines.remove(old_line) + for i in range(old_line.line_n - 1, len(lines)): + line = lines[i] + assert line.line_n == i + 2 + line.update_line_n(i + 1) + assert all(line.line_n == i + 1 for i, line in enumerate(lines)) + + +indent_re = re.compile(r"\s*") + + +def get_indent(s): + return indent_re.match(s).group(0) + + +def orig_line_n_to_new_line_n(line_n, orig_lines): + return orig_lines[line_n - 1].line_n + + +def add_diag(orig_line_n, diag_s, diag_category, lines, orig_lines, prefix): + line_n = orig_line_n_to_new_line_n(orig_line_n, orig_lines) + target = lines[line_n - 1] + for other in target.targeting_diags: + if other.is_re: + raise KnownException( + "mismatching diag on line with regex matcher. 
Skipping due to missing implementation" + ) + reverse = ( + True + if [other for other in target.targeting_diags if other.relative_target() < 0] + else False + ) + + targeting = [ + other for other in target.targeting_diags if not other.line_is_absolute + ] + targeting.sort(reverse=reverse, key=lambda d: d.relative_target()) + prev_offset = 0 + prev_line = target + direction = -1 if reverse else 1 + for d in targeting: + if d.relative_target() != prev_offset + direction: + break + prev_offset = d.relative_target() + prev_line = d.line + total_offset = prev_offset - 1 if reverse else prev_offset + 1 + if reverse: + new_line_n = prev_line.line_n + 1 + else: + new_line_n = prev_line.line_n + assert new_line_n == line_n + (not reverse) - total_offset + + new_line = Line(get_indent(prev_line.content) + "{{DIAG}}\n", new_line_n) + add_line(new_line, lines) + + whitespace_strings = prev_line.diag.whitespace_strings if prev_line.diag else None + new_diag = Diag( + prefix, + diag_s, + diag_category, + total_offset, + False, + 1, + new_line, + False, + whitespace_strings, + False, + ) + new_line.diag = new_diag + new_diag.set_target(target) + + +def remove_dead_diags(lines): + for line in lines: + if not line.diag or line.diag.count != 0: + continue + if line.render() == "": + remove_line(line, lines) + else: + assert line.diag.is_from_source_file + for other_diag in line.targeting_diags: + if ( + other_diag.is_from_source_file + or other_diag.count == 0 + or other_diag.category != line.diag.category + ): + continue + if other_diag.is_re or line.diag.is_re: + continue + line.diag.take(other_diag) + remove_line(other_diag.line, lines) + + +def has_live_diags(lines): + for line in lines: + if line.diag and line.diag.count > 0: + return True + return False + + +def get_expected_no_diags_line_n(lines, prefix): + for line in lines: + if f"{prefix}-no-diagnostics" in line.content: + return line.line_n + return None + + +def update_test_file(filename, diag_errors, prefix, updated_test_files): + dprint(f"updating test file {filename}") + if filename in updated_test_files: + raise KnownException(f"{filename} already updated, but got new output") + else: + updated_test_files.add(filename) + with open(filename, "r") as f: + lines = [Line(line, i + 1) for i, line in enumerate(f.readlines())] + orig_lines = list(lines) + expected_no_diags_line_n = get_expected_no_diags_line_n(orig_lines, prefix) + + for line in lines: + diag = parse_diag(line, filename, lines, prefix) + if diag: + line.diag = diag + diag.set_target(lines[diag.absolute_target() - 1]) + + for line_n, diag_s, diag_category, seen in diag_errors: + if seen: + continue + # this is a diagnostic expected but not seen + assert lines[line_n - 1].diag + if diag_s != lines[line_n - 1].diag.diag_content: + raise KnownException( + f"{filename}:{line_n} - found diag {lines[line_n - 1].diag.diag_content} but expected {diag_s}" + ) + if diag_category != lines[line_n - 1].diag.category: + raise KnownException( + f"{filename}:{line_n} - found {lines[line_n - 1].diag.category} diag but expected {diag_category}" + ) + lines[line_n - 1].diag.decrement_count() + diag_errors_left = [] + diag_errors.sort(reverse=True, key=lambda t: t[0]) + for line_n, diag_s, diag_category, seen in diag_errors: + if not seen: + continue + target = orig_lines[line_n - 1] + other_diags = [ + d + for d in target.targeting_diags + if d.diag_content == diag_s and d.category == diag_category + ] + other_diag = other_diags[0] if other_diags else None + if other_diag: + 
other_diag.increment_count() + else: + add_diag(line_n, diag_s, diag_category, lines, orig_lines, prefix) + remove_dead_diags(lines) + has_diags = has_live_diags(lines) + with open(filename, "w") as f: + if not has_diags and expected_no_diags_line_n is None: + f.write("// expected-no-diagnostics\n") + for line in lines: + if has_diags and line.line_n == expected_no_diags_line_n: + continue + f.write(line.render()) + + +def update_test_files(errors, prefix): + errors_by_file = {} + for (filename, line, diag_s), (diag_category, seen) in errors: + if filename not in errors_by_file: + errors_by_file[filename] = [] + errors_by_file[filename].append((line, diag_s, diag_category, seen)) + updated_test_files = set() + for filename, diag_errors in errors_by_file.items(): + try: + update_test_file(filename, diag_errors, prefix, updated_test_files) + except KnownException as e: + return f"Error in update-verify-tests while updating {filename}: {e}" + updated_files = list(updated_test_files) + assert updated_files + if len(updated_files) == 1: + return f"updated file {updated_files[0]}" + updated_files_s = "\n\t".join(updated_files) + return f"updated files:\n\t{updated_files_s}" + + +def check_expectations(tool_output, prefix): + """ + The entry point function. + Called by the stand-alone update-verify-tests.py as well as litplugin.py. + """ + curr = [] + curr_category = None + try: + for line in tool_output: + if line.startswith("error: "): + curr_category = parse_error_category(line[len("error: ") :], prefix) + continue + + diag_error = parse_diag_error(line.strip()) + if diag_error: + curr.append((diag_error, curr_category)) + else: + dprint("no match") + dprint(line.strip()) + except KnownException as e: + return f"Error in update-verify-tests while parsing tool output: {e}" + if curr: + return update_test_files(curr, prefix) + else: + return "no mismatching diagnostics found" diff --git a/clang/utils/update-verify-tests.py b/clang/utils/update-verify-tests.py new file mode 100644 index 0000000000000..e2874a8c049ef --- /dev/null +++ b/clang/utils/update-verify-tests.py @@ -0,0 +1,38 @@ +import sys +import argparse +from UpdateVerifyTests.core import check_expectations + +DESCRIPTION = """ + Pipe output from clang's -verify into this script to have the test case updated to expect the actual diagnostic output. + When inserting new expected-* checks, it will place them on the line before the location of the diagnostic, with an @+1, + or @+N for some N if there are multiple diagnostics emitted on the same line. If the current checks are using @-N for + this line, the new check will follow that convention also. + Existing checks will be left untouched as much as possible, including their location and whitespace content, to minimize + diffs. If inaccurate, their count will be updated, or the check removed entirely. + + Missing features: + - multiple prefixes on the same line (-verify=my-prefix,my-other-prefix) + - multiple prefixes on separate RUN lines (RUN: -verify=my-prefix\nRUN: -verify my-other-prefix) + - regexes with expected-*-re: existing ones will be left untouched if accurate, but the script will abort if there are any +diagnostic mismatches on the same line.
+ - multiple checks targeting the same line are supported, but a line may only contain one check + - if multiple checks targeting the same line are failing, the script is not guaranteed to produce a minimal diff + +Example usage: + clang -verify [file] | python3 update-verify-tests.py + clang -verify=check [file] | python3 update-verify-tests.py --prefix check +""" + + +def main(): + parser = argparse.ArgumentParser(description=DESCRIPTION) + parser.add_argument( + "--prefix", default="expected", help="The prefix passed to -verify" + ) + args = parser.parse_args() + output = check_expectations(sys.stdin.readlines(), args.prefix) + print(output) + + +if __name__ == "__main__": + main() diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index 069defc970190..dbe6094541f63 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -63,6 +63,7 @@ enum ProcessorTypes { INTEL_SIERRAFOREST, INTEL_GRANDRIDGE, INTEL_CLEARWATERFOREST, + AMDFAM1AH, CPU_TYPE_MAX }; @@ -101,6 +102,7 @@ enum ProcessorSubtypes { INTEL_COREI7_ARROWLAKE, INTEL_COREI7_ARROWLAKE_S, INTEL_COREI7_PANTHERLAKE, + AMDFAM1AH_ZNVER5, CPU_SUBTYPE_MAX }; @@ -748,6 +750,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, break; // "znver4" } break; // family 19h + case 26: + CPU = "znver5"; + *Type = AMDFAM1AH; + if (Model <= 0x77) { + // Models 00h-0Fh (Breithorn). + // Models 10h-1Fh (Breithorn-Dense). + // Models 20h-2Fh (Strix 1). + // Models 30h-37h (Strix 2). + // Models 38h-3Fh (Strix 3). + // Models 40h-4Fh (Granite Ridge). + // Models 50h-5Fh (Weisshorn). + // Models 60h-6Fh (Krackan1). + // Models 70h-77h (Sarlak). + CPU = "znver5"; + *Subtype = AMDFAM1AH_ZNVER5; + break; // "znver5" + } + break; default: break; // Unknown AMD CPU.
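/* A worked example of the update-verify-tests.py flow described in the
   docstring above (kept in comments here; the file name and diagnostic
   wording are illustrative, not taken from this patch).

   t.c before, a test whose expectations are missing:

     int f(void) { return x; }

   Piping the failing -verify run through the tool,

     not clang -verify t.c 2>&1 | python3 update-verify-tests.py

   inserts a check on the preceding line, pointing at the diagnostic with
   the @+1 convention:

     // expected-error@+1 {{use of undeclared identifier 'x'}}
     int f(void) { return x; }

   Re-running clang -verify t.c then passes, which is exactly what the
   accompanying lit tests assert with their final RUN lines. */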
} diff --git a/libc/src/__support/OSUtil/linux/vdso_sym.h b/libc/src/__support/OSUtil/linux/vdso_sym.h index eb5f204a82f30..968e1536c4d27 100644 --- a/libc/src/__support/OSUtil/linux/vdso_sym.h +++ b/libc/src/__support/OSUtil/linux/vdso_sym.h @@ -44,8 +44,8 @@ template <VDSOSym sym> LIBC_INLINE constexpr auto dispatcher() { else if constexpr (sym == VDSOSym::ClockGetTime64) return static_cast(nullptr); else if constexpr (sym == VDSOSym::GetTimeOfDay) - return static_cast( - nullptr); + return static_cast(nullptr); else if constexpr (sym == VDSOSym::GetCpu) return static_cast( nullptr); diff --git a/libc/src/__support/time/linux/CMakeLists.txt b/libc/src/__support/time/linux/CMakeLists.txt index 1b41c7cb0a98a..f038cb8854b9b 100644 --- a/libc/src/__support/time/linux/CMakeLists.txt +++ b/libc/src/__support/time/linux/CMakeLists.txt @@ -9,6 +9,7 @@ add_header_library( libc.src.__support.common libc.src.__support.error_or libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.vdso ) add_header_library( diff --git a/libc/src/__support/time/linux/clock_gettime.h b/libc/src/__support/time/linux/clock_gettime.h index eca1ba70de592..517cca91391a7 100644 --- a/libc/src/__support/time/linux/clock_gettime.h +++ b/libc/src/__support/time/linux/clock_gettime.h @@ -11,26 +11,47 @@ #include "hdr/types/clockid_t.h" #include "hdr/types/struct_timespec.h" +#include "src/__support/OSUtil/linux/vdso.h" #include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/macros/config.h" #include <sys/syscall.h> +#if defined(SYS_clock_gettime64) +#include <linux/time_types.h> +#endif + namespace LIBC_NAMESPACE_DECL { namespace internal { LIBC_INLINE ErrorOr<int> clock_gettime(clockid_t clockid, timespec *ts) { -#if SYS_clock_gettime - int ret = LIBC_NAMESPACE::syscall_impl<int>(SYS_clock_gettime, - static_cast<long>(clockid), - reinterpret_cast<long>(ts)); + using namespace vdso; + int ret; +#if defined(SYS_clock_gettime) + TypedSymbol<VDSOSym::ClockGetTime> clock_gettime; + if (LIBC_LIKELY(clock_gettime != nullptr)) + ret = clock_gettime(clockid, ts); + else + ret = LIBC_NAMESPACE::syscall_impl<int>(SYS_clock_gettime, + static_cast<long>(clockid), + reinterpret_cast<long>(ts)); #elif defined(SYS_clock_gettime64) static_assert( sizeof(time_t) == sizeof(int64_t), "SYS_clock_gettime64 requires struct timespec with 64-bit members."); + + TypedSymbol<VDSOSym::ClockGetTime64> clock_gettime64; + __kernel_timespec ts64{}; + if (LIBC_LIKELY(clock_gettime64 != nullptr)) + ret = clock_gettime64(clockid, &ts64); + else + ret = LIBC_NAMESPACE::syscall_impl<int>(SYS_clock_gettime64, + static_cast<long>(clockid), + reinterpret_cast<long>(&ts64)); + if (ret == 0) { + ts->tv_sec = static_cast<decltype(ts->tv_sec)>(ts64.tv_sec); + ts->tv_nsec = static_cast<decltype(ts->tv_nsec)>(ts64.tv_nsec); + } #else #error "SYS_clock_gettime and SYS_clock_gettime64 syscalls not available."
#endif diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv index b3c26933a9c2a..d449c9d39c3b3 100644 --- a/libcxx/docs/Status/Cxx20Papers.csv +++ b/libcxx/docs/Status/Cxx20Papers.csv @@ -152,7 +152,7 @@ "`P1855R0 `__","Make ````\ freestanding","2019-11 (Belfast)","","","" "`P1862R1 `__","Ranges adaptors for non-copyable iterators","2019-11 (Belfast)","|Complete|","16.0","" "`P1865R1 `__","Add max() to latch and barrier","2019-11 (Belfast)","|Complete|","11.0","" -"`P1869R1 `__","Rename 'condition_variable_any' interruptible wait methods","2019-11 (Belfast)","","","" +"`P1869R1 `__","Rename 'condition_variable_any' interruptible wait methods","2019-11 (Belfast)","|Complete|","18.0","" "`P1870R1 `__","forwarding-range is too subtle","2019-11 (Belfast)","|Complete|","15.0","" "`P1871R1 `__","Concept traits should be named after concepts","2019-11 (Belfast)","|Complete|","14.0","" "`P1872R0 `__","span should have size_type, not index_type","2019-11 (Belfast)","|Complete|","10.0","" diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 0f33885f7df37..ee54fa39fc3d6 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -774,6 +774,10 @@ void StringChunk::writeTo(uint8_t *buf) const { buf[str.size()] = '\0'; } +ImportThunkChunk::ImportThunkChunk(COFFLinkerContext &ctx, Defined *s) + : NonSectionCodeChunk(ImportThunkKind), live(!ctx.config.doGC), + impSymbol(s), ctx(ctx) {} + ImportThunkChunkX64::ImportThunkChunkX64(COFFLinkerContext &ctx, Defined *s) : ImportThunkChunk(ctx, s) { // Intel Optimization Manual says that all branch targets diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 040a249aabf59..24d7c37de7f3b 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -557,10 +557,13 @@ static const uint8_t importThunkARM64EC[] = { // contents will be a JMP instruction to some __imp_ symbol. class ImportThunkChunk : public NonSectionCodeChunk { public: - ImportThunkChunk(COFFLinkerContext &ctx, Defined *s) - : NonSectionCodeChunk(ImportThunkKind), impSymbol(s), ctx(ctx) {} + ImportThunkChunk(COFFLinkerContext &ctx, Defined *s); static bool classof(const Chunk *c) { return c->kind() == ImportThunkKind; } + // We track the usage of the thunk symbol separately from the import file + // to avoid generating unnecessary thunks. + bool live; + protected: Defined *impSymbol; COFFLinkerContext &ctx; @@ -598,13 +601,17 @@ class ImportThunkChunkARM : public ImportThunkChunk { class ImportThunkChunkARM64 : public ImportThunkChunk { public: - explicit ImportThunkChunkARM64(COFFLinkerContext &ctx, Defined *s) - : ImportThunkChunk(ctx, s) { + explicit ImportThunkChunkARM64(COFFLinkerContext &ctx, Defined *s, + MachineTypes machine) + : ImportThunkChunk(ctx, s), machine(machine) { setAlignment(4); } size_t getSize() const override { return sizeof(importThunkARM64); } void writeTo(uint8_t *buf) const override; - MachineTypes getMachine() const override { return ARM64; } + MachineTypes getMachine() const override { return machine; } + +private: + MachineTypes machine; }; // ARM64EC __impchk_* thunk implementation. 
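The new ImportThunkChunk::live flag above moves thunk liveness from the import file onto the thunk chunk itself, so the __imp_ pointer and its thunk can be garbage-collected independently. A minimal sketch of that shape, with illustrative names rather than the actual lld classes:

    struct ThunkChunk {
      bool live = false; // set by mark-live when the thunk symbol is referenced
    };
    struct ImportEntry {
      bool live = false;           // set when __imp_<name> itself is referenced
      ThunkChunk *thunk = nullptr; // emitted only when thunk->live is set
    };

The writer then walks the import entries and emits a thunk chunk only when its own live bit is set, even if the entry stays live because the import pointer is referenced directly; that is the chunk->live check the Writer.cpp hunk below performs.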
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 569220468e96a..94ad7f3ceb306 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -1002,7 +1002,7 @@ void ObjFile::enqueuePdbFile(StringRef path, ObjFile *fromFile) { } ImportFile::ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m) - : InputFile(ctx, ImportKind, m), live(!ctx.config.doGC), thunkLive(live) {} + : InputFile(ctx, ImportKind, m), live(!ctx.config.doGC) {} MachineTypes ImportFile::getMachineType() const { uint16_t machine = @@ -1018,7 +1018,7 @@ ImportThunkChunk *ImportFile::makeImportThunk() { case I386: return make(ctx, impSym); case ARM64: - return make(ctx, impSym); + return make(ctx, impSym, ARM64); case ARMNT: return make(ctx, impSym); } @@ -1109,7 +1109,14 @@ void ImportFile::parse() { } else { thunkSym = ctx.symtab.addImportThunk( name, impSym, make(ctx, impSym)); - // FIXME: Add aux IAT symbols. + + if (std::optional mangledName = + getArm64ECMangledFunctionName(name)) { + StringRef auxThunkName = saver().save(*mangledName); + auxThunkSym = ctx.symtab.addImportThunk( + auxThunkName, impECSym, + make(ctx, impECSym, ARM64EC)); + } StringRef impChkName = saver().save("__impchk_" + name); impchkThunk = make(this); diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 8140a031f7116..acf221d85ae8f 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -365,17 +365,15 @@ class ImportFile : public InputFile { // Auxiliary IAT symbol and chunk on ARM64EC. DefinedImportData *impECSym = nullptr; Chunk *auxLocation = nullptr; + Symbol *auxThunkSym = nullptr; // We want to eliminate dllimported symbols if no one actually refers to them. // These "Live" bits are used to keep track of which import library members // are actually in use. // // If the Live bit is turned off by MarkLive, Writer will ignore dllimported - // symbols provided by this import library member. We also track whether the - // imported symbol is used separately from whether the thunk is used in order - // to avoid creating unnecessary thunks. + // symbols provided by this import library member. bool live; - bool thunkLive; }; // Used for LTO. 
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp index ed521dd375ed0..52e9ce996f239 100644 --- a/lld/COFF/MapFile.cpp +++ b/lld/COFF/MapFile.cpp @@ -125,7 +125,7 @@ static void getSymbols(const COFFLinkerContext &ctx, if (!file->thunkSym) continue; - if (!file->thunkLive) + if (!file->thunkSym->isLive()) continue; if (auto *thunkSym = dyn_cast(file->thunkSym)) diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp index 8af58780e1358..3c09baa73a9f7 100644 --- a/lld/COFF/MarkLive.cpp +++ b/lld/COFF/MarkLive.cpp @@ -58,7 +58,7 @@ void markLive(COFFLinkerContext &ctx) { addImportFile(sym->file); } else if (auto *sym = dyn_cast(b)) { addImportFile(sym->wrappedSym->file); - sym->wrappedSym->file->thunkLive = true; + sym->getChunk()->live = true; } }; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index c0739b37aeb0f..9b035f53ef49c 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -1527,8 +1527,8 @@ void PDBLinker::addImportFilesToPDB() { if (!file->thunkSym) continue; - if (!file->thunkLive) - continue; + if (!file->thunkSym->isLive()) + continue; std::string dll = StringRef(file->dllName).lower(); llvm::pdb::DbiModuleDescriptorBuilder *&mod = dllToModuleDbi[dll]; diff --git a/lld/COFF/Symbols.cpp b/lld/COFF/Symbols.cpp index 5f4d797f74a2d..567c2b93776c9 100644 --- a/lld/COFF/Symbols.cpp +++ b/lld/COFF/Symbols.cpp @@ -84,7 +84,7 @@ bool Symbol::isLive() const { if (auto *imp = dyn_cast(this)) return imp->file->live; if (auto *imp = dyn_cast(this)) - return imp->wrappedSym->file->thunkLive; + return imp->getChunk()->live; // Assume any other kind of symbol is live. return true; } diff --git a/lld/COFF/Symbols.h b/lld/COFF/Symbols.h index 2df60a01ec813..9b21e09bf83a4 100644 --- a/lld/COFF/Symbols.h +++ b/lld/COFF/Symbols.h @@ -395,12 +395,12 @@ class DefinedImportThunk : public Defined { } uint64_t getRVA() { return data->getRVA(); } - Chunk *getChunk() { return data; } + ImportThunkChunk *getChunk() const { return data; } DefinedImportData *wrappedSym; private: - Chunk *data; + ImportThunkChunk *data; }; // If you have a symbol "foo" in your object file, a symbol name diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 9a8040008e73c..216db652c10aa 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1252,14 +1252,22 @@ void Writer::appendImportThunks() { if (!file->live) continue; - if (!file->thunkSym) - continue; + if (file->thunkSym) { + if (!isa(file->thunkSym)) + fatal(toString(ctx, *file->thunkSym) + " was replaced"); + auto *chunk = cast(file->thunkSym)->getChunk(); + if (chunk->live) + textSec->addChunk(chunk); + } + + if (file->auxThunkSym) { + if (!isa(file->auxThunkSym)) + fatal(toString(ctx, *file->auxThunkSym) + " was replaced"); + auto *chunk = cast(file->auxThunkSym)->getChunk(); + if (chunk->live) + textSec->addChunk(chunk); + } - if (!isa(file->thunkSym)) - fatal(toString(ctx, *file->thunkSym) + " was replaced"); - DefinedImportThunk *thunk = cast(file->thunkSym); - if (file->thunkLive) - textSec->addChunk(thunk->getChunk()); if (file->impchkThunk) textSec->addChunk(file->impchkThunk); } diff --git a/lld/test/COFF/arm64ec-import.test b/lld/test/COFF/arm64ec-import.test index f8279cefc3bcf..e403daa41f368 100644 --- a/lld/test/COFF/arm64ec-import.test +++ b/lld/test/COFF/arm64ec-import.test @@ -39,25 +39,31 @@ RUN: llvm-objdump -d out2.dll | FileCheck --check-prefix=DISASM %s DISASM: 180001000: 52800000 mov w0, #0x0 // =0 DISASM-NEXT: 180001004: d65f03c0 ret -DISASM-NEXT: 180001008: d000000b adrp x11, 0x180003000 -DISASM-NEXT: 18000100c: 
f940056b ldr x11, [x11, #0x8] -DISASM-NEXT: 180001010: 9000000a adrp x10, 0x180001000 <.text> -DISASM-NEXT: 180001014: 9101114a add x10, x10, #0x44 -DISASM-NEXT: 180001018: 17fffffa b 0x180001000 <.text> -DISASM-NEXT: 18000101c: d000000b adrp x11, 0x180003000 -DISASM-NEXT: 180001020: f940096b ldr x11, [x11, #0x10] -DISASM-NEXT: 180001024: f0ffffea adrp x10, 0x180000000 -DISASM-NEXT: 180001028: 9100014a add x10, x10, #0x0 -DISASM-NEXT: 18000102c: 17fffff5 b 0x180001000 <.text> -DISASM-NEXT: 180001030: d000000b adrp x11, 0x180003000 -DISASM-NEXT: 180001034: f940116b ldr x11, [x11, #0x20] -DISASM-NEXT: 180001038: 9000000a adrp x10, 0x180001000 <.text> -DISASM-NEXT: 18000103c: 9101314a add x10, x10, #0x4c -DISASM-NEXT: 180001040: 17fffff0 b 0x180001000 <.text> -DISASM-NEXT: 180001044: 52800020 mov w0, #0x1 // =1 -DISASM-NEXT: 180001048: d65f03c0 ret -DISASM-NEXT: 18000104c: 52800040 mov w0, #0x2 // =2 -DISASM-NEXT: 180001050: d65f03c0 ret +DISASM-NEXT: 180001008: 90000030 adrp x16, 0x180005000 +DISASM-NEXT: 18000100c: f9400610 ldr x16, [x16, #0x8] +DISASM-NEXT: 180001010: d61f0200 br x16 +DISASM-NEXT: 180001014: d000000b adrp x11, 0x180003000 +DISASM-NEXT: 180001018: f940056b ldr x11, [x11, #0x8] +DISASM-NEXT: 18000101c: 9000000a adrp x10, 0x180001000 <.text> +DISASM-NEXT: 180001020: 9101714a add x10, x10, #0x5c +DISASM-NEXT: 180001024: 17fffff7 b 0x180001000 <.text> +DISASM-NEXT: 180001028: d000000b adrp x11, 0x180003000 +DISASM-NEXT: 18000102c: f940096b ldr x11, [x11, #0x10] +DISASM-NEXT: 180001030: f0ffffea adrp x10, 0x180000000 +DISASM-NEXT: 180001034: 9100014a add x10, x10, #0x0 +DISASM-NEXT: 180001038: 17fffff2 b 0x180001000 <.text> +DISASM-NEXT: 18000103c: 90000030 adrp x16, 0x180005000 +DISASM-NEXT: 180001040: f9401210 ldr x16, [x16, #0x20] +DISASM-NEXT: 180001044: d61f0200 br x16 +DISASM-NEXT: 180001048: d000000b adrp x11, 0x180003000 +DISASM-NEXT: 18000104c: f940116b ldr x11, [x11, #0x20] +DISASM-NEXT: 180001050: 9000000a adrp x10, 0x180001000 <.text> +DISASM-NEXT: 180001054: 9101914a add x10, x10, #0x64 +DISASM-NEXT: 180001058: 17ffffea b 0x180001000 <.text> +DISASM-NEXT: 18000105c: 52800020 mov w0, #0x1 // =1 +DISASM-NEXT: 180001060: d65f03c0 ret +DISASM-NEXT: 180001064: 52800040 mov w0, #0x2 // =2 +DISASM-NEXT: 180001068: d65f03c0 ret DISASM-NEXT: ... 
DISASM-NEXT: 180002000: ff 25 02 10 00 00 jmpq *0x1002(%rip) # 0x180003008 @@ -65,7 +71,8 @@ RUN: llvm-readobj --hex-dump=.test out.dll | FileCheck --check-prefix=TESTSEC %s RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC %s TESTSEC: 0x180007000 08500000 00300000 10500000 20500000 TESTSEC-NEXT: 0x180007010 08300000 00500000 10300000 20300000 -TESTSEC-NEXT: 0x180007020 08100000 1c100000 00200000 +TESTSEC-NEXT: 0x180007020 14100000 28100000 00200000 08100000 +TESTSEC-NEXT: 0x180007030 3c100000 RUN: llvm-readobj --headers out.dll | FileCheck -check-prefix=HEADERS %s HEADERS: LoadConfigTableRVA: 0x4010 @@ -76,9 +83,9 @@ RUN: llvm-readobj --coff-load-config out.dll | FileCheck -check-prefix=LOADCONFIG %s LOADCONFIG: AuxiliaryIAT: 0x5000 RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck -check-prefix=RDATA %s -RDATA: 0x180005000 00000000 00000000 08100080 01000000 -RDATA-NEXT: 0x180005010 1c100080 01000000 00000000 00000000 -RDATA-NEXT: 0x180005020 30100080 01000000 00000000 00000000 +RDATA: 0x180005000 00000000 00000000 14100080 01000000 +RDATA-NEXT: 0x180005010 28100080 01000000 00000000 00000000 +RDATA-NEXT: 0x180005020 48100080 01000000 00000000 00000000 RUN: llvm-readobj --coff-basereloc out.dll | FileCheck -check-prefix=BASERELOC %s BASERELOC: BaseReloc [ @@ -110,6 +117,8 @@ arm64ec_data_sym: .rva __impchk_func .rva __impchk_func2 .rva func + .rva "#func" + .rva "#t2func" #--- icall.s .text
diff --git a/lldb/docs/resources/sbapi.rst b/lldb/docs/resources/sbapi.rst index cf32cc6c81558..4ca3909e0f291 100644 --- a/lldb/docs/resources/sbapi.rst +++ b/lldb/docs/resources/sbapi.rst @@ -72,6 +72,17 @@ building the LLDB framework for macOS, the headers are processed with ``unifdef`` prior to being copied into the framework bundle to remove macros involving SWIG. +Another good principle when adding SB API methods is: if you find yourself +implementing a significant algorithm in an SB API method, don't. Instead, +look for a method on the underlying lldb_private class that implements it, +adding one there if it doesn't exist, and call that from your SB API method, +as sketched below. If the algorithm is useful, it very likely already +exists, because the lldb_private code needed it too. And if it doesn't exist +yet, someone will probably need it in lldb_private later, and then we end up +with two implementations of the same algorithm. Keeping the SB API code to +just what's needed to manage the SB objects and requests avoids this +situation. + Lifetime -------- Many SB API methods will return strings in the form of ``const char *`` values.
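For illustration, here is a minimal sketch of the layering described above. The method ``GetNumberOfWritableVariables`` and its lldb_private counterpart are invented for this example (they are not existing API); only the shape of the delegation is the point:

.. code-block:: c++

   uint32_t SBFrame::GetNumberOfWritableVariables() {
     LLDB_INSTRUMENT_VA(this);
     // Keep the SB layer to unwrapping the opaque object and delegating.
     // The algorithm that actually walks the variable list belongs on the
     // underlying lldb_private::StackFrame class, where other lldb_private
     // code can find and reuse it.
     if (lldb::StackFrameSP frame_sp = GetFrameSP())
       return frame_sp->GetNumberOfWritableVariables();
     return 0;
   }

If the algorithm later needs caching or locking, that too lands in one place in lldb_private instead of being duplicated behind the SB boundary.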
diff --git a/lldb/examples/synthetic/gnu_libstdcpp.py b/lldb/examples/synthetic/gnu_libstdcpp.py index d98495b8a9df3..a6605a7a7eb5b 100644 --- a/lldb/examples/synthetic/gnu_libstdcpp.py +++ b/lldb/examples/synthetic/gnu_libstdcpp.py @@ -473,11 +473,7 @@ def get_child_at_index(self, index): "[" + str(index) + "]", element_offset, element_type ) bit = element.GetValueAsUnsigned(0) & (1 << bit_offset) - if bit != 0: - value_expr = "(bool)true" - else: - value_expr = "(bool)false" - return self.valobj.CreateValueFromExpression("[%d]" % index, value_expr) + return self.valobj.CreateBoolValue("[%d]" % index, bool(bit)) def update(self): try: diff --git a/lldb/include/lldb/API/SBValue.h b/lldb/include/lldb/API/SBValue.h index bec816fb45184..9090cece80f7c 100644 --- a/lldb/include/lldb/API/SBValue.h +++ b/lldb/include/lldb/API/SBValue.h @@ -145,6 +145,8 @@ class LLDB_API SBValue { // AddressOf() on the return of this call all return invalid lldb::SBValue CreateValueFromData(const char *name, lldb::SBData data, lldb::SBType type); + // Returned value has no address. + lldb::SBValue CreateBoolValue(const char *name, bool value); /// Get a child value by index from a value. /// diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index 273aac5ad4798..e1a31708d46ff 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -645,6 +645,22 @@ lldb::SBValue SBValue::CreateValueFromData(const char *name, SBData data, return sb_value; } +lldb::SBValue SBValue::CreateBoolValue(const char *name, bool value) { + LLDB_INSTRUMENT_VA(this, name); + + lldb::SBValue sb_value; + lldb::ValueObjectSP new_value_sp; + ValueLocker locker; + lldb::ValueObjectSP value_sp(GetSP(locker)); + lldb::TargetSP target_sp = m_opaque_sp->GetTargetSP(); + if (value_sp && target_sp) { + new_value_sp = + ValueObject::CreateValueObjectFromBool(target_sp, value, name); + } + sb_value.SetSP(new_value_sp); + return sb_value; +} + SBValue SBValue::GetChildAtIndex(uint32_t idx) { LLDB_INSTRUMENT_VA(this, idx); diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 06da83e26a26a..c36748963db37 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -3768,7 +3768,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { SymbolType type = eSymbolTypeInvalid; SectionSP symbol_section; - lldb::addr_t symbol_byte_size = 0; bool add_nlist = true; bool is_gsym = false; bool demangled_is_synthesized = false; @@ -4354,47 +4353,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { if (symbol_section) { const addr_t section_file_addr = symbol_section->GetFileAddress(); - if (symbol_byte_size == 0 && function_starts_count > 0) { - addr_t symbol_lookup_file_addr = nlist.n_value; - // Do an exact address match for non-ARM addresses, else get the - // closest since the symbol might be a thumb symbol which has an - // address with bit zero set. - FunctionStarts::Entry *func_start_entry = - function_starts.FindEntry(symbol_lookup_file_addr, !is_arm); - if (is_arm && func_start_entry) { - // Verify that the function start address is the symbol address - // (ARM) or the symbol address + 1 (thumb). - if (func_start_entry->addr != symbol_lookup_file_addr && - func_start_entry->addr != (symbol_lookup_file_addr + 1)) { - // Not the right entry, NULL it out... 
- func_start_entry = nullptr; - } - } - if (func_start_entry) { - func_start_entry->data = true; - - addr_t symbol_file_addr = func_start_entry->addr; - if (is_arm) - symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; - - const FunctionStarts::Entry *next_func_start_entry = - function_starts.FindNextEntry(func_start_entry); - const addr_t section_end_file_addr = - section_file_addr + symbol_section->GetByteSize(); - if (next_func_start_entry) { - addr_t next_symbol_file_addr = next_func_start_entry->addr; - // Be sure the clear the Thumb address bit when we calculate the - // size from the current and next address - if (is_arm) - next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; - symbol_byte_size = std::min( - next_symbol_file_addr - symbol_file_addr, - section_end_file_addr - symbol_file_addr); - } else { - symbol_byte_size = section_end_file_addr - symbol_file_addr; - } - } - } symbol_value -= section_file_addr; } @@ -4501,9 +4459,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { if (nlist.n_desc & N_WEAK_REF) sym[sym_idx].SetIsWeak(true); - if (symbol_byte_size > 0) - sym[sym_idx].SetByteSize(symbol_byte_size); - if (demangled_is_synthesized) sym[sym_idx].SetDemangledNameIsSynthesized(true); @@ -4622,23 +4577,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { Address symbol_addr; if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) { SectionSP symbol_section(symbol_addr.GetSection()); - uint32_t symbol_byte_size = 0; if (symbol_section) { - const addr_t section_file_addr = symbol_section->GetFileAddress(); - const FunctionStarts::Entry *next_func_start_entry = - function_starts.FindNextEntry(func_start_entry); - const addr_t section_end_file_addr = - section_file_addr + symbol_section->GetByteSize(); - if (next_func_start_entry) { - addr_t next_symbol_file_addr = next_func_start_entry->addr; - if (is_arm) - next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK; - symbol_byte_size = std::min( - next_symbol_file_addr - symbol_file_addr, - section_end_file_addr - symbol_file_addr); - } else { - symbol_byte_size = section_end_file_addr - symbol_file_addr; - } sym[sym_idx].SetID(synthetic_sym_id++); // Don't set the name for any synthetic symbols, the Symbol // object will generate one if needed when the name is accessed @@ -4650,8 +4589,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { add_symbol_addr(symbol_addr.GetFileAddress()); if (symbol_flags) sym[sym_idx].SetFlags(symbol_flags); - if (symbol_byte_size) - sym[sym_idx].SetByteSize(symbol_byte_size); ++sym_idx; } } diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp index edc568a6b47e0..ca22dacb2ba6c 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -1218,3 +1218,15 @@ Status MinidumpFileBuilder::DumpFile() { return error; } + +void MinidumpFileBuilder::DeleteFile() noexcept { + Log *log = GetLog(LLDBLog::Object); + + if (m_core_file) { + Status error = m_core_file->Close(); + if (error.Fail()) + LLDB_LOGF(log, "Failed to close minidump file: %s", error.AsCString()); + + m_core_file.reset(); + } +} diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h index 71001e26c00e9..72e5658718b3c 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h +++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h @@ -115,6 +115,9 @@ 
class MinidumpFileBuilder { // Run cleanup and write all remaining bytes to file lldb_private::Status DumpFile(); + // Delete the file if it exists + void DeleteFile() noexcept; + private: // Add data to the end of the buffer, if the buffer exceeds the flush level, // trigger a flush. diff --git a/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp index 5da69dd4f2ce7..be47991bb09fc 100644 --- a/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp +++ b/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp @@ -55,6 +55,21 @@ size_t ObjectFileMinidump::GetModuleSpecifications( return 0; } +struct DumpFailRemoveHolder { + DumpFailRemoveHolder(MinidumpFileBuilder &builder) : m_builder(builder) {} + + ~DumpFailRemoveHolder() { + if (!m_success) + m_builder.DeleteFile(); + } + + void SetSuccess() { m_success = true; } + +private: + MinidumpFileBuilder &m_builder; + bool m_success = false; +}; + bool ObjectFileMinidump::SaveCore(const lldb::ProcessSP &process_sp, lldb_private::SaveCoreOptions &options, lldb_private::Status &error) { @@ -75,6 +90,7 @@ bool ObjectFileMinidump::SaveCore(const lldb::ProcessSP &process_sp, } MinidumpFileBuilder builder(std::move(maybe_core_file.get()), process_sp, options); + DumpFailRemoveHolder request(builder); Log *log = GetLog(LLDBLog::Object); error = builder.AddHeaderAndCalculateDirectories(); @@ -133,5 +149,7 @@ bool ObjectFileMinidump::SaveCore(const lldb::ProcessSP &process_sp, return false; } + request.SetSuccess(); + return true; }
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp index 97fff4b9f65a8..80b27571f43d5 100644 --- a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp +++ b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp @@ -319,9 +319,12 @@ void NativeProcessFreeBSD::MonitorSIGTRAP(lldb::pid_t pid) { info.pl_siginfo.si_addr); if (thread) { + auto &regctx = static_cast<NativeRegisterContextFreeBSD &>( + thread->GetRegisterContext()); auto thread_info = m_threads_stepping_with_breakpoint.find(thread->GetID()); - if (thread_info != m_threads_stepping_with_breakpoint.end()) { + if (thread_info != m_threads_stepping_with_breakpoint.end() && + thread_info->second == regctx.GetPC()) { thread->SetStoppedByTrace(); Status brkpt_error = RemoveBreakpoint(thread_info->second); if (brkpt_error.Fail())
diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp index 5c262db8db7fd..38b7092682873 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -829,8 +829,11 @@ void NativeProcessLinux::MonitorBreakpoint(NativeThreadLinux &thread) { thread.SetStoppedByBreakpoint(); FixupBreakpointPCAsNeeded(thread); - if (m_threads_stepping_with_breakpoint.find(thread.GetID()) != - m_threads_stepping_with_breakpoint.end()) + NativeRegisterContextLinux &reg_ctx = thread.GetRegisterContext(); + auto stepping_with_bp_it = + m_threads_stepping_with_breakpoint.find(thread.GetID()); + if (stepping_with_bp_it != m_threads_stepping_with_breakpoint.end() && + stepping_with_bp_it->second == reg_ctx.GetPC()) thread.SetStoppedByTrace(); StopRunningThreads(thread.GetID());
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 5eaf9ce2a302a..271ff61a7188a 100644 ---
a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -2317,6 +2317,8 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor &stop_packet) { StreamString ostr; ostr.Printf("%" PRIu64, wp_addr); description = std::string(ostr.GetString()); + } else if (key.compare("swbreak") == 0 || key.compare("hwbreak") == 0) { + reason = "breakpoint"; } else if (key.compare("library") == 0) { auto error = LoadModules(); if (error) {
diff --git a/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py b/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py index 2cbe20ee10b1a..ccdb6653cf16f 100644 --- a/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py +++ b/lldb/test/API/functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py @@ -493,3 +493,32 @@ def test_save_minidump_custom_save_style_duplicated_regions(self): finally: self.assertTrue(self.dbg.DeleteTarget(target)) + + @skipUnlessPlatform(["linux"]) + def test_minidump_deleted_on_save_failure(self): + """Test that verifies the minidump file is deleted after an error""" + + self.build() + exe = self.getBuildArtifact("a.out") + try: + target = self.dbg.CreateTarget(exe) + process = target.LaunchSimple( + None, None, self.get_process_working_directory() + ) + self.assertState(process.GetState(), lldb.eStateStopped) + + custom_file = self.getBuildArtifact("core.should.be.deleted.custom.dmp") + options = lldb.SBSaveCoreOptions() + options.SetOutputFile(lldb.SBFileSpec(custom_file)) + options.SetPluginName("minidump") + options.SetStyle(lldb.eSaveCoreCustomOnly) + # We set custom only and have no thread list and have no memory. + error = process.SaveCore(options) + self.assertTrue(error.Fail()) + self.assertIn( + "no valid address ranges found for core style", error.GetCString() + ) + self.assertTrue(not os.path.isfile(custom_file)) + + finally: + self.assertTrue(self.dbg.DeleteTarget(target))
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index a15af9adfa945..cf52cd1522847 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -207,7 +207,7 @@ on support follow. ``Zkt`` Supported ``Zmmul`` Supported ``Ztso`` Supported - ``Zvbb`` Assembly Support + ``Zvbb`` Supported ``Zvbc`` Assembly Support ``Zve32x`` (`Partially <#riscv-vlen-32-note>`__) Supported ``Zve32f`` (`Partially <#riscv-vlen-32-note>`__) Supported @@ -217,7 +217,7 @@ on support follow. ``Zvfbfmin`` Supported ``Zvfbfwma`` Supported ``Zvfh`` Supported - ``Zvkb`` Assembly Support + ``Zvkb`` Supported ``Zvkg`` Assembly Support ``Zvkn`` Assembly Support ``Zvknc`` Assembly Support
diff --git a/llvm/include/llvm/SandboxIR/PassManager.h b/llvm/include/llvm/SandboxIR/PassManager.h index 5e250641f3b3f..2cc669a966e0b 100644 --- a/llvm/include/llvm/SandboxIR/PassManager.h +++ b/llvm/include/llvm/SandboxIR/PassManager.h @@ -72,6 +72,7 @@ class PassRegistry { DenseMap<StringRef, Pass *> NameToPassMap; public: + static constexpr const char PassDelimToken = ','; PassRegistry() = default; /// Registers \p PassPtr and takes ownership. Pass &registerPass(std::unique_ptr<Pass> &&PassPtr) { @@ -85,6 +86,9 @@ auto It = NameToPassMap.find(Name); return It != NameToPassMap.end() ? It->second : nullptr; } + /// Creates a pass pipeline and returns the first pass manager.
+ FunctionPassManager &parseAndCreatePassPipeline(StringRef Pipeline); + #ifndef NDEBUG void print(raw_ostream &OS) const { for (const auto &PassPtr : Passes) diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h index 5b57d5cebc334..d21b8a85161e4 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIR.h +++ b/llvm/include/llvm/SandboxIR/SandboxIR.h @@ -124,6 +124,7 @@ class ConstantAggregateZero; class ConstantPointerNull; class PoisonValue; class BlockAddress; +class DSOLocalEquivalent; class ConstantTokenNone; class GlobalValue; class Context; @@ -328,6 +329,7 @@ class Value { friend class PoisonValue; // For `Val`. friend class BlockAddress; // For `Val`. friend class GlobalValue; // For `Val`. + friend class DSOLocalEquivalent; // For `Val`. /// All values point to the context. Context &Ctx; @@ -1218,6 +1220,38 @@ class BlockAddress final : public Constant { } }; +class DSOLocalEquivalent final : public Constant { + DSOLocalEquivalent(llvm::DSOLocalEquivalent *C, Context &Ctx) + : Constant(ClassID::DSOLocalEquivalent, C, Ctx) {} + friend class Context; // For constructor. + +public: + /// Return a DSOLocalEquivalent for the specified global value. + static DSOLocalEquivalent *get(GlobalValue *GV); + + GlobalValue *getGlobalValue() const; + + /// For isa/dyn_cast. + static bool classof(const sandboxir::Value *From) { + return From->getSubclassID() == ClassID::DSOLocalEquivalent; + } + + unsigned getUseOperandNo(const Use &Use) const final { + llvm_unreachable("DSOLocalEquivalent has no operands!"); + } + +#ifndef NDEBUG + void verify() const override { + assert(isa(Val) && + "Expected a DSOLocalEquivalent!"); + } + void dumpOS(raw_ostream &OS) const override { + dumpCommonPrefix(OS); + dumpCommonSuffix(OS); + } +#endif +}; + // TODO: This should inherit from ConstantData. class ConstantTokenNone final : public Constant { ConstantTokenNone(llvm::ConstantTokenNone *C, Context &Ctx) diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def index 7b72f9b7173e6..c218ffee3ce38 100644 --- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def +++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def @@ -38,6 +38,7 @@ DEF_CONST(GlobalVariable, GlobalVariable) DEF_CONST(GlobalIFunc, GlobalIFunc) DEF_CONST(GlobalAlias, GlobalAlias) DEF_CONST(BlockAddress, BlockAddress) +DEF_CONST(DSOLocalEquivalent, DSOLocalEquivalent) DEF_CONST(ConstantTokenNone, ConstantTokenNone) #ifndef DEF_INSTR diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index cd160f54e6670..e5bf196559ba6 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H, "zhaoxin_fam7h") X86_CPU_TYPE(INTEL_SIERRAFOREST, "sierraforest") X86_CPU_TYPE(INTEL_GRANDRIDGE, "grandridge") X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest") +X86_CPU_TYPE(AMDFAM1AH, "amdfam1ah") // Alternate names supported by __builtin_cpu_is and target multiversioning. 
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom") X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10") X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15") +X86_CPU_TYPE_ALIAS(AMDFAM1AH, "amdfam1a") X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm") #undef X86_CPU_TYPE_ALIAS @@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d") X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake") X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s") X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake") +X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5") // Alternate names supported by __builtin_cpu_is and target multiversioning. X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake") diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h index 2083e585af4ac..0e17c4674719c 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.h +++ b/llvm/include/llvm/TargetParser/X86TargetParser.h @@ -142,6 +142,7 @@ enum CPUKind { CK_ZNVER2, CK_ZNVER3, CK_ZNVER4, + CK_ZNVER5, CK_x86_64, CK_x86_64_v2, CK_x86_64_v3, diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 41815c633fdf2..42e986e6179dd 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -1420,16 +1420,8 @@ void IRSimilarityIdentifier::findCandidates( // IRSimilarityCandidates that include that instruction. for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); - Idx <= Edx; ++Idx) { - DenseMap>::iterator - IdIt; - IdIt = IndexToIncludedCand.find(Idx); - bool Inserted = false; - if (IdIt == IndexToIncludedCand.end()) - std::tie(IdIt, Inserted) = IndexToIncludedCand.insert( - std::make_pair(Idx, DenseSet())); - IdIt->second.insert(&IRCand); - } + Idx <= Edx; ++Idx) + IndexToIncludedCand[Idx].insert(&IRCand); // Add mapping of candidate to the overall similarity group number. CandToGroup.insert( std::make_pair(&IRCand, SimilarityCandidates->size() - 1)); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index a69dbbbbdab3c..a73a3aa59403b 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -2231,11 +2231,9 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { // If this is the first sighting of this variable, then we are guaranteed // there are currently no overlapping fragments either. Initialize the set // of seen fragments, record no overlaps for the current one, and return. 
- auto SeenIt = SeenFragments.find(MIVar.getVariable()); - if (SeenIt == SeenFragments.end()) { - SmallSet OneFragment; - OneFragment.insert(ThisFragment); - SeenFragments.insert({MIVar.getVariable(), OneFragment}); + auto [SeenIt, Inserted] = SeenFragments.try_emplace(MIVar.getVariable()); + if (Inserted) { + SeenIt->second.insert(ThisFragment); OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); return; diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index fab36f4858e09..8bcc437cbfb86 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -108,9 +108,10 @@ static std::optional isCopyInstr(const MachineInstr &MI, class CopyTracker { struct CopyInfo { - MachineInstr *MI, *LastSeenUseInCopy; + MachineInstr *MI = nullptr; + MachineInstr *LastSeenUseInCopy = nullptr; SmallVector DefRegs; - bool Avail; + bool Avail = false; }; DenseMap Copies; @@ -240,8 +241,7 @@ class CopyTracker { // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. for (MCRegUnit Unit : TRI.regunits(Src)) { - auto I = Copies.insert({Unit, {nullptr, nullptr, {}, false}}); - auto &Copy = I.first->second; + auto &Copy = Copies[Unit]; if (!is_contained(Copy.DefRegs, Def)) Copy.DefRegs.push_back(Def); Copy.LastSeenUseInCopy = MI; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bb907633e1f82..fe8ae5c9e9af6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15142,26 +15142,42 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Note: We only run this optimization after type legalization (which often // creates this pattern) and before operation legalization after which // we need to be more careful about the vector instructions that we generate. - if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { - EVT VecTy = N0.getOperand(0).getValueType(); - EVT ExTy = N0.getValueType(); + if (LegalTypes && !LegalOperations && VT.isScalarInteger() && VT != MVT::i1 && + N0->hasOneUse()) { EVT TrTy = N->getValueType(0); + SDValue Src = N0; + + // Check for cases where we shift down an upper element before truncation. + int EltOffset = 0; + if (Src.getOpcode() == ISD::SRL && Src.getOperand(0)->hasOneUse()) { + if (auto ShAmt = DAG.getValidShiftAmount(Src)) { + if ((*ShAmt % TrTy.getSizeInBits()) == 0) { + Src = Src.getOperand(0); + EltOffset = *ShAmt / TrTy.getSizeInBits(); + } + } + } - auto EltCnt = VecTy.getVectorElementCount(); - unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); - auto NewEltCnt = EltCnt * SizeRatio; + if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + EVT VecTy = Src.getOperand(0).getValueType(); + EVT ExTy = Src.getValueType(); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt); - assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + auto EltCnt = VecTy.getVectorElementCount(); + unsigned SizeRatio = ExTy.getSizeInBits() / TrTy.getSizeInBits(); + auto NewEltCnt = EltCnt * SizeRatio; - SDValue EltNo = N0->getOperand(1); - if (isa(EltNo) && isTypeLegal(NVT)) { - int Elt = EltNo->getAsZExtVal(); - int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, - DAG.getBitcast(NVT, N0.getOperand(0)), - DAG.getVectorIdxConstant(Index, DL)); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = Src->getOperand(1); + if (isa(EltNo) && isTypeLegal(NVT)) { + int Elt = EltNo->getAsZExtVal(); + int Index = isLE ? (Elt * SizeRatio + EltOffset) + : (Elt * SizeRatio + (SizeRatio - 1) - EltOffset); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, + DAG.getBitcast(NVT, Src.getOperand(0)), + DAG.getVectorIdxConstant(Index, DL)); + } } } diff --git a/llvm/lib/SandboxIR/PassManager.cpp b/llvm/lib/SandboxIR/PassManager.cpp index 2dd19e74734db..4abd39b28e87a 100644 --- a/llvm/lib/SandboxIR/PassManager.cpp +++ b/llvm/lib/SandboxIR/PassManager.cpp @@ -20,6 +20,38 @@ bool FunctionPassManager::runOnFunction(Function &F) { // TODO: Check ChangeAll against hashes before/after. return Change; } + +FunctionPassManager & +PassRegistry::parseAndCreatePassPipeline(StringRef Pipeline) { + static constexpr const char EndToken = '\0'; + // Add EndToken to the end to ease parsing. + std::string PipelineStr = std::string(Pipeline) + EndToken; + int FlagBeginIdx = 0; + // Start with a FunctionPassManager. + auto &InitialPM = static_cast( + registerPass(std::make_unique("init-fpm"))); + + for (auto [Idx, C] : enumerate(PipelineStr)) { + // Keep moving Idx until we find the end of the pass name. + bool FoundDelim = C == EndToken || C == PassDelimToken; + if (!FoundDelim) + continue; + unsigned Sz = Idx - FlagBeginIdx; + std::string PassName(&PipelineStr[FlagBeginIdx], Sz); + FlagBeginIdx = Idx + 1; + + // Get the pass that corresponds to PassName and add it to the pass manager. + auto *Pass = getPassByName(PassName); + if (Pass == nullptr) { + errs() << "Pass '" << PassName << "' not registered!\n"; + exit(1); + } + // TODO: This is safe for now, but would require proper upcasting once we + // add more Pass sub-classes. 
+    InitialPM.addPass(static_cast<FunctionPass *>(Pass)); + } + return InitialPM; +} #ifndef NDEBUG void PassRegistry::dump() const { print(dbgs());
diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp index 8a7c3981e6680..04243564809db 100644 --- a/llvm/lib/SandboxIR/SandboxIR.cpp +++ b/llvm/lib/SandboxIR/SandboxIR.cpp @@ -2535,6 +2535,16 @@ BasicBlock *BlockAddress::getBasicBlock() const { Ctx.getValue(cast<llvm::BlockAddress>(Val)->getBasicBlock())); } +DSOLocalEquivalent *DSOLocalEquivalent::get(GlobalValue *GV) { + auto *LLVMC = llvm::DSOLocalEquivalent::get(cast<llvm::GlobalValue>(GV->Val)); + return cast<DSOLocalEquivalent>(GV->getContext().getValue(LLVMC)); +} + +GlobalValue *DSOLocalEquivalent::getGlobalValue() const { + return cast<GlobalValue>( + Ctx.getValue(cast<llvm::DSOLocalEquivalent>(Val)->getGlobalValue())); +} + ConstantTokenNone *ConstantTokenNone::get(Context &Ctx) { auto *LLVMC = llvm::ConstantTokenNone::get(Ctx.LLVMCtx); return cast<ConstantTokenNone>(Ctx.getOrCreateConstant(LLVMC)); @@ -2669,6 +2679,14 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) { It->second = std::unique_ptr<UndefValue>( new UndefValue(cast<llvm::UndefValue>(C), *this)); return It->second.get(); + case llvm::Value::DSOLocalEquivalentVal: { + auto *DSOLE = cast<llvm::DSOLocalEquivalent>(C); + It->second = std::unique_ptr<DSOLocalEquivalent>( + new DSOLocalEquivalent(DSOLE, *this)); + auto *Ret = It->second.get(); + getOrCreateValueInternal(DSOLE->getGlobalValue(), DSOLE); + return Ret; + } case llvm::Value::ConstantArrayVal: It->second = std::unique_ptr<ConstantArray>( new ConstantArray(cast<llvm::ConstantArray>(C), *this));
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index db5cd1d32d73d..3957d21ea695b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -179,7 +179,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v2s32, v4s32) .clampNumElements(0, v2s64, v2s64) .moreElementsToNextPow2(0) - .minScalarSameAs(1, 0); + .minScalarSameAs(1, 0) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0); getActionDefinitionsBuilder(G_PTR_ADD) .legalFor({{p0, s64}, {v2p0, v2s64}}) @@ -542,6 +543,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarOrEltToNextPow2(1) .clampScalar(1, s32, s64) .clampScalar(0, s32, s32) + .scalarizeIf(scalarOrEltWiderThan(1, 64), 1) .minScalarEltSameAsIf( [=](const LegalityQuery &Query) { const LLT &Ty = Query.Types[0]; @@ -784,6 +786,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .widenScalarToNextPow2(0) .clampScalar(0, s32, s64) .clampScalar(1, s32, s32) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0) .lowerIf(isVector(0));
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp index fc276d1063281..b7ed9de6ca84d 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp @@ -448,8 +448,12 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, if (Reg) { // Fixed-length vectors are located in the corresponding scalable-vector // container types. - if (ValVT.isFixedLengthVector()) + if (ValVT.isFixedLengthVector()) { LocVT = TLI.getContainerForFixedLengthVector(LocVT); + State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } } else { // For return values, the vector must be passed fully via registers or // via the stack.
@@ -583,8 +587,12 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, if (MCRegister Reg = allocateRVVReg(ValVT, ValNo, State, TLI)) { // Fixed-length vectors are located in the corresponding scalable-vector // container types. - if (LocVT.isFixedLengthVector()) + if (LocVT.isFixedLengthVector()) { LocVT = TLI.getContainerForFixedLengthVector(LocVT); + State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6f2dc710cb3d4..ab49315c12d68 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -19090,20 +19090,18 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, if (VA.needsCustom()) { if (VA.getLocVT().isInteger() && (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) - Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); - else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) - Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); - else - llvm_unreachable("Unexpected Custom handling."); - return Val; + return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val); + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) + return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); + if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) + return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); + llvm_unreachable("Unexpected Custom handling."); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unexpected CCValAssign::LocInfo"); case CCValAssign::Full: - if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector()) - Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget); break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); @@ -19155,20 +19153,18 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, if (VA.needsCustom()) { if (LocVT.isInteger() && (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) - Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val); - else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) - Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); - else - llvm_unreachable("Unexpected Custom handling."); - return Val; + return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val); + if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) + return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); + if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) + return convertToScalableVector(LocVT, Val, DAG, Subtarget); + llvm_unreachable("Unexpected Custom handling."); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unexpected CCValAssign::LocInfo"); case CCValAssign::Full: - if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector()) - Val = convertToScalableVector(LocVT, Val, DAG, Subtarget); break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6b4e47a49eb17..735f9dcefb97f 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -434,7 +434,8 @@ class X86AsmParser : public MCTargetAsmParser { class IntelExprStateMachine { 
IntelExprState State = IES_INIT, PrevState = IES_ERROR; - unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0; + MCRegister BaseReg, IndexReg, TmpReg; + unsigned Scale = 0; int64_t Imm = 0; const MCExpr *Sym = nullptr; StringRef SymName; @@ -468,8 +469,8 @@ class X86AsmParser : public MCTargetAsmParser { bool isBracketUsed() const { return BracketUsed; } bool isOffsetOperator() const { return OffsetOperator; } SMLoc getOffsetLoc() const { return OffsetOperatorLoc; } - unsigned getBaseReg() const { return BaseReg; } - unsigned getIndexReg() const { return IndexReg; } + MCRegister getBaseReg() const { return BaseReg; } + MCRegister getIndexReg() const { return IndexReg; } unsigned getScale() const { return Scale; } const MCExpr *getSym() const { return Sym; } StringRef getSymName() const { return SymName; } @@ -791,7 +792,7 @@ class X86AsmParser : public MCTargetAsmParser { } PrevState = CurrState; } - bool onRegister(unsigned Reg, StringRef &ErrMsg) { + bool onRegister(MCRegister Reg, StringRef &ErrMsg) { IntelExprState CurrState = State; switch (State) { default: @@ -1111,8 +1112,8 @@ class X86AsmParser : public MCTargetAsmParser { std::unique_ptr DefaultMemSIOperand(SMLoc Loc); std::unique_ptr DefaultMemDIOperand(SMLoc Loc); - bool IsSIReg(unsigned Reg); - unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg); + bool IsSIReg(MCRegister Reg); + MCRegister GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg); void AddDefaultSrcDestOperands(OperandVector &Operands, std::unique_ptr &&Src, @@ -1145,14 +1146,14 @@ class X86AsmParser : public MCTargetAsmParser { void tryParseOperandIdx(AsmToken::TokenKind PrevTK, IntelExprStateMachine &SM); - bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc, + bool ParseMemOperand(MCRegister SegReg, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, OperandVector &Operands); X86::CondCode ParseConditionCode(StringRef CCode); bool ParseIntelMemoryOperandSize(unsigned &Size); - bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, + bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp, + MCRegister BaseReg, MCRegister IndexReg, unsigned Scale, bool NonAbsMem, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, const InlineAsmIdentifierInfo &Info, @@ -1300,14 +1301,15 @@ class X86AsmParser : public MCTargetAsmParser { #define GET_SUBTARGET_FEATURE_NAME #include "X86GenAsmMatcher.inc" -static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, - unsigned Scale, bool Is64BitMode, +static bool CheckBaseRegAndIndexRegAndScale(MCRegister BaseReg, + MCRegister IndexReg, unsigned Scale, + bool Is64BitMode, StringRef &ErrMsg) { // If we have both a base register and an index register make sure they are // both 64-bit or 32-bit registers. // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 
- if (BaseReg != 0 && + if (BaseReg && !(BaseReg == X86::RIP || BaseReg == X86::EIP || X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) || X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) || @@ -1316,7 +1318,7 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, return true; } - if (IndexReg != 0 && + if (IndexReg && !(IndexReg == X86::EIZ || IndexReg == X86::RIZ || X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || @@ -1328,9 +1330,9 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, return true; } - if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) || - IndexReg == X86::EIP || IndexReg == X86::RIP || - IndexReg == X86::ESP || IndexReg == X86::RSP) { + if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg) || + IndexReg == X86::EIP || IndexReg == X86::RIP || IndexReg == X86::ESP || + IndexReg == X86::RSP) { ErrMsg = "invalid base+index expression"; return true; } @@ -1344,13 +1346,13 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, return true; } - if (BaseReg == 0 && + if (!BaseReg && X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { ErrMsg = "16-bit memory operand may not include only index register"; return true; } - if (BaseReg != 0 && IndexReg != 0) { + if (BaseReg && IndexReg) { if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || @@ -1380,8 +1382,7 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, } // RIP/EIP-relative addressing is only supported in 64-bit mode. - if (!Is64BitMode && BaseReg != 0 && - (BaseReg == X86::RIP || BaseReg == X86::EIP)) { + if (!Is64BitMode && (BaseReg == X86::RIP || BaseReg == X86::EIP)) { ErrMsg = "IP-relative addressing requires 64-bit mode"; return true; } @@ -1608,7 +1609,8 @@ ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, std::unique_ptr X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { bool Parse32 = is32BitMode() || Code16GCC; - unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI); + MCRegister Basereg = + is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI); const MCExpr *Disp = MCConstantExpr::create(0, getContext()); return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, @@ -1617,15 +1619,16 @@ std::unique_ptr X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { std::unique_ptr X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { bool Parse32 = is32BitMode() || Code16GCC; - unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI); + MCRegister Basereg = + is64BitMode() ? X86::RDI : (Parse32 ? 
X86::EDI : X86::DI); const MCExpr *Disp = MCConstantExpr::create(0, getContext()); return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); } -bool X86AsmParser::IsSIReg(unsigned Reg) { - switch (Reg) { +bool X86AsmParser::IsSIReg(MCRegister Reg) { + switch (Reg.id()) { default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!"); case X86::RSI: case X86::ESI: @@ -1638,8 +1641,7 @@ bool X86AsmParser::IsSIReg(unsigned Reg) { } } -unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, - bool IsSIReg) { +MCRegister X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg) { switch (RegClassID) { default: llvm_unreachable("Unexpected register class"); case X86::GR64RegClassID: @@ -1690,8 +1692,8 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands, // Return false and let a normal complaint about bogus operands happen return false; - unsigned OrigReg = OrigOp.Mem.BaseReg; - unsigned FinalReg = FinalOp.Mem.BaseReg; + MCRegister OrigReg = OrigOp.Mem.BaseReg; + MCRegister FinalReg = FinalOp.Mem.BaseReg; // If we've already encounterd a register class, make sure all register // bases are of the same register class @@ -1713,7 +1715,7 @@ bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands, return false; bool IsSI = IsSIReg(FinalReg); - FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI); + FinalReg = GetSIDIForRegClass(RegClassID, IsSI); if (FinalReg != OrigReg) { std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI"; @@ -1753,13 +1755,11 @@ bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) { return parseATTOperand(Operands); } -bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, - unsigned Scale, bool NonAbsMem, - SMLoc Start, SMLoc End, - unsigned Size, StringRef Identifier, - const InlineAsmIdentifierInfo &Info, - OperandVector &Operands) { +bool X86AsmParser::CreateMemForMSInlineAsm( + MCRegister SegReg, const MCExpr *Disp, MCRegister BaseReg, + MCRegister IndexReg, unsigned Scale, bool NonAbsMem, SMLoc Start, SMLoc End, + unsigned Size, StringRef Identifier, const InlineAsmIdentifierInfo &Info, + OperandVector &Operands) { // If we found a decl other than a VarDecl, then assume it is a FuncDecl or // some other label reference. if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) { @@ -2651,10 +2651,10 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) { } StringRef ErrMsg; - unsigned BaseReg = SM.getBaseReg(); - unsigned IndexReg = SM.getIndexReg(); + MCRegister BaseReg = SM.getBaseReg(); + MCRegister IndexReg = SM.getIndexReg(); if (IndexReg && BaseReg == X86::RIP) - BaseReg = 0; + BaseReg = MCRegister(); unsigned Scale = SM.getScale(); if (!PtrInOperand) Size = SM.getElementSize() << 3; @@ -2703,7 +2703,7 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) { // When parsing x64 MS-style assembly, all non-absolute references to a named // variable default to RIP-relative. 
- unsigned DefaultBaseReg = X86::NoRegister; + MCRegister DefaultBaseReg; bool MaybeDirectBranchDest = true; if (Parser.isParsingMasm()) { @@ -2738,7 +2738,7 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) { MaybeDirectBranchDest = false; } - if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister)) + if ((BaseReg || IndexReg || RegNo || DefaultBaseReg)) Operands.push_back(X86Operand::CreateMem( getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End, Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr, @@ -2782,7 +2782,7 @@ bool X86AsmParser::parseATTOperand(OperandVector &Operands) { SMLoc Loc = Parser.getTok().getLoc(), EndLoc; const MCExpr *Expr = nullptr; - unsigned Reg = 0; + MCRegister Reg; if (getLexer().isNot(AsmToken::LParen)) { // No '(' so this is either a displacement expression or a register. if (Parser.parseExpression(Expr, EndLoc)) @@ -2954,7 +2954,7 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) { /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix /// has already been parsed if present. disp may be provided as well. -bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp, +bool X86AsmParser::ParseMemOperand(MCRegister SegReg, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, OperandVector &Operands) { MCAsmParser &Parser = getParser(); @@ -3041,7 +3041,8 @@ bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp, // If we reached here, then eat the '(' and Process // the rest of the memory operand. - unsigned BaseReg = 0, IndexReg = 0, Scale = 1; + MCRegister BaseReg, IndexReg; + unsigned Scale = 1; SMLoc BaseLoc = getLexer().getLoc(); const MCExpr *E; StringRef ErrMsg; @@ -3888,14 +3889,14 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { uint64_t TSFlags = MII.get(Opcode).TSFlags; if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) || isVFMADDCSH(Opcode)) { - unsigned Dest = Inst.getOperand(0).getReg(); + MCRegister Dest = Inst.getOperand(0).getReg(); for (unsigned i = 2; i < Inst.getNumOperands(); i++) if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) return Warning(Ops[0]->getStartLoc(), "Destination register should be " "distinct from source registers"); } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) || isVFMULCSH(Opcode)) { - unsigned Dest = Inst.getOperand(0).getReg(); + MCRegister Dest = Inst.getOperand(0).getReg(); // The mask variants have different operand list. Scan from the third // operand to avoid emitting incorrect warning. 
// VFMULCPHZrr Dest, Src1, Src2 @@ -3909,8 +3910,9 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) || isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) || isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) { - unsigned Src2 = Inst.getOperand(Inst.getNumOperands() - - X86::AddrNumOperands - 1).getReg(); + MCRegister Src2 = + Inst.getOperand(Inst.getNumOperands() - X86::AddrNumOperands - 1) + .getReg(); unsigned Src2Enc = MRI->getEncodingValue(Src2); if (Src2Enc % 4 != 0) { StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2); @@ -3946,9 +3948,9 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { } else if (isTCMMIMFP16PS(Opcode) || isTCMMRLFP16PS(Opcode) || isTDPBF16PS(Opcode) || isTDPFP16PS(Opcode) || isTDPBSSD(Opcode) || isTDPBSUD(Opcode) || isTDPBUSD(Opcode) || isTDPBUUD(Opcode)) { - unsigned SrcDest = Inst.getOperand(0).getReg(); - unsigned Src1 = Inst.getOperand(2).getReg(); - unsigned Src2 = Inst.getOperand(3).getReg(); + MCRegister SrcDest = Inst.getOperand(0).getReg(); + MCRegister Src1 = Inst.getOperand(2).getReg(); + MCRegister Src2 = Inst.getOperand(3).getReg(); if (SrcDest == Src1 || SrcDest == Src2 || Src1 == Src2) return Error(Ops[0]->getStartLoc(), "all tmm registers must be distinct"); } @@ -3956,14 +3958,14 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to // check this with the legacy encoding, VEX/EVEX/XOP don't use REX. if ((TSFlags & X86II::EncodingMask) == 0) { - MCPhysReg HReg = X86::NoRegister; + MCRegister HReg; bool UsesRex = TSFlags & X86II::REX_W; unsigned NumOps = Inst.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MCOperand &MO = Inst.getOperand(i); if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); + MCRegister Reg = MO.getReg(); if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) HReg = Reg; if (X86II::isX86_64NonExtLowByteReg(Reg) || @@ -3971,7 +3973,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { UsesRex = true; } - if (UsesRex && HReg != X86::NoRegister) { + if (UsesRex && HReg) { StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg); return Error(Ops[0]->getStartLoc(), "can't encode '" + RegName + "' in an instruction requiring " @@ -4022,7 +4024,7 @@ void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) { case X86::RETI64: { MCInst ShlInst, FenceInst; bool Parse32 = is32BitMode() || Code16GCC; - unsigned Basereg = + MCRegister Basereg = is64BitMode() ? X86::RSP : (Parse32 ? 
X86::ESP : X86::SP); const MCExpr *Disp = MCConstantExpr::create(0, getContext()); auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 988966fa6a6c4..6cf37836f921d 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1549,6 +1549,19 @@ def ProcessorFeatures { FeatureVPOPCNTDQ]; list ZN4Features = !listconcat(ZN3Features, ZN4AdditionalFeatures); + + + list ZN5Tuning = ZN4Tuning; + list ZN5AdditionalFeatures = [FeatureVNNI, + FeatureMOVDIRI, + FeatureMOVDIR64B, + FeatureVP2INTERSECT, + FeaturePREFETCHI, + FeatureAVXVNNI + ]; + list ZN5Features = + !listconcat(ZN4Features, ZN5AdditionalFeatures); + } //===----------------------------------------------------------------------===// @@ -1898,6 +1911,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features, ProcessorFeatures.ZN3Tuning>; def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features, ProcessorFeatures.ZN4Tuning>; +def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features, + ProcessorFeatures.ZN5Tuning>; def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3c5b952ff62e2..3597b864705ef 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9927,11 +9927,11 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT, const SDLoc &dl); // X86 has dedicated shuffle that can be lowered to VEXPAND -static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT, - const APInt &Zeroable, - ArrayRef Mask, SDValue &V1, - SDValue &V2, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +static SDValue lowerShuffleWithEXPAND(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { bool IsLeftZeroSide = true; if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(), IsLeftZeroSide)) @@ -15966,8 +15966,8 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have VLX support, we can use VEXPAND. if (Subtarget.hasVLX()) - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v4f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; // If we have AVX2 then we always want to lower with a blend because an v4 we @@ -16046,8 +16046,8 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Rotate; - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v4i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; } @@ -16184,8 +16184,8 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have VLX support, we can use VEXPAND. 
if (Subtarget.hasVLX()) - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v8f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; // Try to match an interleave of two v8f32s and lower them as unpck and @@ -16308,8 +16308,8 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Rotate; - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v8i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; } @@ -16827,8 +16827,8 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Op; - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v8f64, V1, V2, Mask, Zeroable, + Subtarget, DAG)) return V; if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask, @@ -16898,8 +16898,8 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef Mask, } // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v16f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, Subtarget, DAG); @@ -16967,8 +16967,8 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef Mask, return Unpck; // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v8i64, V1, V2, Mask, Zeroable, + Subtarget, DAG)) return V; if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, @@ -17064,8 +17064,8 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef Mask, return V; // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithEXPAND(DL, MVT::v16i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return V; if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td index 2b1dac411c992..c30e989cdc2af 100644 --- a/llvm/lib/Target/X86/X86PfmCounters.td +++ b/llvm/lib/Target/X86/X86PfmCounters.td @@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters { let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>; +def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>; diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 986b9a211ce6c..b2c4f9ee00293 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -1151,6 +1151,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, break; // "znver4" } break; // family 19h + case 26: + CPU = "znver5"; + *Type = X86::AMDFAM1AH; + if (Model <= 0x77) { + // Models 00h-0Fh (Breithorn). + // Models 10h-1Fh (Breithorn-Dense). + // Models 20h-2Fh (Strix 1). + // Models 30h-37h (Strix 2). + // Models 38h-3Fh (Strix 3). + // Models 40h-4Fh (Granite Ridge). + // Models 50h-5Fh (Weisshorn). + // Models 60h-6Fh (Krackan1). + // Models 70h-77h (Sarlak). 
+ CPU = "znver5"; + *Subtype = X86::AMDFAM1AH_ZNVER5; + break; // "znver5" + } + break; + default: break; // Unknown AMD CPU. } diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 57bda0651ea82..09d4312918acf 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 = FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI | FeatureSHSTK; +static constexpr FeatureBitset FeaturesZNVER5 = + FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B | + FeatureAVX512VP2INTERSECT | FeaturePREFETCHI | FeatureAVXVNNI; + // D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from // X86TargetParser.def to here. They are assigned by following ways: // 1. Copy the mangling from the original CPU_SPEICIFC MACROs. If no, assign @@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = { { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false }, { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false }, { {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false }, + { {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false }, // Generic 64-bit processor. { {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false }, { {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false }, diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 13b6680264c87..5f8efd1a8f32e 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -8,6 +8,7 @@ #include "llvm/Transforms/Coroutines/CoroEarly.h" #include "CoroInternal.h" +#include "CoroShape.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 891798f53b2d0..fcbd31878bdea 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -12,6 +12,7 @@ #define LLVM_LIB_TRANSFORMS_COROUTINES_COROINTERNAL_H #include "CoroInstr.h" +#include "CoroShape.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" @@ -58,229 +59,6 @@ struct LowererBase { CallInst *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt); }; -enum class ABI { - /// The "resume-switch" lowering, where there are separate resume and - /// destroy functions that are shared between all suspend points. The - /// coroutine frame implicitly stores the resume and destroy functions, - /// the current index, and any promise value. - Switch, - - /// The "returned-continuation" lowering, where each suspend point creates a - /// single continuation function that is used for both resuming and - /// destroying. Does not support promises. - Retcon, - - /// The "unique returned-continuation" lowering, where each suspend point - /// creates a single continuation function that is used for both resuming - /// and destroying. Does not support promises. The function is known to - /// suspend at most once during its execution, and the return value of - /// the continuation is void. - RetconOnce, - - /// The "async continuation" lowering, where each suspend point creates a - /// single continuation function. The continuation function is available as an - /// intrinsic. 
- Async, -}; - -// Holds structural Coroutine Intrinsics for a particular function and other -// values used during CoroSplit pass. -struct LLVM_LIBRARY_VISIBILITY Shape { - CoroBeginInst *CoroBegin; - SmallVector CoroEnds; - SmallVector CoroSizes; - SmallVector CoroAligns; - SmallVector CoroSuspends; - SmallVector SwiftErrorOps; - SmallVector CoroAwaitSuspends; - SmallVector SymmetricTransfers; - - // Field indexes for special fields in the switch lowering. - struct SwitchFieldIndex { - enum { - Resume, - Destroy - - // The promise field is always at a fixed offset from the start of - // frame given its type, but the index isn't a constant for all - // possible frames. - - // The switch-index field isn't at a fixed offset or index, either; - // we just work it in where it fits best. - }; - }; - - coro::ABI ABI; - - StructType *FrameTy; - Align FrameAlign; - uint64_t FrameSize; - Value *FramePtr; - BasicBlock *AllocaSpillBlock; - - /// This would only be true if optimization are enabled. - bool OptimizeFrame; - - struct SwitchLoweringStorage { - SwitchInst *ResumeSwitch; - AllocaInst *PromiseAlloca; - BasicBlock *ResumeEntryBlock; - unsigned IndexField; - unsigned IndexAlign; - unsigned IndexOffset; - bool HasFinalSuspend; - bool HasUnwindCoroEnd; - }; - - struct RetconLoweringStorage { - Function *ResumePrototype; - Function *Alloc; - Function *Dealloc; - BasicBlock *ReturnBlock; - bool IsFrameInlineInStorage; - }; - - struct AsyncLoweringStorage { - Value *Context; - CallingConv::ID AsyncCC; - unsigned ContextArgNo; - uint64_t ContextHeaderSize; - uint64_t ContextAlignment; - uint64_t FrameOffset; // Start of the frame. - uint64_t ContextSize; // Includes frame size. - GlobalVariable *AsyncFuncPointer; - - Align getContextAlignment() const { return Align(ContextAlignment); } - }; - - union { - SwitchLoweringStorage SwitchLowering; - RetconLoweringStorage RetconLowering; - AsyncLoweringStorage AsyncLowering; - }; - - CoroIdInst *getSwitchCoroId() const { - assert(ABI == coro::ABI::Switch); - return cast(CoroBegin->getId()); - } - - AnyCoroIdRetconInst *getRetconCoroId() const { - assert(ABI == coro::ABI::Retcon || - ABI == coro::ABI::RetconOnce); - return cast(CoroBegin->getId()); - } - - CoroIdAsyncInst *getAsyncCoroId() const { - assert(ABI == coro::ABI::Async); - return cast(CoroBegin->getId()); - } - - unsigned getSwitchIndexField() const { - assert(ABI == coro::ABI::Switch); - assert(FrameTy && "frame type not assigned"); - return SwitchLowering.IndexField; - } - IntegerType *getIndexType() const { - assert(ABI == coro::ABI::Switch); - assert(FrameTy && "frame type not assigned"); - return cast(FrameTy->getElementType(getSwitchIndexField())); - } - ConstantInt *getIndex(uint64_t Value) const { - return ConstantInt::get(getIndexType(), Value); - } - - PointerType *getSwitchResumePointerType() const { - assert(ABI == coro::ABI::Switch); - assert(FrameTy && "frame type not assigned"); - return cast(FrameTy->getElementType(SwitchFieldIndex::Resume)); - } - - FunctionType *getResumeFunctionType() const { - switch (ABI) { - case coro::ABI::Switch: - return FunctionType::get(Type::getVoidTy(FrameTy->getContext()), - PointerType::getUnqual(FrameTy->getContext()), - /*IsVarArg=*/false); - case coro::ABI::Retcon: - case coro::ABI::RetconOnce: - return RetconLowering.ResumePrototype->getFunctionType(); - case coro::ABI::Async: - // Not used. The function type depends on the active suspend. 
- return nullptr; - } - - llvm_unreachable("Unknown coro::ABI enum"); - } - - ArrayRef getRetconResultTypes() const { - assert(ABI == coro::ABI::Retcon || - ABI == coro::ABI::RetconOnce); - auto FTy = CoroBegin->getFunction()->getFunctionType(); - - // The safety of all this is checked by checkWFRetconPrototype. - if (auto STy = dyn_cast(FTy->getReturnType())) { - return STy->elements().slice(1); - } else { - return ArrayRef(); - } - } - - ArrayRef getRetconResumeTypes() const { - assert(ABI == coro::ABI::Retcon || - ABI == coro::ABI::RetconOnce); - - // The safety of all this is checked by checkWFRetconPrototype. - auto FTy = RetconLowering.ResumePrototype->getFunctionType(); - return FTy->params().slice(1); - } - - CallingConv::ID getResumeFunctionCC() const { - switch (ABI) { - case coro::ABI::Switch: - return CallingConv::Fast; - - case coro::ABI::Retcon: - case coro::ABI::RetconOnce: - return RetconLowering.ResumePrototype->getCallingConv(); - case coro::ABI::Async: - return AsyncLowering.AsyncCC; - } - llvm_unreachable("Unknown coro::ABI enum"); - } - - AllocaInst *getPromiseAlloca() const { - if (ABI == coro::ABI::Switch) - return SwitchLowering.PromiseAlloca; - return nullptr; - } - - BasicBlock::iterator getInsertPtAfterFramePtr() const { - if (auto *I = dyn_cast(FramePtr)) { - BasicBlock::iterator It = std::next(I->getIterator()); - It.setHeadBit(true); // Copy pre-RemoveDIs behaviour. - return It; - } - return cast(FramePtr)->getParent()->getEntryBlock().begin(); - } - - /// Allocate memory according to the rules of the active lowering. - /// - /// \param CG - if non-null, will be updated for the new call - Value *emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const; - - /// Deallocate memory according to the rules of the active lowering. - /// - /// \param CG - if non-null, will be updated for the new call - void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; - - Shape() = default; - explicit Shape(Function &F, bool OptimizeFrame = false) - : OptimizeFrame(OptimizeFrame) { - buildFrom(F); - } - void buildFrom(Function &F); -}; - bool defaultMaterializable(Instruction &V); void normalizeCoroutine(Function &F, coro::Shape &Shape, TargetTransformInfo &TTI); diff --git a/llvm/lib/Transforms/Coroutines/CoroShape.h b/llvm/lib/Transforms/Coroutines/CoroShape.h new file mode 100644 index 0000000000000..3d1b38082173d --- /dev/null +++ b/llvm/lib/Transforms/Coroutines/CoroShape.h @@ -0,0 +1,288 @@ +//===- CoroShape.h - Coroutine info for lowering --------------*- C++ -*---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file declares the shape info struct that is required by many coroutine +// utility methods. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_COROUTINES_COROSHAPE_H +#define LLVM_TRANSFORMS_COROUTINES_COROSHAPE_H + +#include "CoroInstr.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class CallGraph; + +namespace coro { + +enum class ABI { + /// The "resume-switch" lowering, where there are separate resume and + /// destroy functions that are shared between all suspend points. 
+  /// coroutine frame implicitly stores the resume and destroy functions,
+  /// the current index, and any promise value.
+  Switch,
+
+  /// The "returned-continuation" lowering, where each suspend point creates a
+  /// single continuation function that is used for both resuming and
+  /// destroying. Does not support promises.
+  Retcon,
+
+  /// The "unique returned-continuation" lowering, where each suspend point
+  /// creates a single continuation function that is used for both resuming
+  /// and destroying. Does not support promises. The function is known to
+  /// suspend at most once during its execution, and the return value of
+  /// the continuation is void.
+  RetconOnce,
+
+  /// The "async continuation" lowering, where each suspend point creates a
+  /// single continuation function. The continuation function is available as an
+  /// intrinsic.
+  Async,
+};
+
+// Holds structural Coroutine Intrinsics for a particular function and other
+// values used during CoroSplit pass.
+struct LLVM_LIBRARY_VISIBILITY Shape {
+  CoroBeginInst *CoroBegin = nullptr;
+  SmallVector<AnyCoroEndInst *, 4> CoroEnds;
+  SmallVector<CoroSizeInst *, 2> CoroSizes;
+  SmallVector<CoroAlignInst *, 2> CoroAligns;
+  SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
+  SmallVector<CoroAwaitSuspendInst *, 4> CoroAwaitSuspends;
+  SmallVector<CallInst *> SymmetricTransfers;
+
+  // Values invalidated by invalidateCoroutine() and tidyCoroutine()
+  SmallVector<CoroFrameInst *, 8> CoroFrames;
+  SmallVector<CoroSaveInst *, 2> UnusedCoroSaves;
+
+  // Values invalidated by replaceSwiftErrorOps()
+  SmallVector<CallInst *, 2> SwiftErrorOps;
+
+  void clear() {
+    CoroBegin = nullptr;
+    CoroEnds.clear();
+    CoroSizes.clear();
+    CoroAligns.clear();
+    CoroSuspends.clear();
+    CoroAwaitSuspends.clear();
+    SymmetricTransfers.clear();
+
+    CoroFrames.clear();
+    UnusedCoroSaves.clear();
+
+    SwiftErrorOps.clear();
+
+    FrameTy = nullptr;
+    FramePtr = nullptr;
+    AllocaSpillBlock = nullptr;
+  }
+
+  // Scan the function and collect the above intrinsics for later processing
+  void analyze(Function &F);
+  // If, for some reason, we were not able to find coro.begin, bail out.
+  void invalidateCoroutine(Function &F);
+  // Perform ABI-related initial transformation
+  void initABI();
+  // Remove orphaned and unnecessary intrinsics
+  void tidyCoroutine();
+
+  // Field indexes for special fields in the switch lowering.
+  struct SwitchFieldIndex {
+    enum {
+      Resume,
+      Destroy
+
+      // The promise field is always at a fixed offset from the start of
+      // frame given its type, but the index isn't a constant for all
+      // possible frames.
+
+      // The switch-index field isn't at a fixed offset or index, either;
+      // we just work it in where it fits best.
+    };
+  };
+
+  coro::ABI ABI;
+
+  StructType *FrameTy = nullptr;
+  Align FrameAlign;
+  uint64_t FrameSize = 0;
+  Value *FramePtr = nullptr;
+  BasicBlock *AllocaSpillBlock = nullptr;
+
+  /// This would only be true if optimizations are enabled.
+  bool OptimizeFrame;
+
+  struct SwitchLoweringStorage {
+    SwitchInst *ResumeSwitch;
+    AllocaInst *PromiseAlloca;
+    BasicBlock *ResumeEntryBlock;
+    unsigned IndexField;
+    unsigned IndexAlign;
+    unsigned IndexOffset;
+    bool HasFinalSuspend;
+    bool HasUnwindCoroEnd;
+  };
+
+  struct RetconLoweringStorage {
+    Function *ResumePrototype;
+    Function *Alloc;
+    Function *Dealloc;
+    BasicBlock *ReturnBlock;
+    bool IsFrameInlineInStorage;
+  };
+
+  struct AsyncLoweringStorage {
+    Value *Context;
+    CallingConv::ID AsyncCC;
+    unsigned ContextArgNo;
+    uint64_t ContextHeaderSize;
+    uint64_t ContextAlignment;
+    uint64_t FrameOffset; // Start of the frame.
+    uint64_t ContextSize; // Includes frame size.
+    GlobalVariable *AsyncFuncPointer;
+
+    Align getContextAlignment() const { return Align(ContextAlignment); }
+  };
+
+  union {
+    SwitchLoweringStorage SwitchLowering;
+    RetconLoweringStorage RetconLowering;
+    AsyncLoweringStorage AsyncLowering;
+  };
+
+  CoroIdInst *getSwitchCoroId() const {
+    assert(ABI == coro::ABI::Switch);
+    return cast<CoroIdInst>(CoroBegin->getId());
+  }
+
+  AnyCoroIdRetconInst *getRetconCoroId() const {
+    assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce);
+    return cast<AnyCoroIdRetconInst>(CoroBegin->getId());
+  }
+
+  CoroIdAsyncInst *getAsyncCoroId() const {
+    assert(ABI == coro::ABI::Async);
+    return cast<CoroIdAsyncInst>(CoroBegin->getId());
+  }
+
+  unsigned getSwitchIndexField() const {
+    assert(ABI == coro::ABI::Switch);
+    assert(FrameTy && "frame type not assigned");
+    return SwitchLowering.IndexField;
+  }
+  IntegerType *getIndexType() const {
+    assert(ABI == coro::ABI::Switch);
+    assert(FrameTy && "frame type not assigned");
+    return cast<IntegerType>(FrameTy->getElementType(getSwitchIndexField()));
+  }
+  ConstantInt *getIndex(uint64_t Value) const {
+    return ConstantInt::get(getIndexType(), Value);
+  }
+
+  PointerType *getSwitchResumePointerType() const {
+    assert(ABI == coro::ABI::Switch);
+    assert(FrameTy && "frame type not assigned");
+    return cast<PointerType>(FrameTy->getElementType(SwitchFieldIndex::Resume));
+  }
+
+  FunctionType *getResumeFunctionType() const {
+    switch (ABI) {
+    case coro::ABI::Switch:
+      return FunctionType::get(Type::getVoidTy(FrameTy->getContext()),
+                               PointerType::getUnqual(FrameTy->getContext()),
+                               /*IsVarArg=*/false);
+    case coro::ABI::Retcon:
+    case coro::ABI::RetconOnce:
+      return RetconLowering.ResumePrototype->getFunctionType();
+    case coro::ABI::Async:
+      // Not used. The function type depends on the active suspend.
+      return nullptr;
+    }
+
+    llvm_unreachable("Unknown coro::ABI enum");
+  }
+
+  ArrayRef<Type *> getRetconResultTypes() const {
+    assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce);
+    auto FTy = CoroBegin->getFunction()->getFunctionType();
+
+    // The safety of all this is checked by checkWFRetconPrototype.
+    if (auto STy = dyn_cast<StructType>(FTy->getReturnType())) {
+      return STy->elements().slice(1);
+    } else {
+      return ArrayRef<Type *>();
+    }
+  }
+
+  ArrayRef<Type *> getRetconResumeTypes() const {
+    assert(ABI == coro::ABI::Retcon || ABI == coro::ABI::RetconOnce);
+
+    // The safety of all this is checked by checkWFRetconPrototype.
+    auto FTy = RetconLowering.ResumePrototype->getFunctionType();
+    return FTy->params().slice(1);
+  }
+
+  CallingConv::ID getResumeFunctionCC() const {
+    switch (ABI) {
+    case coro::ABI::Switch:
+      return CallingConv::Fast;
+
+    case coro::ABI::Retcon:
+    case coro::ABI::RetconOnce:
+      return RetconLowering.ResumePrototype->getCallingConv();
+    case coro::ABI::Async:
+      return AsyncLowering.AsyncCC;
+    }
+    llvm_unreachable("Unknown coro::ABI enum");
+  }
+
+  AllocaInst *getPromiseAlloca() const {
+    if (ABI == coro::ABI::Switch)
+      return SwitchLowering.PromiseAlloca;
+    return nullptr;
+  }
+
+  BasicBlock::iterator getInsertPtAfterFramePtr() const {
+    if (auto *I = dyn_cast<Instruction>(FramePtr)) {
+      BasicBlock::iterator It = std::next(I->getIterator());
+      It.setHeadBit(true); // Copy pre-RemoveDIs behaviour.
+      return It;
+    }
+    return cast<Argument>(FramePtr)->getParent()->getEntryBlock().begin();
+  }
+
+  /// Allocate memory according to the rules of the active lowering.
+  ///
+  /// \param CG - if non-null, will be updated for the new call
+  Value *emitAlloc(IRBuilder<> &Builder, Value *Size, CallGraph *CG) const;
+
+  /// Deallocate memory according to the rules of the active lowering.
+ /// + /// \param CG - if non-null, will be updated for the new call + void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const; + + Shape() = default; + explicit Shape(Function &F, bool OptimizeFrame = false) + : OptimizeFrame(OptimizeFrame) { + analyze(F); + if (!CoroBegin) { + invalidateCoroutine(F); + return; + } + initABI(); + tidyCoroutine(); + } +}; + +} // end namespace coro + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_COROUTINES_COROSHAPE_H diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index cdc442bc819c3..c1042b21883f6 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -12,6 +12,7 @@ #include "CoroInstr.h" #include "CoroInternal.h" +#include "CoroShape.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" @@ -176,17 +177,6 @@ void coro::suppressCoroAllocs(LLVMContext &Context, } } -static void clear(coro::Shape &Shape) { - Shape.CoroBegin = nullptr; - Shape.CoroEnds.clear(); - Shape.CoroSizes.clear(); - Shape.CoroSuspends.clear(); - - Shape.FrameTy = nullptr; - Shape.FramePtr = nullptr; - Shape.AllocaSpillBlock = nullptr; -} - static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, CoroSuspendInst *SuspendInst) { Module *M = SuspendInst->getModule(); @@ -199,13 +189,12 @@ static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin, } // Collect "interesting" coroutine intrinsics. -void coro::Shape::buildFrom(Function &F) { +void coro::Shape::analyze(Function &F) { + clear(); + bool HasFinalSuspend = false; bool HasUnwindCoroEnd = false; size_t FinalSuspendIndex = 0; - clear(*this); - SmallVector CoroFrames; - SmallVector UnusedCoroSaves; for (Instruction &I : instructions(F)) { // FIXME: coro_await_suspend_* are not proper `IntrinisicInst`s @@ -297,8 +286,58 @@ void coro::Shape::buildFrom(Function &F) { } } - // If for some reason, we were not able to find coro.begin, bailout. - if (!CoroBegin) { + // If there is no CoroBegin then this is not a coroutine. + if (!CoroBegin) + return; + + // Determination of ABI and initializing lowering info + auto Id = CoroBegin->getId(); + auto IntrID = Id->getIntrinsicID(); + if (IntrID == Intrinsic::coro_id) { + ABI = coro::ABI::Switch; + SwitchLowering.HasFinalSuspend = HasFinalSuspend; + SwitchLowering.HasUnwindCoroEnd = HasUnwindCoroEnd; + + auto SwitchId = getSwitchCoroId(); + SwitchLowering.ResumeSwitch = nullptr; + SwitchLowering.PromiseAlloca = SwitchId->getPromise(); + SwitchLowering.ResumeEntryBlock = nullptr; + + // Move final suspend to the last element in the CoroSuspends vector. + if (SwitchLowering.HasFinalSuspend && + FinalSuspendIndex != CoroSuspends.size() - 1) + std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); + } else if (IntrID == Intrinsic::coro_id_async) { + ABI = coro::ABI::Async; + auto *AsyncId = getAsyncCoroId(); + AsyncId->checkWellFormed(); + AsyncLowering.Context = AsyncId->getStorage(); + AsyncLowering.ContextArgNo = AsyncId->getStorageArgumentIndex(); + AsyncLowering.ContextHeaderSize = AsyncId->getStorageSize(); + AsyncLowering.ContextAlignment = AsyncId->getStorageAlignment().value(); + AsyncLowering.AsyncFuncPointer = AsyncId->getAsyncFunctionPointer(); + AsyncLowering.AsyncCC = F.getCallingConv(); + } else if (IntrID == Intrinsic::coro_id_retcon || + IntrID == Intrinsic::coro_id_retcon_once) { + ABI = IntrID == Intrinsic::coro_id_retcon ? 
coro::ABI::Retcon + : coro::ABI::RetconOnce; + auto ContinuationId = getRetconCoroId(); + ContinuationId->checkWellFormed(); + auto Prototype = ContinuationId->getPrototype(); + RetconLowering.ResumePrototype = Prototype; + RetconLowering.Alloc = ContinuationId->getAllocFunction(); + RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); + RetconLowering.ReturnBlock = nullptr; + RetconLowering.IsFrameInlineInStorage = false; + } else { + llvm_unreachable("coro.begin is not dependent on a coro.id call"); + } +} + +// If for some reason, we were not able to find coro.begin, bailout. +void coro::Shape::invalidateCoroutine(Function &F) { + assert(!CoroBegin); + { // Replace coro.frame which are supposed to be lowered to the result of // coro.begin with undef. auto *Undef = UndefValue::get(PointerType::get(F.getContext(), 0)); @@ -319,21 +358,13 @@ void coro::Shape::buildFrom(Function &F) { // Replace all coro.ends with unreachable instruction. for (AnyCoroEndInst *CE : CoroEnds) changeToUnreachable(CE); - - return; } +} - auto Id = CoroBegin->getId(); - switch (auto IdIntrinsic = Id->getIntrinsicID()) { - case Intrinsic::coro_id: { - auto SwitchId = cast(Id); - this->ABI = coro::ABI::Switch; - this->SwitchLowering.HasFinalSuspend = HasFinalSuspend; - this->SwitchLowering.HasUnwindCoroEnd = HasUnwindCoroEnd; - this->SwitchLowering.ResumeSwitch = nullptr; - this->SwitchLowering.PromiseAlloca = SwitchId->getPromise(); - this->SwitchLowering.ResumeEntryBlock = nullptr; - +// Perform semantic checking and initialization of the ABI +void coro::Shape::initABI() { + switch (ABI) { + case coro::ABI::Switch: { for (auto *AnySuspend : CoroSuspends) { auto Suspend = dyn_cast(AnySuspend); if (!Suspend) { @@ -348,33 +379,11 @@ void coro::Shape::buildFrom(Function &F) { } break; } - case Intrinsic::coro_id_async: { - auto *AsyncId = cast(Id); - AsyncId->checkWellFormed(); - this->ABI = coro::ABI::Async; - this->AsyncLowering.Context = AsyncId->getStorage(); - this->AsyncLowering.ContextArgNo = AsyncId->getStorageArgumentIndex(); - this->AsyncLowering.ContextHeaderSize = AsyncId->getStorageSize(); - this->AsyncLowering.ContextAlignment = - AsyncId->getStorageAlignment().value(); - this->AsyncLowering.AsyncFuncPointer = AsyncId->getAsyncFunctionPointer(); - this->AsyncLowering.AsyncCC = F.getCallingConv(); + case coro::ABI::Async: { break; }; - case Intrinsic::coro_id_retcon: - case Intrinsic::coro_id_retcon_once: { - auto ContinuationId = cast(Id); - ContinuationId->checkWellFormed(); - this->ABI = (IdIntrinsic == Intrinsic::coro_id_retcon - ? coro::ABI::Retcon - : coro::ABI::RetconOnce); - auto Prototype = ContinuationId->getPrototype(); - this->RetconLowering.ResumePrototype = Prototype; - this->RetconLowering.Alloc = ContinuationId->getAllocFunction(); - this->RetconLowering.Dealloc = ContinuationId->getDeallocFunction(); - this->RetconLowering.ReturnBlock = nullptr; - this->RetconLowering.IsFrameInlineInStorage = false; - + case coro::ABI::Retcon: + case coro::ABI::RetconOnce: { // Determine the result value types, and make sure they match up with // the values passed to the suspends. 
auto ResultTys = getRetconResultTypes(); @@ -407,7 +416,7 @@ void coro::Shape::buildFrom(Function &F) { #ifndef NDEBUG Suspend->dump(); - Prototype->getFunctionType()->dump(); + RetconLowering.ResumePrototype->getFunctionType()->dump(); #endif report_fatal_error("argument to coro.suspend.retcon does not " "match corresponding prototype function result"); @@ -416,14 +425,14 @@ void coro::Shape::buildFrom(Function &F) { if (SI != SE || RI != RE) { #ifndef NDEBUG Suspend->dump(); - Prototype->getFunctionType()->dump(); + RetconLowering.ResumePrototype->getFunctionType()->dump(); #endif report_fatal_error("wrong number of arguments to coro.suspend.retcon"); } // Check that the result type of the suspend matches the resume types. Type *SResultTy = Suspend->getType(); - ArrayRef SuspendResultTys; + ArrayRef SuspendResultTys; if (SResultTy->isVoidTy()) { // leave as empty array } else if (auto SResultStructTy = dyn_cast(SResultTy)) { @@ -435,7 +444,7 @@ void coro::Shape::buildFrom(Function &F) { if (SuspendResultTys.size() != ResumeTys.size()) { #ifndef NDEBUG Suspend->dump(); - Prototype->getFunctionType()->dump(); + RetconLowering.ResumePrototype->getFunctionType()->dump(); #endif report_fatal_error("wrong number of results from coro.suspend.retcon"); } @@ -443,7 +452,7 @@ void coro::Shape::buildFrom(Function &F) { if (SuspendResultTys[I] != ResumeTys[I]) { #ifndef NDEBUG Suspend->dump(); - Prototype->getFunctionType()->dump(); + RetconLowering.ResumePrototype->getFunctionType()->dump(); #endif report_fatal_error("result from coro.suspend.retcon does not " "match corresponding prototype function param"); @@ -452,23 +461,18 @@ void coro::Shape::buildFrom(Function &F) { } break; } - default: llvm_unreachable("coro.begin is not dependent on a coro.id call"); } +} +void coro::Shape::tidyCoroutine() { // The coro.free intrinsic is always lowered to the result of coro.begin. for (CoroFrameInst *CF : CoroFrames) { CF->replaceAllUsesWith(CoroBegin); CF->eraseFromParent(); } - // Move final suspend to be the last element in the CoroSuspends vector. - if (ABI == coro::ABI::Switch && - SwitchLowering.HasFinalSuspend && - FinalSuspendIndex != CoroSuspends.size() - 1) - std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); - // Remove orphaned coro.saves. 
for (CoroSaveInst *CoroSave : UnusedCoroSaves) CoroSave->eraseFromParent(); diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll index 6da019a79b727..25a14ef9a49ee 100644 --- a/llvm/test/CodeGen/AArch64/abs.ll +++ b/llvm/test/CodeGen/AArch64/abs.ll @@ -280,6 +280,40 @@ entry: } declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1) +define <2 x i128> @abs_v4i128(<2 x i128> %a){ +; CHECK-SD-LABEL: abs_v4i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: asr x8, x1, #63 +; CHECK-SD-NEXT: asr x9, x3, #63 +; CHECK-SD-NEXT: eor x10, x0, x8 +; CHECK-SD-NEXT: eor x11, x1, x8 +; CHECK-SD-NEXT: subs x0, x10, x8 +; CHECK-SD-NEXT: eor x10, x2, x9 +; CHECK-SD-NEXT: sbc x1, x11, x8 +; CHECK-SD-NEXT: eor x8, x3, x9 +; CHECK-SD-NEXT: subs x2, x10, x9 +; CHECK-SD-NEXT: sbc x3, x8, x9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: abs_v4i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: asr x8, x1, #63 +; CHECK-GI-NEXT: asr x9, x3, #63 +; CHECK-GI-NEXT: adds x10, x0, x8 +; CHECK-GI-NEXT: adc x11, x1, x8 +; CHECK-GI-NEXT: adds x12, x2, x9 +; CHECK-GI-NEXT: eor x0, x10, x8 +; CHECK-GI-NEXT: adc x13, x3, x9 +; CHECK-GI-NEXT: eor x1, x11, x8 +; CHECK-GI-NEXT: eor x2, x12, x9 +; CHECK-GI-NEXT: eor x3, x13, x9 +; CHECK-GI-NEXT: ret +entry: + %res = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %a, i1 0) + ret <2 x i128> %res +} +declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1) + ; ===== Vectors with Non-Pow 2 Widths ===== define <3 x i8> @abs_v3i8(<3 x i8> %a){ diff --git a/llvm/test/CodeGen/AArch64/expand-select.ll b/llvm/test/CodeGen/AArch64/expand-select.ll index f8397290ab5e1..1ed2e09c6b4d4 100644 --- a/llvm/test/CodeGen/AArch64/expand-select.ll +++ b/llvm/test/CodeGen/AArch64/expand-select.ll @@ -33,24 +33,20 @@ define void @bar(i32 %In1, <2 x i96> %In2, <2 x i96> %In3, ptr %Out) { ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: fmov s0, wzr -; CHECK-NEXT: ldr x11, [sp, #16] +; CHECK-NEXT: ldr x10, [sp, #16] ; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldp x9, x10, [sp] ; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s -; CHECK-NEXT: dup v1.4s, v0.s[0] -; CHECK-NEXT: mov x8, v1.d[1] -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: csel x10, x5, x10, ne -; CHECK-NEXT: csel x9, x4, x9, ne -; CHECK-NEXT: stur x9, [x11, #12] ; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: str w10, [x11, #20] -; CHECK-NEXT: csel x8, x2, x6, ne +; CHECK-NEXT: ldp x9, x8, [sp] +; CHECK-NEXT: csel x11, x2, x6, ne +; CHECK-NEXT: str x11, [x10] +; CHECK-NEXT: csel x9, x4, x9, ne +; CHECK-NEXT: csel x8, x5, x8, ne +; CHECK-NEXT: stur x9, [x10, #12] ; CHECK-NEXT: csel x9, x3, x7, ne -; CHECK-NEXT: str x8, [x11] -; CHECK-NEXT: str w9, [x11, #8] +; CHECK-NEXT: str w8, [x10, #20] +; CHECK-NEXT: str w9, [x10, #8] ; CHECK-NEXT: ret %cond = and i32 %In1, 1 %cbool = icmp eq i32 %cond, 0 diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll index 8ca1e9ee5b617..baab53d8bdbd4 100644 --- a/llvm/test/CodeGen/AArch64/fcmp.ll +++ b/llvm/test/CodeGen/AArch64/fcmp.ll @@ -1,11 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s 
--check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 -; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 - -; CHECK-GI: warning: Instruction selection used fallback path for v2f128_fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v3f128_fp128 +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define fp128 @f128_fp128(fp128 %a, fp128 %b, fp128 %d, fp128 %e) { ; CHECK-SD-LABEL: f128_fp128: @@ -429,35 +426,74 @@ entry: } define <2 x fp128> @v2f128_fp128(<2 x fp128> %a, <2 x fp128> %b, <2 x fp128> %d, <2 x fp128> %e) { -; CHECK-LABEL: v2f128_fp128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: stp q4, q5, [sp] // 32-byte Folded Spill -; CHECK-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill -; CHECK-NEXT: bl __lttf2 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: b.ge .LBB12_2 -; CHECK-NEXT: // %bb.1: // %entry -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: .LBB12_2: // %entry -; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload -; CHECK-NEXT: bl __lttf2 -; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: b.ge .LBB12_4 -; CHECK-NEXT: // %bb.3: // %entry -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: .LBB12_4: // %entry -; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2f128_fp128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #112 +; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 112 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: stp q4, q5, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: stp q7, q6, [sp, #64] // 32-byte Folded Spill +; CHECK-SD-NEXT: bl __lttf2 +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: b.ge .LBB12_2 +; CHECK-SD-NEXT: // %bb.1: // %entry +; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: .LBB12_2: // %entry +; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload +; CHECK-SD-NEXT: bl __lttf2 +; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: b.ge .LBB12_4 +; CHECK-SD-NEXT: // %bb.3: // %entry +; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: .LBB12_4: // %entry +; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-SD-NEXT: add sp, sp, #112 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2f128_fp128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #112 +; CHECK-GI-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill +; 
CHECK-GI-NEXT: .cfi_def_cfa_offset 112 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: stp q3, q1, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: mov v1.16b, v2.16b +; CHECK-GI-NEXT: stp q6, q4, [sp, #32] // 32-byte Folded Spill +; CHECK-GI-NEXT: stp q7, q5, [sp, #64] // 32-byte Folded Spill +; CHECK-GI-NEXT: bl __lttf2 +; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __lttf2 +; CHECK-GI-NEXT: ldp q3, q2, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: cmp w19, #0 +; CHECK-GI-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov d0, v2.d[1] +; CHECK-GI-NEXT: mov d1, v3.d[1] +; CHECK-GI-NEXT: fcsel d2, d2, d3, lt +; CHECK-GI-NEXT: fmov x8, d2 +; CHECK-GI-NEXT: fcsel d3, d0, d1, lt +; CHECK-GI-NEXT: ldp q5, q0, [sp, #64] // 32-byte Folded Reload +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: mov d4, v5.d[1] +; CHECK-GI-NEXT: fcsel d0, d0, d5, lt +; CHECK-GI-NEXT: fmov x9, d0 +; CHECK-GI-NEXT: mov v0.d[0], x8 +; CHECK-GI-NEXT: fmov x8, d3 +; CHECK-GI-NEXT: fcsel d2, d1, d4, lt +; CHECK-GI-NEXT: mov v1.d[0], x9 +; CHECK-GI-NEXT: fmov x9, d2 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov v1.d[1], x9 +; CHECK-GI-NEXT: add sp, sp, #112 +; CHECK-GI-NEXT: ret entry: %c = fcmp olt <2 x fp128> %a, %b %s = select <2 x i1> %c, <2 x fp128> %d, <2 x fp128> %e @@ -465,42 +501,104 @@ entry: } define <3 x fp128> @v3f128_fp128(<3 x fp128> %a, <3 x fp128> %b, <3 x fp128> %d, <3 x fp128> %e) { -; CHECK-LABEL: v3f128_fp128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill -; CHECK-NEXT: mov v1.16b, v3.16b -; CHECK-NEXT: stp q2, q5, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill -; CHECK-NEXT: bl __lttf2 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: b.lt .LBB13_2 -; CHECK-NEXT: // %bb.1: -; CHECK-NEXT: ldr q0, [sp, #128] -; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: .LBB13_2: // %entry -; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload -; CHECK-NEXT: bl __lttf2 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: b.lt .LBB13_4 -; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: ldr q0, [sp, #144] -; CHECK-NEXT: str q0, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: .LBB13_4: // %entry -; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload -; CHECK-NEXT: bl __lttf2 -; CHECK-NEXT: add x8, sp, #160 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: add x9, sp, #112 -; CHECK-NEXT: csel x8, x9, x8, lt -; CHECK-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload -; CHECK-NEXT: ldr q2, [x8] -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v3f128_fp128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #112 +; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 112 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: stp q1, q4, [sp] // 32-byte Folded Spill +; CHECK-SD-NEXT: mov v1.16b, v3.16b +; CHECK-SD-NEXT: stp q2, q5, [sp, #32] // 32-byte Folded Spill +; CHECK-SD-NEXT: stp q6, q7, [sp, #64] // 32-byte Folded Spill +; CHECK-SD-NEXT: bl __lttf2 +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: b.lt .LBB13_2 +; CHECK-SD-NEXT: // %bb.1: +; CHECK-SD-NEXT: ldr q0, 
[sp, #128] +; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill +; CHECK-SD-NEXT: .LBB13_2: // %entry +; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-SD-NEXT: bl __lttf2 +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: b.lt .LBB13_4 +; CHECK-SD-NEXT: // %bb.3: +; CHECK-SD-NEXT: ldr q0, [sp, #144] +; CHECK-SD-NEXT: str q0, [sp, #80] // 16-byte Folded Spill +; CHECK-SD-NEXT: .LBB13_4: // %entry +; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload +; CHECK-SD-NEXT: bl __lttf2 +; CHECK-SD-NEXT: add x8, sp, #160 +; CHECK-SD-NEXT: cmp w0, #0 +; CHECK-SD-NEXT: add x9, sp, #112 +; CHECK-SD-NEXT: csel x8, x9, x8, lt +; CHECK-SD-NEXT: ldp q0, q1, [sp, #64] // 32-byte Folded Reload +; CHECK-SD-NEXT: ldr q2, [x8] +; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload +; CHECK-SD-NEXT: add sp, sp, #112 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v3f128_fp128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #192 +; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 192 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: stp q4, q1, [sp] // 32-byte Folded Spill +; CHECK-GI-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NEXT: stp q5, q2, [sp, #32] // 32-byte Folded Spill +; CHECK-GI-NEXT: ldr q2, [sp, #192] +; CHECK-GI-NEXT: str q2, [sp, #144] // 16-byte Folded Spill +; CHECK-GI-NEXT: ldr q2, [sp, #208] +; CHECK-GI-NEXT: stp q2, q6, [sp, #64] // 32-byte Folded Spill +; CHECK-GI-NEXT: ldr q2, [sp, #224] +; CHECK-GI-NEXT: stp q7, q2, [sp, #96] // 32-byte Folded Spill +; CHECK-GI-NEXT: ldr q2, [sp, #240] +; CHECK-GI-NEXT: str q2, [sp, #128] // 16-byte Folded Spill +; CHECK-GI-NEXT: bl __lttf2 +; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload +; CHECK-GI-NEXT: mov w19, w0 +; CHECK-GI-NEXT: bl __lttf2 +; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload +; CHECK-GI-NEXT: mov w20, w0 +; CHECK-GI-NEXT: bl __lttf2 +; CHECK-GI-NEXT: ldp q5, q4, [sp, #64] // 32-byte Folded Reload +; CHECK-GI-NEXT: cmp w19, #0 +; CHECK-GI-NEXT: ldp q7, q6, [sp, #96] // 32-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload +; CHECK-GI-NEXT: mov d0, v4.d[1] +; CHECK-GI-NEXT: mov d1, v5.d[1] +; CHECK-GI-NEXT: fcsel d4, d4, d5, lt +; CHECK-GI-NEXT: mov d2, v7.d[1] +; CHECK-GI-NEXT: mov d3, v6.d[1] +; CHECK-GI-NEXT: fmov x8, d4 +; CHECK-GI-NEXT: fcsel d5, d0, d1, lt +; CHECK-GI-NEXT: cmp w20, #0 +; CHECK-GI-NEXT: fcsel d1, d7, d6, lt +; CHECK-GI-NEXT: ldp q7, q0, [sp, #128] // 32-byte Folded Reload +; CHECK-GI-NEXT: fcsel d3, d2, d3, lt +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: mov d6, v7.d[1] +; CHECK-GI-NEXT: fcsel d7, d0, d7, lt +; CHECK-GI-NEXT: mov v0.d[0], x8 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: fmov x9, d7 +; CHECK-GI-NEXT: fcsel d4, d2, d6, lt +; CHECK-GI-NEXT: mov v1.d[0], x8 +; CHECK-GI-NEXT: fmov x8, d5 +; CHECK-GI-NEXT: mov v2.d[0], x9 +; CHECK-GI-NEXT: fmov x9, d3 +; CHECK-GI-NEXT: fmov x10, d4 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov v1.d[1], x9 +; CHECK-GI-NEXT: mov v2.d[1], x10 +; CHECK-GI-NEXT: add sp, sp, #192 +; CHECK-GI-NEXT: ret entry: %c = fcmp olt <3 x fp128> %a, %b %s = select <3 x i1> %c, <3 x fp128> %d, <3 x fp128> %e diff --git a/llvm/test/CodeGen/AArch64/icmp.ll 
b/llvm/test/CodeGen/AArch64/icmp.ll index b00e5d6c701d8..61964060ca2c8 100644 --- a/llvm/test/CodeGen/AArch64/icmp.ll +++ b/llvm/test/CodeGen/AArch64/icmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i64 @i64_i64(i64 %a, i64 %b, i64 %d, i64 %e) { ; CHECK-LABEL: i64_i64: @@ -1376,6 +1376,62 @@ entry: ret <32 x i8> %s } +define <2 x i128> @v2i128_i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> %d, <2 x i128> %e) { +; CHECK-SD-LABEL: v2i128_i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: add x10, sp, #32 +; CHECK-SD-NEXT: mov x11, sp +; CHECK-SD-NEXT: cmp x0, x4 +; CHECK-SD-NEXT: orr x12, x10, #0x8 +; CHECK-SD-NEXT: orr x13, x11, #0x8 +; CHECK-SD-NEXT: sbcs xzr, x1, x5 +; CHECK-SD-NEXT: add x8, sp, #48 +; CHECK-SD-NEXT: add x9, sp, #16 +; CHECK-SD-NEXT: csel x12, x13, x12, lt +; CHECK-SD-NEXT: csel x10, x11, x10, lt +; CHECK-SD-NEXT: cmp x2, x6 +; CHECK-SD-NEXT: orr x11, x8, #0x8 +; CHECK-SD-NEXT: orr x13, x9, #0x8 +; CHECK-SD-NEXT: sbcs xzr, x3, x7 +; CHECK-SD-NEXT: ldr x0, [x10] +; CHECK-SD-NEXT: csel x8, x9, x8, lt +; CHECK-SD-NEXT: csel x9, x13, x11, lt +; CHECK-SD-NEXT: ldr x1, [x12] +; CHECK-SD-NEXT: ldr x2, [x8] +; CHECK-SD-NEXT: ldr x3, [x9] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i128_i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: cmp x1, x5 +; CHECK-GI-NEXT: ldp x8, x9, [sp] +; CHECK-GI-NEXT: cset w10, lt +; CHECK-GI-NEXT: cmp x0, x4 +; CHECK-GI-NEXT: cset w13, lo +; CHECK-GI-NEXT: cmp x1, x5 +; CHECK-GI-NEXT: csel w10, w13, w10, eq +; CHECK-GI-NEXT: cmp x3, x7 +; CHECK-GI-NEXT: ldp x13, x14, [sp, #32] +; CHECK-GI-NEXT: cset w15, lt +; CHECK-GI-NEXT: cmp x2, x6 +; CHECK-GI-NEXT: ldp x11, x12, [sp, #16] +; CHECK-GI-NEXT: cset w16, lo +; CHECK-GI-NEXT: cmp x3, x7 +; CHECK-GI-NEXT: ldp x17, x18, [sp, #48] +; CHECK-GI-NEXT: csel w15, w16, w15, eq +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: csel x0, x8, x13, ne +; CHECK-GI-NEXT: csel x1, x9, x14, ne +; CHECK-GI-NEXT: tst w15, #0x1 +; CHECK-GI-NEXT: csel x2, x11, x17, ne +; CHECK-GI-NEXT: csel x3, x12, x18, ne +; CHECK-GI-NEXT: ret +entry: + %c = icmp slt <2 x i128> %a, %b + %s = select <2 x i1> %c, <2 x i128> %d, <2 x i128> %e + ret <2 x i128> %s +} + ; ===== ICMP Zero RHS ===== define <8 x i1> @icmp_eq_v8i8_Zero_RHS(<8 x i8> %a) { diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll index 951458da17c07..7014a4a9acbe0 100644 --- a/llvm/test/CodeGen/AArch64/shift.ll +++ b/llvm/test/CodeGen/AArch64/shift.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define i1 @shl_i1(i1 %0, i1 %1){ @@ -674,6 +674,61 @@ define <4 x i64> @shl_v4i64(<4 x i64> %0, <4 x i64> %1){ ret <4 x i64> %3 } +define <2 x i128> @shl_v2i128(<2 x i128> %0, <2 x i128> %1){ 
+; CHECK-SD-LABEL: shl_v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsr x8, x0, #1 +; CHECK-SD-NEXT: mvn w9, w4 +; CHECK-SD-NEXT: lsl x10, x1, x4 +; CHECK-SD-NEXT: mvn w12, w6 +; CHECK-SD-NEXT: lsl x11, x0, x4 +; CHECK-SD-NEXT: lsl x13, x3, x6 +; CHECK-SD-NEXT: lsr x8, x8, x9 +; CHECK-SD-NEXT: lsr x9, x2, #1 +; CHECK-SD-NEXT: tst x4, #0x40 +; CHECK-SD-NEXT: csel x0, xzr, x11, ne +; CHECK-SD-NEXT: lsr x9, x9, x12 +; CHECK-SD-NEXT: orr x8, x10, x8 +; CHECK-SD-NEXT: lsl x10, x2, x6 +; CHECK-SD-NEXT: csel x1, x11, x8, ne +; CHECK-SD-NEXT: tst x6, #0x40 +; CHECK-SD-NEXT: orr x8, x13, x9 +; CHECK-SD-NEXT: csel x2, xzr, x10, ne +; CHECK-SD-NEXT: csel x3, x10, x8, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shl_v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #64 // =0x40 +; CHECK-GI-NEXT: sub x10, x4, #64 +; CHECK-GI-NEXT: lsl x11, x1, x4 +; CHECK-GI-NEXT: sub x9, x8, x4 +; CHECK-GI-NEXT: lsl x10, x0, x10 +; CHECK-GI-NEXT: lsl x12, x0, x4 +; CHECK-GI-NEXT: lsr x9, x0, x9 +; CHECK-GI-NEXT: cmp x4, #64 +; CHECK-GI-NEXT: sub x8, x8, x6 +; CHECK-GI-NEXT: lsr x8, x2, x8 +; CHECK-GI-NEXT: csel x0, x12, xzr, lo +; CHECK-GI-NEXT: lsl x12, x2, x6 +; CHECK-GI-NEXT: orr x9, x9, x11 +; CHECK-GI-NEXT: lsl x11, x3, x6 +; CHECK-GI-NEXT: csel x9, x9, x10, lo +; CHECK-GI-NEXT: sub x10, x6, #64 +; CHECK-GI-NEXT: cmp x4, #0 +; CHECK-GI-NEXT: lsl x10, x2, x10 +; CHECK-GI-NEXT: csel x1, x1, x9, eq +; CHECK-GI-NEXT: orr x8, x8, x11 +; CHECK-GI-NEXT: cmp x6, #64 +; CHECK-GI-NEXT: csel x2, x12, xzr, lo +; CHECK-GI-NEXT: csel x8, x8, x10, lo +; CHECK-GI-NEXT: cmp x6, #0 +; CHECK-GI-NEXT: csel x3, x3, x8, eq +; CHECK-GI-NEXT: ret + %3 = shl <2 x i128> %0, %1 + ret <2 x i128> %3 +} + define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-SD-LABEL: ashr_v4i8: ; CHECK-SD: // %bb.0: @@ -819,6 +874,67 @@ define <4 x i64> @ashr_v4i64(<4 x i64> %0, <4 x i64> %1){ ret <4 x i64> %3 } +define <2 x i128> @ashr_v2i128(<2 x i128> %0, <2 x i128> %1){ +; CHECK-SD-LABEL: ashr_v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl x8, x1, #1 +; CHECK-SD-NEXT: mvn w9, w4 +; CHECK-SD-NEXT: lsl x10, x3, #1 +; CHECK-SD-NEXT: lsr x11, x0, x4 +; CHECK-SD-NEXT: lsr x12, x2, x6 +; CHECK-SD-NEXT: asr x13, x1, #63 +; CHECK-SD-NEXT: lsl x8, x8, x9 +; CHECK-SD-NEXT: mvn w9, w6 +; CHECK-SD-NEXT: tst x4, #0x40 +; CHECK-SD-NEXT: lsl x9, x10, x9 +; CHECK-SD-NEXT: asr x10, x1, x4 +; CHECK-SD-NEXT: asr x14, x3, #63 +; CHECK-SD-NEXT: orr x8, x8, x11 +; CHECK-SD-NEXT: asr x11, x3, x6 +; CHECK-SD-NEXT: csel x0, x10, x8, ne +; CHECK-SD-NEXT: orr x8, x9, x12 +; CHECK-SD-NEXT: csel x1, x13, x10, ne +; CHECK-SD-NEXT: tst x6, #0x40 +; CHECK-SD-NEXT: csel x2, x11, x8, ne +; CHECK-SD-NEXT: csel x3, x14, x11, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ashr_v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #64 // =0x40 +; CHECK-GI-NEXT: sub x10, x4, #64 +; CHECK-GI-NEXT: lsr x11, x0, x4 +; CHECK-GI-NEXT: sub x9, x8, x4 +; CHECK-GI-NEXT: asr x10, x1, x10 +; CHECK-GI-NEXT: cmp x4, #64 +; CHECK-GI-NEXT: lsl x9, x1, x9 +; CHECK-GI-NEXT: sub x8, x8, x6 +; CHECK-GI-NEXT: asr x12, x1, x4 +; CHECK-GI-NEXT: lsl x8, x3, x8 +; CHECK-GI-NEXT: orr x9, x11, x9 +; CHECK-GI-NEXT: asr x11, x1, #63 +; CHECK-GI-NEXT: csel x9, x9, x10, lo +; CHECK-GI-NEXT: cmp x4, #0 +; CHECK-GI-NEXT: lsr x10, x2, x6 +; CHECK-GI-NEXT: csel x0, x0, x9, eq +; CHECK-GI-NEXT: sub x9, x6, #64 +; CHECK-GI-NEXT: cmp x4, #64 +; CHECK-GI-NEXT: asr x9, x3, x9 +; CHECK-GI-NEXT: csel x1, x12, x11, lo +; CHECK-GI-NEXT: orr x8, x10, x8 +; CHECK-GI-NEXT: cmp x6, #64 +; CHECK-GI-NEXT: asr 
x11, x3, x6 +; CHECK-GI-NEXT: asr x10, x3, #63 +; CHECK-GI-NEXT: csel x8, x8, x9, lo +; CHECK-GI-NEXT: cmp x6, #0 +; CHECK-GI-NEXT: csel x2, x2, x8, eq +; CHECK-GI-NEXT: cmp x6, #64 +; CHECK-GI-NEXT: csel x3, x11, x10, lo +; CHECK-GI-NEXT: ret + %3 = ashr <2 x i128> %0, %1 + ret <2 x i128> %3 +} + define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){ ; CHECK-SD-LABEL: lshr_v4i8: ; CHECK-SD: // %bb.0: @@ -962,6 +1078,63 @@ define <4 x i64> @lshr_v4i64(<4 x i64> %0, <4 x i64> %1){ ret <4 x i64> %3 } +define <2 x i128> @lshr_v2i128(<2 x i128> %0, <2 x i128> %1){ +; CHECK-SD-LABEL: lshr_v2i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: lsl x8, x1, #1 +; CHECK-SD-NEXT: mvn w9, w4 +; CHECK-SD-NEXT: lsr x10, x0, x4 +; CHECK-SD-NEXT: mvn w12, w6 +; CHECK-SD-NEXT: lsr x11, x1, x4 +; CHECK-SD-NEXT: lsr x13, x2, x6 +; CHECK-SD-NEXT: lsl x8, x8, x9 +; CHECK-SD-NEXT: lsl x9, x3, #1 +; CHECK-SD-NEXT: tst x4, #0x40 +; CHECK-SD-NEXT: csel x1, xzr, x11, ne +; CHECK-SD-NEXT: lsl x9, x9, x12 +; CHECK-SD-NEXT: orr x8, x8, x10 +; CHECK-SD-NEXT: lsr x10, x3, x6 +; CHECK-SD-NEXT: csel x0, x11, x8, ne +; CHECK-SD-NEXT: tst x6, #0x40 +; CHECK-SD-NEXT: orr x8, x9, x13 +; CHECK-SD-NEXT: csel x3, xzr, x10, ne +; CHECK-SD-NEXT: csel x2, x10, x8, ne +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: lshr_v2i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #64 // =0x40 +; CHECK-GI-NEXT: sub x10, x4, #64 +; CHECK-GI-NEXT: lsr x11, x0, x4 +; CHECK-GI-NEXT: sub x9, x8, x4 +; CHECK-GI-NEXT: lsr x10, x1, x10 +; CHECK-GI-NEXT: cmp x4, #64 +; CHECK-GI-NEXT: lsl x9, x1, x9 +; CHECK-GI-NEXT: sub x8, x8, x6 +; CHECK-GI-NEXT: lsr x12, x1, x4 +; CHECK-GI-NEXT: lsl x8, x3, x8 +; CHECK-GI-NEXT: orr x9, x11, x9 +; CHECK-GI-NEXT: lsr x11, x2, x6 +; CHECK-GI-NEXT: csel x9, x9, x10, lo +; CHECK-GI-NEXT: cmp x4, #0 +; CHECK-GI-NEXT: sub x10, x6, #64 +; CHECK-GI-NEXT: csel x0, x0, x9, eq +; CHECK-GI-NEXT: cmp x4, #64 +; CHECK-GI-NEXT: lsr x9, x3, x10 +; CHECK-GI-NEXT: csel x1, x12, xzr, lo +; CHECK-GI-NEXT: orr x8, x11, x8 +; CHECK-GI-NEXT: cmp x6, #64 +; CHECK-GI-NEXT: lsr x10, x3, x6 +; CHECK-GI-NEXT: csel x8, x8, x9, lo +; CHECK-GI-NEXT: cmp x6, #0 +; CHECK-GI-NEXT: csel x2, x2, x8, eq +; CHECK-GI-NEXT: cmp x6, #64 +; CHECK-GI-NEXT: csel x3, x10, xzr, lo +; CHECK-GI-NEXT: ret + %3 = lshr <2 x i128> %0, %1 + ret <2 x i128> %3 +} + ; ===== Vector with Non-Pow 2 Width ===== define <3 x i8> @shl_v3i8(<3 x i8> %0, <3 x i8> %1){ diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-64.ll b/llvm/test/CodeGen/X86/bypass-slow-division-64.ll index 6e0cfdd26a786..b0ca0069a526b 100644 --- a/llvm/test/CodeGen/X86/bypass-slow-division-64.ll +++ b/llvm/test/CodeGen/X86/bypass-slow-division-64.ll @@ -23,6 +23,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ ; Additional tests for 64-bit divide bypass diff --git a/llvm/test/CodeGen/X86/cmp16.ll b/llvm/test/CodeGen/X86/cmp16.ll index fa9e75ff16a5c..8c14a78d9e113 100644 --- a/llvm/test/CodeGen/X86/cmp16.ll +++ b/llvm/test/CodeGen/X86/cmp16.ll @@ -13,6 +13,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=X64,X64-FAST ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=X64,X64-FAST ; RUN: llc < %s -mtriple=x86_64-- 
-mcpu=znver4 | FileCheck %s --check-prefixes=X64,X64-FAST +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=X64,X64-FAST define i1 @cmp16_reg_eq_reg(i16 %a0, i16 %a1) { ; X86-GENERIC-LABEL: cmp16_reg_eq_reg: diff --git a/llvm/test/CodeGen/X86/cpus-amd.ll b/llvm/test/CodeGen/X86/cpus-amd.ll index 228a00428c457..33b2cf3731478 100644 --- a/llvm/test/CodeGen/X86/cpus-amd.ll +++ b/llvm/test/CodeGen/X86/cpus-amd.ll @@ -29,6 +29,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver5 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void diff --git a/llvm/test/CodeGen/X86/rdpru.ll b/llvm/test/CodeGen/X86/rdpru.ll index 7771f52653cb5..be79a4499a338 100644 --- a/llvm/test/CodeGen/X86/rdpru.ll +++ b/llvm/test/CodeGen/X86/rdpru.ll @@ -6,6 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 -fast-isel | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 -fast-isel | FileCheck %s --check-prefix=X64 define void @rdpru_asm() { ; X86-LABEL: rdpru_asm: diff --git a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll index e89197f5b42c3..9c8729b3ea505 100644 --- a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll +++ b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-ICX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 define <4 x i32> @shuf_rot_v4i32_1032(<4 x i32> %x) { diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll index d74d195439bda..ceef3fb4bb188 100644 --- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll +++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll @@ -50,6 +50,7 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver5 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512 ; Other chips with slow unaligned memory accesses diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll index 9f2071ff14b87..2b78a70ebcc26 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll @@ -6,6 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s 
--check-prefixes=FAST-SCALAR,FAST-VECTOR ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X86-64 define float @f32_no_daz(float %f) #0 { diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll index 7d8bb567c09b3..162ab71fc00d4 100644 --- a/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll +++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 define <8 x double> @transform_VPERMILPSZrr(<8 x double> %a) nounwind { ; CHECK-LABEL: transform_VPERMILPSZrr: diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll index 5d031f6017c77..cd97946da248f 100644 --- a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll +++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 define <16 x float> @transform_VPERMILPSZrr(<16 x float> %a) nounwind { ; CHECK-LABEL: transform_VPERMILPSZrr: diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll index 4a160bc9debc7..5ea991f85523e 100644 --- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll +++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 define <16 x float> @transform_VUNPCKLPDZrr(<16 x float> %a, <16 x float> %b) nounwind { diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll index d0e3ad9b19086..96155f0300d2d 100644 --- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll +++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s 
--check-prefixes=CHECK,CHECK-V4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4 define <16 x float> @transform_VUNPCKLPSZrr(<16 x float> %a, <16 x float> %b) nounwind { ; CHECK-LABEL: transform_VUNPCKLPSZrr: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll b/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll index e59532d4fef30..4021b1bf292bb 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll @@ -8,6 +8,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST diff --git a/llvm/test/CodeGen/X86/vpdpwssd.ll b/llvm/test/CodeGen/X86/vpdpwssd.ll index e6a07b4aeb271..3c1eb92e9e3c3 100644 --- a/llvm/test/CodeGen/X86/vpdpwssd.ll +++ b/llvm/test/CodeGen/X86/vpdpwssd.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) { diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll index af6fbdc9f60de..bbaa414924707 100644 --- a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll +++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -16,6 +16,7 @@ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s ; Verify that for the X86_64 processors that are known to have poor latency ; double precision shift instructions we do not generate 'shld' or 'shrd' diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s index 6136c3db9a3da..b79403bb5f1ec 100644 --- a/llvm/test/MC/X86/x86_long_nop.s +++ b/llvm/test/MC/X86/x86_long_nop.s @@ -19,6 +19,8 @@ # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 +# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver5 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15 +# RUN: llvm-mc 
-filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver5 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
diff --git a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
index abdcfcf7e0742..b05994ddfa35e 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
@@ -1,6 +1,7 @@
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=TTI -pass-remarks-analysis=TTI < %s -S 2>&1 | FileCheck --check-prefixes=ALL,TTI %s
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver4 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
+; RUN: opt -passes=debugify,loop-unroll -mcpu=znver5 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
 ; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 --try-experimental-debuginfo-iterators | FileCheck --check-prefixes=ALL,UNROLL %s
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
index 391771e06cab8..037e073de9d59 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 ; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver5 -S < %s | FileCheck %s

 define internal i32 @testfunc() {
 ; CHECK-LABEL: define internal i32 @testfunc
diff --git a/llvm/tools/llvm-exegesis/lib/LlvmState.h b/llvm/tools/llvm-exegesis/lib/LlvmState.h
index e42393edb636d..f69d76c9a1e4e 100644
--- a/llvm/tools/llvm-exegesis/lib/LlvmState.h
+++ b/llvm/tools/llvm-exegesis/lib/LlvmState.h
@@ -76,14 +76,6 @@ class LLVMState {
     return *OpcodeNameToOpcodeIdxMapping;
   };

-  // TODO(boomanaiden154): We are keeping this getter around to enable internal
-  // migration to getRegisterNumberFromName. Once that is complete and
-  // the changes have been pulled, we can remove this.
-  const DenseMap<StringRef, unsigned> &getRegNameToRegNoMapping() const {
-    assert(RegNameToRegNoMapping);
-    return *RegNameToRegNoMapping;
-  }
-
   std::optional<MCRegister>
   getRegisterNumberFromName(StringRef RegisterName) const;
diff --git a/llvm/unittests/Bitcode/BitReaderTest.cpp b/llvm/unittests/Bitcode/BitReaderTest.cpp
index 22cc5e7492803..aea66fc1d8db5 100644
--- a/llvm/unittests/Bitcode/BitReaderTest.cpp
+++ b/llvm/unittests/Bitcode/BitReaderTest.cpp
@@ -38,7 +38,7 @@ std::unique_ptr<Module> parseAssembly(LLVMContext &Context,
   // A failure here means that the test itself is buggy.
if (!M)
-    report_fatal_error(OS.str().c_str());
+    report_fatal_error(ErrMsg.c_str());

   return M;
 }
diff --git a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.cpp b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.cpp
index db9fb3a2d316e..b0dbd4a10b0a7 100644
--- a/llvm/unittests/CodeGen/GlobalISel/GISelMITest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/GISelMITest.cpp
@@ -14,7 +14,7 @@ operator<<(std::ostream &OS, const LLT Ty) {
   std::string Repr;
   raw_string_ostream SS{Repr};
   Ty.print(SS);
-  OS << SS.str();
+  OS << Repr;
   return OS;
 }

@@ -23,7 +23,7 @@ operator<<(std::ostream &OS, const MachineFunction &MF) {
   std::string Repr;
   raw_string_ostream SS{Repr};
   MF.print(SS);
-  OS << SS.str();
+  OS << Repr;
   return OS;
 }
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
index 401d04954a669..625e2c92b1119 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
@@ -27,7 +27,7 @@ ::testing::AssertionResult isNullMIPtr(const MachineInstr *MI) {
   MI->print(MISStream, /*IsStandalone=*/true, /*SkipOpers=*/false,
             /*SkipDebugLoc=*/false, /*AddNewLine=*/false);
   return ::testing::AssertionFailure()
-         << "unable to legalize instruction: " << MISStream.str();
+         << "unable to legalize instruction: " << MIBuffer;
 }

 DefineLegalizerInfo(ALegalizer, {
diff --git a/llvm/unittests/CodeGen/MachineInstrTest.cpp b/llvm/unittests/CodeGen/MachineInstrTest.cpp
index af25acbb38fd5..d1546cf96f8d7 100644
--- a/llvm/unittests/CodeGen/MachineInstrTest.cpp
+++ b/llvm/unittests/CodeGen/MachineInstrTest.cpp
@@ -223,9 +223,8 @@ TEST(MachineInstrPrintingTest, DebugLocPrinting) {
   raw_string_ostream OS(str);
   MI->print(OS, /*IsStandalone*/true, /*SkipOpers*/false, /*SkipDebugLoc*/false,
             /*AddNewLine*/false);
-  ASSERT_TRUE(
-      StringRef(OS.str()).starts_with("$noreg = UNKNOWN debug-location "));
-  ASSERT_TRUE(StringRef(OS.str()).ends_with("filename:1:5"));
+  ASSERT_TRUE(StringRef(str).starts_with("$noreg = UNKNOWN debug-location "));
+  ASSERT_TRUE(StringRef(str).ends_with("filename:1:5"));
 }

 TEST(MachineInstrSpan, DistanceBegin) {
diff --git a/llvm/unittests/CodeGen/MachineOperandTest.cpp b/llvm/unittests/CodeGen/MachineOperandTest.cpp
index 8465c8b4f5394..63059d3267f71 100644
--- a/llvm/unittests/CodeGen/MachineOperandTest.cpp
+++ b/llvm/unittests/CodeGen/MachineOperandTest.cpp
@@ -73,7 +73,7 @@ TEST(MachineOperandTest, PrintRegisterMask) {
   std::string str;
   raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "<regmask ...>");
+  ASSERT_TRUE(str == "<regmask ...>");
 }

 TEST(MachineOperandTest, PrintSubReg) {
@@ -94,7 +94,7 @@ TEST(MachineOperandTest, PrintSubReg) {
   std::string str;
   raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "$physreg1.subreg5");
+  ASSERT_TRUE(str == "$physreg1.subreg5");
 }

 TEST(MachineOperandTest, PrintCImm) {
@@ -116,7 +116,7 @@ TEST(MachineOperandTest, PrintCImm) {
   std::string str;
   raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "i128 18446744073709551616");
+  ASSERT_TRUE(str == "i128 18446744073709551616");
 }

 TEST(MachineOperandTest, PrintSubRegIndex) {
@@ -133,7 +133,7 @@ TEST(MachineOperandTest, PrintSubRegIndex) {
   std::string str;
   raw_string_ostream OS(str);
   MachineOperand::printSubRegIdx(OS, MO.getImm(), nullptr);
-  ASSERT_TRUE(OS.str() == "%subreg.3");
+  ASSERT_TRUE(str == "%subreg.3");
 }

 TEST(MachineOperandTest, PrintCPI)
{
@@ -152,7 +152,7 @@ TEST(MachineOperandTest, PrintCPI) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "%const.0 + 8");
+    ASSERT_TRUE(str == "%const.0 + 8");
   }

   str.clear();
@@ -164,7 +164,7 @@ TEST(MachineOperandTest, PrintCPI) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "%const.0 - 12");
+    ASSERT_TRUE(str == "%const.0 - 12");
   }
 }

@@ -183,7 +183,7 @@ TEST(MachineOperandTest, PrintTargetIndexName) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "target-index(<unknown>) + 8");
+    ASSERT_TRUE(str == "target-index(<unknown>) + 8");
   }

   str.clear();
@@ -194,7 +194,7 @@ TEST(MachineOperandTest, PrintTargetIndexName) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "target-index(<unknown>) - 12");
+    ASSERT_TRUE(str == "target-index(<unknown>) - 12");
   }
 }

@@ -211,7 +211,7 @@ TEST(MachineOperandTest, PrintJumpTableIndex) {
   std::string str;
   raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "%jump-table.3");
+  ASSERT_TRUE(str == "%jump-table.3");
 }

 TEST(MachineOperandTest, PrintExternalSymbol) {
@@ -228,7 +228,7 @@ TEST(MachineOperandTest, PrintExternalSymbol) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "&foo");
+    ASSERT_TRUE(str == "&foo");
   }

   str.clear();
@@ -238,7 +238,7 @@ TEST(MachineOperandTest, PrintExternalSymbol) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "&foo + 12");
+    ASSERT_TRUE(str == "&foo + 12");
   }

   str.clear();
@@ -248,7 +248,7 @@ TEST(MachineOperandTest, PrintExternalSymbol) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "&foo - 12");
+    ASSERT_TRUE(str == "&foo - 12");
   }
 }

@@ -274,7 +274,7 @@ TEST(MachineOperandTest, PrintGlobalAddress) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "@foo + 12");
+    ASSERT_TRUE(str == "@foo + 12");
   }

   str.clear();
@@ -284,7 +284,7 @@ TEST(MachineOperandTest, PrintGlobalAddress) {
   {
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "@foo - 12");
+    ASSERT_TRUE(str == "@foo - 12");
   }
 }

@@ -302,7 +302,7 @@ TEST(MachineOperandTest, PrintRegisterLiveOut) {
   // Print a MachineOperand containing a register live out list without a TRI.
   raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "liveout(<unknown>)");
+  ASSERT_TRUE(str == "liveout(<unknown>)");
 }

 TEST(MachineOperandTest, PrintMetadata) {
@@ -328,7 +328,7 @@ TEST(MachineOperandTest, PrintMetadata) {
   MO.print(OS, MST, LLT{}, /*OpIdx*/~0U, /*PrintDef=*/false,
            /*IsStandalone=*/false, /*ShouldPrintRegisterTies=*/false, 0,
            /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "!0");
+  ASSERT_TRUE(str == "!0");
 }

 TEST(MachineOperandTest, PrintMCSymbol) {
@@ -349,7 +349,7 @@ TEST(MachineOperandTest, PrintMCSymbol) {
   // Print a MachineOperand containing a metadata node.
   raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "<mcsymbol foo>");
+  ASSERT_TRUE(str == "<mcsymbol foo>");
 }

 TEST(MachineOperandTest, PrintCFI) {
@@ -366,7 +366,7 @@ TEST(MachineOperandTest, PrintCFI) {
   // attached to it.
raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "<cfi directive>");
+  ASSERT_TRUE(str == "<cfi directive>");
 }

 TEST(MachineOperandTest, PrintIntrinsicID) {
@@ -383,7 +383,7 @@ TEST(MachineOperandTest, PrintIntrinsicID) {
     // Print a MachineOperand containing a generic intrinsic ID.
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "intrinsic(@llvm.bswap)");
+    ASSERT_TRUE(str == "intrinsic(@llvm.bswap)");
   }

   str.clear();
@@ -394,7 +394,7 @@ TEST(MachineOperandTest, PrintIntrinsicID) {
     // IntrinsicInfo.
     raw_string_ostream OS(str);
     MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-    ASSERT_TRUE(OS.str() == "intrinsic(4294967295)");
+    ASSERT_TRUE(str == "intrinsic(4294967295)");
   }
 }

@@ -411,7 +411,7 @@ TEST(MachineOperandTest, PrintPredicate) {
   // Print a MachineOperand containing a int predicate ICMP_EQ.
   raw_string_ostream OS(str);
   MO.print(OS, /*TRI=*/nullptr, /*IntrinsicInfo=*/nullptr);
-  ASSERT_TRUE(OS.str() == "intpred(eq)");
+  ASSERT_TRUE(str == "intpred(eq)");
 }

 TEST(MachineOperandTest, HashValue) {
diff --git a/llvm/unittests/SandboxIR/PassTest.cpp b/llvm/unittests/SandboxIR/PassTest.cpp
index 3517f0e32b1bb..ed226d5765586 100644
--- a/llvm/unittests/SandboxIR/PassTest.cpp
+++ b/llvm/unittests/SandboxIR/PassTest.cpp
@@ -162,3 +162,34 @@ TEST_F(PassTest, PassRegistry) {
   EXPECT_EQ(Buff, "test-pass1\ntest-pass2\n");
 #endif // NDEBUG
 }
+
+TEST_F(PassTest, ParsePassPipeline) {
+  class TestPass1 final : public FunctionPass {
+  public:
+    TestPass1() : FunctionPass("test-pass1") {}
+    bool runOnFunction(Function &F) final { return false; }
+  };
+  class TestPass2 final : public FunctionPass {
+  public:
+    TestPass2() : FunctionPass("test-pass2") {}
+    bool runOnFunction(Function &F) final { return false; }
+  };
+
+  PassRegistry Registry;
+  Registry.registerPass(std::make_unique<TestPass1>());
+  Registry.registerPass(std::make_unique<TestPass2>());
+
+  auto &FPM =
+      Registry.parseAndCreatePassPipeline("test-pass1,test-pass2,test-pass1");
+#ifndef NDEBUG
+  std::string Buff;
+  llvm::raw_string_ostream SS(Buff);
+  FPM.print(SS);
+  EXPECT_EQ(Buff, "init-fpm(test-pass1,test-pass2,test-pass1)");
+#endif // NDEBUG
+
+  EXPECT_DEATH(Registry.parseAndCreatePassPipeline("bad-pass-name"),
+               ".*not registered.*");
+  EXPECT_DEATH(Registry.parseAndCreatePassPipeline(""), ".*not registered.*");
+  EXPECT_DEATH(Registry.parseAndCreatePassPipeline(","), ".*not registered.*");
+}
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index ad5508f041d6c..d883c185f8296 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -843,6 +843,30 @@ define void @foo(ptr %ptr) {
   EXPECT_EQ(LookupBB2Addr, nullptr);
 }

+TEST_F(SandboxIRTest, DSOLocalEquivalent) {
+  parseIR(C, R"IR(
+declare void @bar()
+define void @foo() {
+  call void dso_local_equivalent @bar()
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto *BB = &*F.begin();
+  auto It = BB->begin();
+  auto *CI = cast<sandboxir::CallInst>(&*It++);
+  // Check classof().
+  auto *DSOLE = cast<sandboxir::DSOLocalEquivalent>(CI->getCalledOperand());
+  // Check getGlobalValue().
+  auto *GV = DSOLE->getGlobalValue();
+  // Check get().
+  auto *NewDSOLE = sandboxir::DSOLocalEquivalent::get(GV);
+  EXPECT_EQ(NewDSOLE, DSOLE);
+}
+
 TEST_F(SandboxIRTest, ConstantTokenNone) {
   parseIR(C, R"IR(
 define void @foo(ptr %ptr) {
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 7bbf18fe0106f..152715f281088 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -123,52 +123,67 @@ class NVVM_SpecialRegisterOp<string mnemonic, list<Trait> traits = []> :
   let assemblyFormat = "attr-dict `:` type($res)";
 }

+class NVVM_SpecialRangeableRegisterOp<string mnemonic, list<Trait> traits = []> :
+  NVVM_SpecialRegisterOp<mnemonic, traits> {
+  let arguments = (ins OptionalAttr<LLVM_ConstantRangeAttr>:$range);
+  let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";
+  let llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # baseLlvmBuilderCoda;
+  let mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # baseMlirBuilderCoda;
+
+  // Backwards-compatibility builder for an unspecified range.
+  let builders = [
+    OpBuilder<(ins "Type":$resultType), [{
+      build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{});
+    }]>
+  ];
+}
+
 //===----------------------------------------------------------------------===//
 // Lane index and range
-def NVVM_LaneIdOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.laneid">;
-def NVVM_WarpSizeOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.warpsize">;
+def NVVM_LaneIdOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.laneid">;
+def NVVM_WarpSizeOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.warpsize">;

 //===----------------------------------------------------------------------===//
 // Thread index and range
-def NVVM_ThreadIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.x">;
-def NVVM_ThreadIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.y">;
-def NVVM_ThreadIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.tid.z">;
-def NVVM_BlockDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.x">;
-def NVVM_BlockDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.y">;
-def NVVM_BlockDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ntid.z">;
+def NVVM_ThreadIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.x">;
+def NVVM_ThreadIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.y">;
+def NVVM_ThreadIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.tid.z">;
+def NVVM_BlockDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.x">;
+def NVVM_BlockDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.y">;
+def NVVM_BlockDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ntid.z">;

 //===----------------------------------------------------------------------===//
 // Block index and range
-def NVVM_BlockIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.x">;
-def NVVM_BlockIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.y">;
-def NVVM_BlockIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.ctaid.z">;
-def NVVM_GridDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.x">;
-def NVVM_GridDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.y">;
-def NVVM_GridDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nctaid.z">;
+def NVVM_BlockIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.x">;
+def NVVM_BlockIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.y">;
+def NVVM_BlockIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.ctaid.z">;
+def NVVM_GridDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.x">;
+def NVVM_GridDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.y">;
+def NVVM_GridDimZOp : 
NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nctaid.z">; //===----------------------------------------------------------------------===// // CTA Cluster index and range -def NVVM_ClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.x">; -def NVVM_ClusterIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.y">; -def NVVM_ClusterIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clusterid.z">; -def NVVM_ClusterDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nclusterid.x">; -def NVVM_ClusterDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nclusterid.y">; -def NVVM_ClusterDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nclusterid.z">; +def NVVM_ClusterIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.x">; +def NVVM_ClusterIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.y">; +def NVVM_ClusterIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.clusterid.z">; +def NVVM_ClusterDimXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.x">; +def NVVM_ClusterDimYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.y">; +def NVVM_ClusterDimZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.nclusterid.z">; //===----------------------------------------------------------------------===// // CTA index and range within Cluster -def NVVM_BlockInClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.x">; -def NVVM_BlockInClusterIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.y">; -def NVVM_BlockInClusterIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.z">; -def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.x">; -def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.y">; -def NVVM_ClusterDimBlocksZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.z">; +def NVVM_BlockInClusterIdXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.x">; +def NVVM_BlockInClusterIdYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.y">; +def NVVM_BlockInClusterIdZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctaid.z">; +def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.x">; +def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.y">; +def NVVM_ClusterDimBlocksZOp : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctaid.z">; //===----------------------------------------------------------------------===// // CTA index and across Cluster dimensions -def NVVM_ClusterId : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctarank">; -def NVVM_ClusterDim : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctarank">; +def NVVM_ClusterId : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.ctarank">; +def NVVM_ClusterDim : NVVM_SpecialRangeableRegisterOp<"read.ptx.sreg.cluster.nctarank">; //===----------------------------------------------------------------------===// // Clock registers diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 9b1be198f77a8..164622d77e6b6 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -29,6 +29,7 @@ #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/Math/IR/Math.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" #include "mlir/Transforms/DialectConversion.h" #include 
"mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -209,7 +210,15 @@ struct GPULaneIdOpToNVVM : ConvertOpToLLVMPattern { ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); MLIRContext *context = rewriter.getContext(); - Value newOp = rewriter.create(loc, rewriter.getI32Type()); + LLVM::ConstantRangeAttr bounds = nullptr; + if (std::optional upperBound = op.getUpperBound()) + bounds = rewriter.getAttr( + /*bitWidth=*/32, /*lower=*/0, upperBound->getZExtValue()); + else + bounds = rewriter.getAttr( + /*bitWidth=*/32, /*lower=*/0, /*upper=*/kWarpSize); + Value newOp = + rewriter.create(loc, rewriter.getI32Type(), bounds); // Truncate or extend the result depending on the index bitwidth specified // by the LLVMTypeConverter options. const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth(); @@ -340,27 +349,40 @@ void mlir::populateGpuSubgroupReduceOpLoweringPattern( void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns) { + using gpu::index_lowering::IndexKind; + using gpu::index_lowering::IntrType; populateWithGenerated(patterns); patterns.add(converter); patterns.add< gpu::index_lowering::OpLowering, + NVVM::ThreadIdYOp, NVVM::ThreadIdZOp>>( + converter, IndexKind::Block, IntrType::Id); + patterns.add< gpu::index_lowering::OpLowering, + NVVM::BlockDimYOp, NVVM::BlockDimZOp>>( + converter, IndexKind::Block, IntrType::Dim); + patterns.add< gpu::index_lowering::OpLowering, - gpu::index_lowering::OpLowering, - gpu::index_lowering::OpLowering< - gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp, - NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>, - gpu::index_lowering::OpLowering, - gpu::index_lowering::OpLowering, - gpu::index_lowering::OpLowering, - GPULaneIdOpToNVVM, GPUShuffleOpLowering, GPUReturnOpLowering>(converter); + NVVM::ClusterIdYOp, NVVM::ClusterIdZOp>>( + converter, IndexKind::Other, IntrType::Id); + patterns.add>(converter, IndexKind::Other, IntrType::Dim); + patterns.add>( + converter, IndexKind::Other, IntrType::Id); + patterns.add>(converter, IndexKind::Other, IntrType::Dim); + patterns.add>( + converter, IndexKind::Block, IntrType::Id); + patterns.add>( + converter, IndexKind::Grid, IntrType::Dim); + patterns.add( + converter); patterns.add( converter, NVVM::kSharedMemoryAlignmentBit); diff --git a/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp b/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp index b65b18699a15a..80edf4a32c6df 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp @@ -390,6 +390,8 @@ Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, TransformMapKeyTy key = {m, r}; int64_t retRows = 1; Value matmulRetValue = extractFilter; + Value zero = builder.create( + loc, rewriter.getZeroAttr(elementType)); if (leftTransform) { // Get constant transform matrix G. auto it = GMatrices.find(key); @@ -399,8 +401,11 @@ Value filterTransform(RewriterBase &rewriter, Location loc, Value filter, retRows = GMatrix.rows; auto matmulType = RankedTensorType::get({retRows, filterW}, elementType); - auto init = builder.create(loc, matmulType.getShape(), - elementType); + auto empty = + builder + .create(loc, matmulType.getShape(), elementType) + .getResult(); + auto init = builder.create(loc, zero, empty).getResult(0); Value G = create2DTransformMatrix(builder, loc, GMatrix, elementType); // Multiply G x g. 
@@ -418,8 +423,11 @@ Value filterTransform(RewriterBase &rewriter, Location loc, Value filter,
     auto matmulType =
         RankedTensorType::get({retRows, GTMatrix.cols}, elementType);
-    auto init = builder.create<tensor::EmptyOp>(loc, matmulType.getShape(),
-                                                elementType);
+    auto empty =
+        builder
+            .create<tensor::EmptyOp>(loc, matmulType.getShape(), elementType)
+            .getResult();
+    auto init = builder.create<linalg::FillOp>(loc, zero, empty).getResult(0);

     Value GT = create2DTransformMatrix(builder, loc, GTMatrix, elementType);
     // Multiply u = (G x g) x GT.
@@ -523,6 +531,8 @@ Value inputTransform(RewriterBase &rewriter, Location loc, Value input,
   int64_t retRows = 1;
   int64_t retCols = 1;
   Value matmulRetValue = extractInput;
+  Value zero = builder.create<arith::ConstantOp>(
+      loc, rewriter.getZeroAttr(elementType));
   if (leftTransform) {
     // Get constant transform matrix BT.
     auto it = BTMatrices.find(key);
@@ -532,8 +542,11 @@ Value inputTransform(RewriterBase &rewriter, Location loc, Value input,
     retRows = BTMatrix.rows;
     auto matmulType = RankedTensorType::get({retRows, alphaW}, elementType);
-    auto init = builder.create<tensor::EmptyOp>(loc, matmulType.getShape(),
-                                                elementType);
+    auto empty =
+        builder
+            .create<tensor::EmptyOp>(loc, matmulType.getShape(), elementType)
+            .getResult();
+    auto init = builder.create<linalg::FillOp>(loc, zero, empty).getResult(0);
     Value BT =
         create2DTransformMatrix(builder, loc, BTMatrix, builder.getF32Type());
@@ -552,8 +565,11 @@ Value inputTransform(RewriterBase &rewriter, Location loc, Value input,
     retCols = BMatrix.cols;
     auto matmulType = RankedTensorType::get({retRows, retCols}, elementType);
-    auto init = builder.create<tensor::EmptyOp>(loc, matmulType.getShape(),
-                                                elementType);
+    auto empty =
+        builder
+            .create<tensor::EmptyOp>(loc, matmulType.getShape(), elementType)
+            .getResult();
+    auto init = builder.create<linalg::FillOp>(loc, zero, empty).getResult(0);
     Value B =
         create2DTransformMatrix(builder, loc, BMatrix, builder.getF32Type());
     // Multiply v = (BT x d) x B.
@@ -636,8 +652,13 @@ static Value matrixMultiply(RewriterBase &rewriter, Location loc,
       {inputShape[0] * inputShape[1],
        inputShape[2] * inputShape[3] * inputShape[4], filterShape[3]},
       outputElementType);
-  Value init = rewriter.create<tensor::EmptyOp>(loc, matmulType.getShape(),
-                                                outputElementType);
+  Value empty = rewriter
+                    .create<tensor::EmptyOp>(loc, matmulType.getShape(),
+                                             outputElementType)
+                    .getResult();
+  Value zero = rewriter.create<arith::ConstantOp>(
+      loc, rewriter.getZeroAttr(outputElementType));
+  Value init = rewriter.create<linalg::FillOp>(loc, zero, empty).getResult(0);

   auto matmulOp = rewriter.create<linalg::BatchMatmulOp>(
       loc, matmulType, ValueRange({collapseInput, collapseFilter}),
@@ -725,6 +746,8 @@ Value outputTransform(RewriterBase &rewriter, Location loc, Value value,
   int64_t leftScalarFactor = 1;
   int64_t rightScalarFactor = 1;
   Value matmulRetValue = extractValue;
+  Value zero = builder.create<arith::ConstantOp>(
+      loc, rewriter.getZeroAttr(elementType));
   if (leftTransform) {
     // Get constant transform matrix AT.
     auto it = ATMatrices.find(key);
@@ -735,8 +758,11 @@ Value outputTransform(RewriterBase &rewriter, Location loc, Value value,
     leftScalarFactor = ATMatrix.scalarFactor;
     retRows = ATMatrix.rows;
     auto matmulType = RankedTensorType::get({retRows, valueW}, elementType);
-    auto init = builder.create<tensor::EmptyOp>(loc, matmulType.getShape(),
-                                                elementType);
+    auto empty =
+        builder
+            .create<tensor::EmptyOp>(loc, matmulType.getShape(), elementType)
+            .getResult();
+    auto init = builder.create<linalg::FillOp>(loc, zero, empty).getResult(0);

     Value AT = create2DTransformMatrix(builder, loc, ATMatrix, elementType);
     // Multiply AT x m.
@@ -756,8 +782,11 @@ Value outputTransform(RewriterBase &rewriter, Location loc, Value value,
     auto matmulType =
         RankedTensorType::get({retRows, AMatrix.cols}, elementType);
     retCols = AMatrix.cols;
-    auto init = builder.create<tensor::EmptyOp>(loc, matmulType.getShape(),
-                                                elementType);
+    auto empty =
+        builder
+            .create<tensor::EmptyOp>(loc, matmulType.getShape(), elementType)
+            .getResult();
+    auto init = builder.create<linalg::FillOp>(loc, zero, empty).getResult(0);
     Value A = create2DTransformMatrix(builder, loc, AMatrix, elementType);
     // Multiply y = (AT x m) x A.
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp
index 855abc12a909e..bc830a77f3c58 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/LLVMIRToNVVMTranslation.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Target/LLVMIR/ModuleImport.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"

 using namespace mlir;
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 8f2ec289c9252..66ad1e307fc3a 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -50,7 +50,7 @@ gpu.module @test_module_0 {
   %gDimZ = gpu.grid_dim z

-  // CHECK: = nvvm.read.ptx.sreg.laneid : i32
+  // CHECK: = nvvm.read.ptx.sreg.laneid range <i32 0, 32> : i32
   // CHECK: = llvm.sext %{{.*}} : i32 to i64
   %laneId = gpu.lane_id

@@ -699,9 +699,21 @@ gpu.module @test_module_32 {
 }

 gpu.module @test_module_33 {
-// CHECK-LABEL: func @kernel_with_block_size()
-// CHECK: attributes {gpu.kernel, gpu.known_block_size = array<i32: 32, 4, 2>, nvvm.kernel, nvvm.maxntid = array<i32: 32, 4, 2>}
-  gpu.func @kernel_with_block_size() kernel attributes {known_block_size = array<i32: 32, 4, 2>} {
+// CHECK-LABEL: func @kernel_with_block_size(
+// CHECK: attributes {gpu.kernel, gpu.known_block_size = array<i32: 32, 4, 2>, nvvm.kernel, nvvm.maxntid = array<i32: 32, 4, 2>}
+  gpu.func @kernel_with_block_size(%arg0: !llvm.ptr) kernel attributes {known_block_size = array<i32: 32, 4, 2>} {
+  // CHECK: = nvvm.read.ptx.sreg.tid.x range <i32 0, 32> : i32
+  %0 = gpu.thread_id x
+  // CHECK: = nvvm.read.ptx.sreg.tid.y range <i32 0, 4> : i32
+  %1 = gpu.thread_id y
+  // CHECK: = nvvm.read.ptx.sreg.tid.z range <i32 0, 2> : i32
+  %2 = gpu.thread_id z
+
+  // Fake usage to prevent dead code elimination
+  %3 = arith.addi %0, %1 : index
+  %4 = arith.addi %3, %2 : index
+  %5 = arith.index_cast %4 : index to i64
+  llvm.store %5, %arg0 : i64, !llvm.ptr
   gpu.return
   }
 }
@@ -917,6 +929,20 @@ gpu.module @test_module_48 {
   }
 }

+gpu.module @test_module_49 {
+// CHECK-LABEL: func @explicit_id_bounds()
+  func.func @explicit_id_bounds() -> (index, index, index) {
+    // CHECK: = nvvm.read.ptx.sreg.tid.x range <i32 0, 32> : i32
+    %0 = gpu.thread_id x upper_bound 32
+    // CHECK: = nvvm.read.ptx.sreg.ntid.x range <i32 1, 33> : i32
+    %1 = gpu.block_dim x upper_bound 32
+    // CHECK: = nvvm.read.ptx.sreg.laneid range <i32 0, 16> : i32
+    %2 = gpu.lane_id upper_bound 16
+
+    return %0, %1, %2 : index, index, index
+  }
+}
+
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%toplevel_module: !transform.any_op {transform.readonly}) {
     %gpu_module = transform.structured.match ops{["gpu.module"]} in %toplevel_module
diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir
index 6bb3fb1423edc..c5760acf94a88 100644
--- a/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir
+++ 
b/mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir @@ -36,6 +36,13 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @conv2d // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x10x10x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> { // CHECK: %[[CST:.*]] = arith.constant 1.024000e+03 : f32 +// CHECK: %[[CST_0:.*]] = arith.constant dense<{{.*}}> : tensor<6x4xf32> +// CHECK: %[[CST_1:.*]] = arith.constant dense<{{.*}}> : tensor<4x6xf32> +// CHECK: %[[CST_2:.*]] = arith.constant dense<{{.*}}> : tensor<6x6xf32> +// CHECK: %[[CST_3:.*]] = arith.constant dense<{{.*}}> : tensor<6x6xf32> +// CHECK: %[[CST_4:.*]] = arith.constant dense<{{.*}}> : tensor<3x6xf32> +// CHECK: %[[CST_5:.*]] = arith.constant dense<{{.*}}> : tensor<6x3xf32> +// CHECK: %[[CST_6:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[C5:.*]] = arith.constant 5 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index @@ -44,9 +51,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S1:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[S0]]) // CHECK: %[[S9:.*]] = scf.for %[[ARG5:.*]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[ARG6:.*]] = %[[ARG4]]) // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG1]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 3, 3, 1] [1, 1, 1, 1] -// CHECK: %[[S11:.*]] = linalg.matmul -// CHECK: %[[S13:.*]] = linalg.matmul -// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S13]] into %[[ARG6]][0, 0, %[[ARG5]], %[[ARG3]]] [6, 6, 1, 1] [1, 1, 1, 1] +// CHECK: %[[S10:.*]] = tensor.empty() : tensor<6x3xf32> +// CHECK: %[[S11:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S10]] : tensor<6x3xf32>) -> tensor<6x3xf32> +// CHECK: %[[S12:.*]] = linalg.matmul ins(%[[CST_5]], %[[EXTRACTED_SLICE]] : tensor<6x3xf32>, tensor<3x3xf32>) outs(%[[S11]] : tensor<6x3xf32>) -> tensor<6x3xf32> +// CHECK: %[[S13:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK: %[[S14:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S13]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S15:.*]] = linalg.matmul ins(%[[S12]], %[[CST_4]] : tensor<6x3xf32>, tensor<3x6xf32>) outs(%[[S14]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S15]] into %[[ARG6]][0, 0, %[[ARG5]], %[[ARG3]]] [6, 6, 1, 1] [1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE]] // CHECK: scf.yield %[[S9]] // CHECK: %[[S2:.*]] = tensor.empty() : tensor<6x6x2x2x2x5xf32> @@ -60,9 +71,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S12:.*]] = scf.for %[[ARG7:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG8:.*]] = %[[EXTRACTED_SLICE_7]]) // CHECK: %[[S13:.*]] = scf.for %[[ARG9:.*]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[ARG10:.*]] = %[[ARG8]]) // CHECK: %[[EXTRACTED_SLICE_8:.*]] = tensor.extract_slice %[[EXTRACTED_SLICE]][%[[ARG7]], 0, 0, %[[ARG9]]] [1, 6, 6, 1] [1, 1, 1, 1] -// CHECK: %[[S15:.*]] = linalg.matmul -// CHECK: %[[S17:.*]] = linalg.matmul -// CHECK: %[[INSERTED_SLICE_9:.*]] = tensor.insert_slice %[[S17]] into %[[ARG10]][0, 0, 0, 0, %[[ARG7]], %[[ARG9]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] +// CHECK: %[[S14:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK: %[[S15:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S14]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S16:.*]] = linalg.matmul ins(%[[CST_3]], %[[EXTRACTED_SLICE_8]] : tensor<6x6xf32>, tensor<6x6xf32>) 
outs(%[[S15]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S17:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK: %[[S18:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S17]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S19:.*]] = linalg.matmul ins(%[[S16]], %[[CST_2]] : tensor<6x6xf32>, tensor<6x6xf32>) outs(%[[S18]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[INSERTED_SLICE_9:.*]] = tensor.insert_slice %[[S19]] into %[[ARG10]][0, 0, 0, 0, %[[ARG7]], %[[ARG9]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE_9]] // CHECK: scf.yield %[[S13]] // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S12]] into %[[ARG6]][0, 0, %[[ARG3]], %[[ARG5]], 0, 0] [6, 6, 1, 1, 2, 5] [1, 1, 1, 1, 1, 1] @@ -82,15 +97,19 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S12:.*]] = scf.for %[[ARG7:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG8:.*]] = %[[EXTRACTED_SLICE_7]]) // CHECK: %[[S15:.*]] = scf.for %[[ARG9:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG10:.*]] = %[[ARG8]]) // CHECK: %[[EXTRACTED_SLICE_8:.*]] = tensor.extract_slice %[[EXTRACTED_SLICE]][0, 0, 0, 0, %[[ARG7]], %[[ARG9]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] -// CHECK: %[[S17:.*]] = linalg.matmul -// CHECK: %[[S19:.*]] = linalg.matmul -// CHECK: %[[S20:.*]] = tensor.empty() -// CHECK: %[[S21:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S20]] : tensor<4x4xf32>) { +// CHECK: %[[S16:.*]] = tensor.empty() : tensor<4x6xf32> +// CHECK: %[[S17:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S16]] : tensor<4x6xf32>) -> tensor<4x6xf32> +// CHECK: %[[S18:.*]] = linalg.matmul ins(%[[CST_1]], %[[EXTRACTED_SLICE_8]] : tensor<4x6xf32>, tensor<6x6xf32>) outs(%[[S17]] : tensor<4x6xf32>) -> tensor<4x6xf32> +// CHECK: %[[S19:.*]] = tensor.empty() : tensor<4x4xf32> +// CHECK: %[[S20:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S19]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK: %[[S21:.*]] = linalg.matmul ins(%[[S18]], %[[CST_0]] : tensor<4x6xf32>, tensor<6x4xf32>) outs(%[[S20]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK: %[[S22:.*]] = tensor.empty() : tensor<4x4xf32> +// CHECK: %[[S23:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S22]] : tensor<4x4xf32>) { // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): // CHECK: linalg.yield %[[IN]] : f32 // CHECK: } -> tensor<4x4xf32> -// CHECK: %[[S22:.*]] = linalg.mul ins(%[[S21]], %[[S19]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S20]] : tensor<4x4xf32>) -> tensor<4x4xf32> -// CHECK: %[[INSERTED_SLICE_9:.*]] = tensor.insert_slice %[[S22]] into %[[ARG10]][%[[ARG7]], 0, 0, %[[ARG9]]] [1, 4, 4, 1] [1, 1, 1, 1] +// CHECK: %[[S24:.*]] = linalg.mul ins(%[[S23]], %[[S21]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S22]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK: %[[INSERTED_SLICE_9:.*]] = tensor.insert_slice %[[S24]] into %[[ARG10]][%[[ARG7]], 0, 0, %[[ARG9]]] [1, 4, 4, 1] [1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE_9]] // CHECK: scf.yield %[[S15]] // CHECK: %[[S13:.*]] = affine.apply #[[$MAP0]](%[[ARG3]]) @@ -114,14 +133,15 @@ func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5x %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 3, 4], [5]] : tensor<6x6x3x3x2x5xf32> into tensor<36x18x5xf32> %4 
= tensor.empty() : tensor<36x18x2xf32> - %5 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%4 : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> - %expanded = tensor.expand_shape %5 [[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> + %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> + %6 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%5 : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> + %expanded = tensor.expand_shape %6 [[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> %padded_1 = tensor.pad %arg2 low[0, 0, 0, 0] high[0, 3, 3, 0] { ^bb0(%arg4: index, %arg5: index, %arg6: index, %arg7: index): tensor.yield %cst : f32 } : tensor<2x9x9x2xf32> to tensor<2x12x12x2xf32> - %6 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> - %extracted_slice = tensor.extract_slice %6[0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> + %7 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> + %extracted_slice = tensor.extract_slice %7[0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> return %extracted_slice : tensor<2x9x9x2xf32> } @@ -147,18 +167,29 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @conv2d_unaligned // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x11x11x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<2x9x9x2xf32>) -> tensor<2x9x9x2xf32> { // CHECK: %[[CST:.*]] = arith.constant 1.024000e+03 : f32 +// CHECK: %[[CST_0:.*]] = arith.constant dense<{{.*}}> : tensor<6x4xf32> +// CHECK: %[[CST_1:.*]] = arith.constant dense<{{.*}}> : tensor<4x6xf32> +// CHECK: %[[CST_2:.*]] = arith.constant dense<{{.*}}> : tensor<6x6xf32> +// CHECK: %[[CST_3:.*]] = arith.constant dense<{{.*}}> : tensor<6x6xf32> // CHECK: %[[C3:.*]] = arith.constant 3 : index +// CHECK: %[[CST_4:.*]] = arith.constant dense<{{.*}}> : tensor<3x6xf32> +// CHECK: %[[CST_5:.*]] = arith.constant dense<{{.*}}> : tensor<6x3xf32> // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[C5:.*]] = arith.constant 5 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[CST_6:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[S0:.*]] = tensor.empty() // CHECK: %[[S1:.*]] = scf.for %[[ARG4:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG5:.*]] = %[[S0]]) // CHECK: %[[S9:.*]] = scf.for %[[ARG6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[ARG7:.*]] = %[[ARG5]]) // CHECK: %[[EXTRACTED_SLICE_9:.*]] = tensor.extract_slice %[[ARG1]][%[[ARG4]], 0, 0, %[[ARG6]]] [1, 3, 3, 1] [1, 1, 1, 1] -// CHECK: %[[S11:.*]] = linalg.matmul -// CHECK: %[[S13:.*]] = linalg.matmul -// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S13]] into %[[ARG7]][0, 0, %[[ARG6]], %[[ARG4]]] [6, 6, 1, 1] [1, 1, 1, 1] +// CHECK: %[[S11:.*]] = tensor.empty() : tensor<6x3xf32> +// CHECK: %[[S12:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S11]] : tensor<6x3xf32>) -> tensor<6x3xf32> +// CHECK: %[[S13:.*]] = linalg.matmul ins(%[[CST_5]], %[[EXTRACTED_SLICE_9]] : tensor<6x3xf32>, tensor<3x3xf32>) outs(%[[S12]] : tensor<6x3xf32>) -> tensor<6x3xf32> +// 
CHECK: %[[S14:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK: %[[S15:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S14]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S16:.*]] = linalg.matmul ins(%[[S13]], %[[CST_4]] : tensor<6x3xf32>, tensor<3x6xf32>) outs(%[[S15]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S16]] into %[[ARG7]][0, 0, %[[ARG6]], %[[ARG4]]] [6, 6, 1, 1] [1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE]] : tensor<6x6x5x2xf32> // CHECK: scf.yield %[[S9]] : tensor<6x6x5x2xf32> // CHECK: %[[PADDED:.*]] = tensor.pad %[[ARG0]] low[0, 0, 0, 0] high[0, 3, 3, 0] @@ -173,9 +204,13 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S12:.*]] = scf.for %[[ARG8:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG9:.*]] = %[[EXTRACTED_SLICE_10]]) // CHECK: %[[S13:.*]] = scf.for %[[ARG10:.*]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[ARG11:.*]] = %[[ARG9]]) // CHECK: %[[EXTRACTED_SLICE_11:.*]] = tensor.extract_slice %[[EXTRACTED_SLICE_9]][%[[ARG8]], 0, 0, %[[ARG10]]] [1, 6, 6, 1] [1, 1, 1, 1] -// CHECK: %[[S15:.*]] = linalg.matmul -// CHECK: %[[S17:.*]] = linalg.matmul -// CHECK: %[[INSERTED_SLICE_12:.*]] = tensor.insert_slice %[[S17]] into %[[ARG11]][0, 0, 0, 0, %[[ARG8]], %[[ARG10]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] +// CHECK: %[[S15:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK: %[[S16:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S15]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S17:.*]] = linalg.matmul ins(%[[CST_3]], %[[EXTRACTED_SLICE_11]] : tensor<6x6xf32>, tensor<6x6xf32>) outs(%[[S16]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S18:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK: %[[S19:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S18]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[S20:.*]] = linalg.matmul ins(%[[S17]], %[[CST_2]] : tensor<6x6xf32>, tensor<6x6xf32>) outs(%[[S19]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK: %[[INSERTED_SLICE_12:.*]] = tensor.insert_slice %[[S20]] into %[[ARG11]][0, 0, 0, 0, %[[ARG8]], %[[ARG10]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE_12]] : tensor<6x6x1x1x2x5xf32> // CHECK: scf.yield %[[S13]] : tensor<6x6x1x1x2x5xf32> // CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S12]] into %[[ARG7]][0, 0, %[[ARG4]], %[[ARG6]], 0, 0] [6, 6, 1, 1, 2, 5] [1, 1, 1, 1, 1, 1] @@ -196,15 +231,19 @@ module attributes {transform.with_named_sequence} { // CHECK: %[[S12:.*]] = scf.for %[[ARG8:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG9:.*]] = %[[EXTRACTED_SLICE_10]]) // CHECK: %[[S15:.*]] = scf.for %[[ARG10:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG11:.*]] = %[[ARG9]]) // CHECK: %[[EXTRACTED_SLICE_11:.*]] = tensor.extract_slice %[[EXTRACTED_SLICE_9]][0, 0, 0, 0, %[[ARG8]], %[[ARG10]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] -// CHECK: %[[S17:.*]] = linalg.matmul -// CHECK: %[[S19:.*]] = linalg.matmul +// CHECK: %[[S17:.*]] = tensor.empty() : tensor<4x6xf32> +// CHECK: %[[S18:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S17]] : tensor<4x6xf32>) -> tensor<4x6xf32> +// CHECK: %[[S19:.*]] = linalg.matmul ins(%[[CST_1]], %[[EXTRACTED_SLICE_11]] : tensor<4x6xf32>, tensor<6x6xf32>) outs(%[[S18]] : tensor<4x6xf32>) -> tensor<4x6xf32> // CHECK: %[[S20:.*]] = tensor.empty() : tensor<4x4xf32> -// CHECK: %[[S21:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S20]] : tensor<4x4xf32>) { 
+// CHECK: %[[S21:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S20]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK: %[[S22:.*]] = linalg.matmul ins(%[[S19]], %[[CST_0]] : tensor<4x6xf32>, tensor<6x4xf32>) outs(%[[S21]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK: %[[S23:.*]] = tensor.empty() : tensor<4x4xf32> +// CHECK: %[[S24:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S23]] : tensor<4x4xf32>) { // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): // CHECK: linalg.yield %[[IN]] : f32 // CHECK: } -> tensor<4x4xf32> -// CHECK: %[[S22:.*]] = linalg.mul ins(%[[S21]], %[[S19]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S20]] : tensor<4x4xf32>) -> tensor<4x4xf32> -// CHECK: %[[INSERTED_SLICE_12:.*]] = tensor.insert_slice %[[S22]] into %[[ARG11]][%[[ARG8]], 0, 0, %[[ARG10]]] [1, 4, 4, 1] [1, 1, 1, 1] +// CHECK: %[[S25:.*]] = linalg.mul ins(%[[S24]], %[[S22]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S23]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK: %[[INSERTED_SLICE_12:.*]] = tensor.insert_slice %[[S25]] into %[[ARG11]][%[[ARG8]], 0, 0, %[[ARG10]]] [1, 4, 4, 1] [1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE_12]] // CHECK: scf.yield %[[S15]] : tensor<2x4x4x2xf32> // CHECK: %[[S13:.*]] = affine.apply #[[$MAP0]](%[[ARG4]]) @@ -218,6 +257,7 @@ module attributes {transform.with_named_sequence} { // ----- func.func @conv2d_mx1_rx1(%arg0: tensor<2x6x1x5xf32>, %arg1: tensor<2x3x1x5xf32>, %arg2: tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> { + %cst = arith.constant 0.000000e+00 : f32 %0 = tensor.empty() : tensor<6x1x5x2xf32> %1 = linalg.winograd_filter_transform m(4) r(3) ins(%arg1 : tensor<2x3x1x5xf32>) outs(%0 : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> %2 = tensor.empty() : tensor<6x1x1x1x2x5xf32> @@ -225,10 +265,11 @@ func.func @conv2d_mx1_rx1(%arg0: tensor<2x6x1x5xf32>, %arg1: tensor<2x3x1x5xf32> %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x1x5x2xf32> into tensor<6x5x2xf32> %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 3, 4], [5]] : tensor<6x1x1x1x2x5xf32> into tensor<6x2x5xf32> %4 = tensor.empty() : tensor<6x2x2xf32> - %5 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%4 : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> - %expanded = tensor.expand_shape %5 [[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<6x1x1x1x2x2xf32> - %6 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x1x1x1x2x2xf32>) outs(%arg2 : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> - return %6 : tensor<2x4x1x2xf32> + %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> + %6 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%5 : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> + %expanded = tensor.expand_shape %6 [[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<6x1x1x1x2x2xf32> + %7 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x1x1x1x2x2xf32>) outs(%arg2 : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> + return %7 : tensor<2x4x1x2xf32> } module attributes {transform.with_named_sequence} { @@ -252,41 +293,53 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func.func @conv2d_mx1_rx1 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x1x5xf32>, %[[ARG1:.*]]: tensor<2x3x1x5xf32>, %[[ARG2:.*]]: tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> { // CHECK: %[[CST:.*]] = 
arith.constant 3.200000e+01 : f32 +// CHECK: %[[CST_0:.*]] = arith.constant dense<{{.*}}> : tensor<4x6xf32> +// CHECK: %[[CST_1:.*]] = arith.constant dense<{{.*}}> : tensor<6x6xf32> +// CHECK: %[[CST_2:.*]] = arith.constant dense<{{.*}}> : tensor<6x3xf32> // CHECK: %[[C1:.*]] = arith.constant 1 : index // CHECK: %[[C5:.*]] = arith.constant 5 : index // CHECK: %[[C2:.*]] = arith.constant 2 : index // CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[CST_3:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[S0:.*]] = tensor.empty() : tensor<6x1x5x2xf32> // CHECK: %[[S1:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[S0]]) // CHECK: %[[S7:.*]] = scf.for %[[ARG5:.*]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[ARG6:.*]] = %[[ARG4]]) // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG1]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 3, 1, 1] [1, 1, 1, 1] -// CHECK: %[[S9:.*]] = linalg.matmul -// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S9]] into %[[ARG6]][0, 0, %[[ARG5]], %[[ARG3]]] [6, 1, 1, 1] [1, 1, 1, 1] +// CHECK: %[[S8:.*]] = tensor.empty() : tensor<6x1xf32> +// CHECK: %[[S9:.*]] = linalg.fill ins(%[[CST_3]] : f32) outs(%[[S8]] : tensor<6x1xf32>) -> tensor<6x1xf32> +// CHECK: %[[S10:.*]] = linalg.matmul ins(%[[CST_2]], %[[EXTRACTED_SLICE]] : tensor<6x3xf32>, tensor<3x1xf32>) outs(%[[S9]] : tensor<6x1xf32>) -> tensor<6x1xf32> +// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S10]] into %[[ARG6]][0, 0, %[[ARG5]], %[[ARG3]]] [6, 1, 1, 1] [1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE]] // CHECK: scf.yield %[[S7]] // CHECK: %[[S2:.*]] = tensor.empty() : tensor<6x1x1x1x2x5xf32> // CHECK: %[[S3:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[S2]]) // CHECK: %[[S7:.*]] = scf.for %[[ARG5:.*]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[ARG6:.*]] = %[[ARG4]]) // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[ARG0]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 6, 1, 1] [1, 1, 1, 1] -// CHECK: %[[S9:.*]] = linalg.matmul -// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S9]] into %[[ARG6]][0, 0, 0, 0, %[[ARG3]], %[[ARG5]]] [6, 1, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] +// CHECK: %[[S8:.*]] = tensor.empty() : tensor<6x1xf32> +// CHECK: %[[S9:.*]] = linalg.fill ins(%[[CST_3]] : f32) outs(%[[S8]] : tensor<6x1xf32>) -> tensor<6x1xf32> +// CHECK: %[[S10:.*]] = linalg.matmul ins(%[[CST_1]], %[[EXTRACTED_SLICE]] : tensor<6x6xf32>, tensor<6x1xf32>) outs(%[[S9]] : tensor<6x1xf32>) -> tensor<6x1xf32> +// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S10]] into %[[ARG6]][0, 0, 0, 0, %[[ARG3]], %[[ARG5]]] [6, 1, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE]] // CHECK: scf.yield %[[S7]] // CHECK: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]] // CHECK: %[[COLLAPSED_3:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2, 3, 4], [5]] -// CHECK: %[[S5:.*]] = linalg.batch_matmul -// CHECK: %[[EXPANDED:.*]] = tensor.expand_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 1, 2, 2] +// CHECK: %[[S4:.*]] = tensor.empty() : tensor<6x2x2xf32> +// CHECK: %[[S5:.*]] = linalg.fill ins(%[[CST_3]] : f32) outs(%[[S4]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> +// CHECK: %[[S6:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_3]], %[[COLLAPSED]] : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%[[S5]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> +// CHECK: %[[EXPANDED:.*]] = tensor.expand_shape %[[S6]] {{\[}}[0, 1], [2, 3, 4], [5]] 
output_shape [6, 1, 1, 1, 2, 2] // CHECK: %[[S6:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[ARG2]]) // CHECK: %[[S7:.*]] = scf.for %[[ARG5:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG6:.*]] = %[[ARG4]]) // CHECK: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[EXPANDED]][0, 0, 0, 0, %[[ARG3]], %[[ARG5]]] [6, 1, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] -// CHECK: %[[S9:.*]] = linalg.matmul -// CHECK: %[[S10:.*]] = tensor.empty() : tensor<4x1xf32> -// CHECK: %[[S11:.*]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S10]] : tensor<4x1xf32>) { +// CHECK: %[[S9:.*]] = tensor.empty() : tensor<4x1xf32> +// CHECK: %[[S10:.*]] = linalg.fill ins(%[[CST_3]] : f32) outs(%[[S9]] : tensor<4x1xf32>) -> tensor<4x1xf32> +// CHECK: %[[S11:.*]] = linalg.matmul ins(%[[CST_0]], %[[EXTRACTED_SLICE]] : tensor<4x6xf32>, tensor<6x1xf32>) outs(%[[S10]] : tensor<4x1xf32>) -> tensor<4x1xf32> +// CHECK: %[[S12:.*]] = tensor.empty() : tensor<4x1xf32> +// CHECK: %[[S13:.*]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S12]] : tensor<4x1xf32>) { // CHECK: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): // CHECK: linalg.yield %[[IN]] : f32 // CHECK: } -> tensor<4x1xf32> -// CHECK: %[[S12:.*]] = linalg.mul ins(%[[S11]], %[[S9]] : tensor<4x1xf32>, tensor<4x1xf32>) outs(%[[S10]] : tensor<4x1xf32>) -> tensor<4x1xf32> -// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S12]] into %[[ARG6]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 4, 1, 1] [1, 1, 1, 1] +// CHECK: %[[S14:.*]] = linalg.mul ins(%[[S13]], %[[S11]] : tensor<4x1xf32>, tensor<4x1xf32>) outs(%[[S12]] : tensor<4x1xf32>) -> tensor<4x1xf32> +// CHECK: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S14]] into %[[ARG6]][%[[ARG3]], 0, 0, %[[ARG5]]] [1, 4, 1, 1] [1, 1, 1, 1] // CHECK: scf.yield %[[INSERTED_SLICE]] // CHECK: scf.yield %[[S7]] // CHECK: return %[[S6]] diff --git a/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir b/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir index 095a6636b68dc..4369f5f1eab4c 100644 --- a/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir +++ b/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir @@ -13,14 +13,15 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg %collapsed = tensor.collapse_shape %3 [[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> %collapsed_0 = tensor.collapse_shape %5 [[0, 1], [2, 3, 4], [5]] : tensor<6x6x3x3x2x5xf32> into tensor<36x18x5xf32> %6 = tensor.empty() : tensor<36x18x2xf32> - %7 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%6 : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> - %expanded = tensor.expand_shape %7 [[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> + %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> + %8 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%7 : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> + %expanded = tensor.expand_shape %8 [[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> %padded_1 = tensor.pad %arg2 low[0, 0, 0, 0] high[0, 3, 3, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<2x9x9x2xf32> to tensor<2x12x12x2xf32> - %8 = 
linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> - %extracted_slice = tensor.extract_slice %8[0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> + %9 = linalg.winograd_output_transform m(4) r(3) ins(%expanded : tensor<6x6x3x3x2x2xf32>) outs(%padded_1 : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> + %extracted_slice = tensor.extract_slice %9[0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> return %extracted_slice : tensor<2x9x9x2xf32> } @@ -46,11 +47,13 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg // CHECK-NEXT: %[[S1:.*]] = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG4:.*]] = %[[S0]]) -> (tensor<6x6x5x2xf32>) { // CHECK-NEXT: %[[S7:.*]] = scf.for %[[ARG5:.*]] = %[[C0]] to %[[C5]] step %[[C1]] iter_args(%[[ARG6:.*]] = %[[ARG4]]) -> (tensor<6x6x5x2xf32>) { // CHECK-NEXT: %[[EXTRACTED_SLICE_9:.*]] = tensor.extract_slice %[[ARG1]][%[[ARG3]], %[[C0]], %[[C0]], %[[ARG5]]] [1, 3, 3, 1] [1, 1, 1, 1] : tensor<2x3x3x5xf32> to tensor<3x3xf32> -// CHECK-NEXT: %[[S8:.*]] = tensor.empty() : tensor<6x3xf32> -// CHECK-NEXT: %[[S9:.*]] = linalg.matmul ins(%[[CST_5]], %[[EXTRACTED_SLICE_9]] : tensor<6x3xf32>, tensor<3x3xf32>) outs(%[[S8]] : tensor<6x3xf32>) -> tensor<6x3xf32> -// CHECK-NEXT: %[[S10:.*]] = tensor.empty() : tensor<6x6xf32> -// CHECK-NEXT: %[[S11:.*]] = linalg.matmul ins(%[[S9]], %[[CST_4]] : tensor<6x3xf32>, tensor<3x6xf32>) outs(%[[S10]] : tensor<6x6xf32>) -> tensor<6x6xf32> -// CHECK-NEXT: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S11]] into %[[ARG6]][%[[C0]], %[[C0]], %[[ARG5]], %[[ARG3]]] [6, 6, 1, 1] [1, 1, 1, 1] : tensor<6x6xf32> into tensor<6x6x5x2xf32> +// CHECK-NEXT: %[[S9:.*]] = tensor.empty() : tensor<6x3xf32> +// CHECK-NEXT: %[[S10:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S9]] : tensor<6x3xf32>) -> tensor<6x3xf32> +// CHECK-NEXT: %[[S11:.*]] = linalg.matmul ins(%[[CST_5]], %[[EXTRACTED_SLICE_9]] : tensor<6x3xf32>, tensor<3x3xf32>) outs(%[[S10]] : tensor<6x3xf32>) -> tensor<6x3xf32> +// CHECK-NEXT: %[[S12:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK-NEXT: %[[S13:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S12]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK-NEXT: %[[S14:.*]] = linalg.matmul ins(%[[S11]], %[[CST_4]] : tensor<6x3xf32>, tensor<3x6xf32>) outs(%[[S13]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK-NEXT: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S14]] into %[[ARG6]][%[[C0]], %[[C0]], %[[ARG5]], %[[ARG3]]] [6, 6, 1, 1] [1, 1, 1, 1] : tensor<6x6xf32> into tensor<6x6x5x2xf32> // CHECK-NEXT: scf.yield %[[INSERTED_SLICE]] : tensor<6x6x5x2xf32> // CHECK-NEXT: } // CHECK-NEXT: scf.yield %[[S7]] : tensor<6x6x5x2xf32> @@ -67,11 +70,13 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg // CHECK-NEXT: %[[S10:.*]] = affine.apply #[[$MAP0]](%[[ARG3]]) // CHECK-NEXT: %[[S11:.*]] = affine.apply #[[$MAP0]](%[[ARG5]]) // CHECK-NEXT: %[[EXTRACTED_SLICE_9:.*]] = tensor.extract_slice %[[PADDED]][%[[ARG7]], %[[S10]], %[[S11]], %[[ARG9]]] [1, 6, 6, 1] [1, 1, 1, 1] : tensor<2x14x14x5xf32> to tensor<6x6xf32> -// CHECK-NEXT: %[[S12:.*]] = tensor.empty() : tensor<6x6xf32> -// CHECK-NEXT: %[[S13:.*]] = linalg.matmul ins(%[[CST_3]], %[[EXTRACTED_SLICE_9]] : tensor<6x6xf32>, tensor<6x6xf32>) outs(%[[S12]] : tensor<6x6xf32>) -> tensor<6x6xf32> -// CHECK-NEXT: %[[S14:.*]] = tensor.empty() : 
tensor<6x6xf32> -// CHECK-NEXT: %[[S15:.*]] = linalg.matmul ins(%[[S13]], %[[CST_2]] : tensor<6x6xf32>, tensor<6x6xf32>) outs(%[[S14]] : tensor<6x6xf32>) -> tensor<6x6xf32> -// CHECK-NEXT: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S15]] into %[[ARG10]][0, 0, %[[ARG3]], %[[ARG5]], %[[ARG7]], %[[ARG9]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<6x6xf32> into tensor<6x6x3x3x2x5xf32> +// CHECK-NEXT: %[[S13:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK-NEXT: %[[S14:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S13]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK-NEXT: %[[S15:.*]] = linalg.matmul ins(%[[CST_3]], %[[EXTRACTED_SLICE_9]] : tensor<6x6xf32>, tensor<6x6xf32>) outs(%[[S14]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK-NEXT: %[[S16:.*]] = tensor.empty() : tensor<6x6xf32> +// CHECK-NEXT: %[[S17:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S16]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK-NEXT: %[[S18:.*]] = linalg.matmul ins(%[[S15]], %[[CST_2]] : tensor<6x6xf32>, tensor<6x6xf32>) outs(%[[S17]] : tensor<6x6xf32>) -> tensor<6x6xf32> +// CHECK-NEXT: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S18]] into %[[ARG10]][0, 0, %[[ARG3]], %[[ARG5]], %[[ARG7]], %[[ARG9]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<6x6xf32> into tensor<6x6x3x3x2x5xf32> // CHECK-NEXT: scf.yield %[[INSERTED_SLICE]] : tensor<6x6x3x3x2x5xf32> // CHECK-NEXT: } // CHECK-NEXT: scf.yield %[[S9]] : tensor<6x6x3x3x2x5xf32> @@ -83,8 +88,9 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_7:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x3x3x2x5xf32> into tensor<36x18x5xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<36x18x2xf32> -// CHECK-NEXT: %[[S5:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_7]], %[[COLLAPSED]] : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%[[S4]] : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> -// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S4]] : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> +// CHECK-NEXT: %[[S6:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_7]], %[[COLLAPSED]] : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%[[S5]] : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S6]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> // CHECK-NEXT: %[[PADDED_8:.*]] = tensor.pad %[[ARG2]] low[0, 0, 0, 0] high[0, 3, 3, 0] { // CHECK-NEXT: ^bb0(%[[ARG3:.*]]: index, %[[ARG4:.*]]: index, %[[ARG5:.*]]: index, %[[ARG6:.*]]: index): // CHECK-NEXT: tensor.yield %[[CST_6]] : f32 @@ -94,19 +100,21 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg // CHECK-NEXT: %[[S8:.*]] = scf.for %[[ARG7:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG8:.*]] = %[[ARG6]]) -> (tensor<2x12x12x2xf32>) { // CHECK-NEXT: %[[S9:.*]] = scf.for %[[ARG9:.*]] = %[[C0]] to %[[C2]] step %[[C1]] iter_args(%[[ARG10:.*]] = %[[ARG8]]) -> (tensor<2x12x12x2xf32>) { // CHECK-NEXT: %[[EXTRACTED_SLICE_9:.*]] = tensor.extract_slice %[[EXPANDED]][0, 0, %[[ARG3]], %[[ARG5]], %[[ARG7]], %[[ARG9]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : 
tensor<6x6x3x3x2x2xf32> to tensor<6x6xf32> -// CHECK-NEXT: %[[S10:.*]] = tensor.empty() : tensor<4x6xf32> -// CHECK-NEXT: %[[S11:.*]] = linalg.matmul ins(%[[CST_1]], %[[EXTRACTED_SLICE_9]] : tensor<4x6xf32>, tensor<6x6xf32>) outs(%[[S10]] : tensor<4x6xf32>) -> tensor<4x6xf32> -// CHECK-NEXT: %[[S12:.*]] = tensor.empty() : tensor<4x4xf32> -// CHECK-NEXT: %[[S13:.*]] = linalg.matmul ins(%[[S11]], %[[CST_0]] : tensor<4x6xf32>, tensor<6x4xf32>) outs(%[[S12]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK-NEXT: %[[S11:.*]] = tensor.empty() : tensor<4x6xf32> +// CHECK-NEXT: %[[S12:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S11]] : tensor<4x6xf32>) -> tensor<4x6xf32> +// CHECK-NEXT: %[[S13:.*]] = linalg.matmul ins(%[[CST_1]], %[[EXTRACTED_SLICE_9]] : tensor<4x6xf32>, tensor<6x6xf32>) outs(%[[S12]] : tensor<4x6xf32>) -> tensor<4x6xf32> // CHECK-NEXT: %[[S14:.*]] = tensor.empty() : tensor<4x4xf32> -// CHECK-NEXT: %[[S15:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S14]] : tensor<4x4xf32>) { +// CHECK-NEXT: %[[S15:.*]] = linalg.fill ins(%[[CST_6]] : f32) outs(%[[S14]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK-NEXT: %[[S16:.*]] = linalg.matmul ins(%[[S13]], %[[CST_0]] : tensor<4x6xf32>, tensor<6x4xf32>) outs(%[[S15]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK-NEXT: %[[S17:.*]] = tensor.empty() : tensor<4x4xf32> +// CHECK-NEXT: %[[S18:.*]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%[[CST]] : f32) outs(%[[S17]] : tensor<4x4xf32>) { // CHECK-NEXT: ^bb0(%[[IN:.*]]: f32, %[[OUT:.*]]: f32): // CHECK-NEXT: linalg.yield %[[IN]] : f32 // CHECK-NEXT: } -> tensor<4x4xf32> -// CHECK-NEXT: %[[S16:.*]] = linalg.mul ins(%[[S15]], %[[S13]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S14]] : tensor<4x4xf32>) -> tensor<4x4xf32> -// CHECK-NEXT: %[[S17:.*]] = affine.apply #[[$MAP0]](%[[ARG3]]) -// CHECK-NEXT: %[[S18:.*]] = affine.apply #[[$MAP0]](%[[ARG5]]) -// CHECK-NEXT: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S16]] into %[[ARG10]][%[[ARG7]], %[[S17]], %[[S18]], %[[ARG9]]] [1, 4, 4, 1] [1, 1, 1, 1] : tensor<4x4xf32> into tensor<2x12x12x2xf32> +// CHECK-NEXT: %[[S19:.*]] = linalg.mul ins(%[[S18]], %[[S16]] : tensor<4x4xf32>, tensor<4x4xf32>) outs(%[[S17]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK-NEXT: %[[S20:.*]] = affine.apply #[[$MAP0]](%[[ARG3]]) +// CHECK-NEXT: %[[S21:.*]] = affine.apply #[[$MAP0]](%[[ARG5]]) +// CHECK-NEXT: %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[S19]] into %[[ARG10]][%[[ARG7]], %[[S20]], %[[S21]], %[[ARG9]]] [1, 4, 4, 1] [1, 1, 1, 1] : tensor<4x4xf32> into tensor<2x12x12x2xf32> // CHECK-NEXT: scf.yield %[[INSERTED_SLICE]] : tensor<2x12x12x2xf32> // CHECK-NEXT: } // CHECK-NEXT: scf.yield %[[S9]] : tensor<2x12x12x2xf32> diff --git a/mlir/test/Dialect/Linalg/winograd-conv2d.mlir b/mlir/test/Dialect/Linalg/winograd-conv2d.mlir index ec11a6ef8fbee..0040d81a2d24e 100644 --- a/mlir/test/Dialect/Linalg/winograd-conv2d.mlir +++ b/mlir/test/Dialect/Linalg/winograd-conv2d.mlir @@ -7,6 +7,7 @@ func.func @conv2d_4x4_3x3(%arg0: tensor<2x6x6x5xf32>, %arg1: tensor<2x3x3x5xf32> // CHECK-LABEL: func.func @conv2d_4x4_3x3 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x6x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> { +// CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x5x2xf32> // 
CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x6x1x1x2x5xf32> @@ -14,10 +15,11 @@ func.func @conv2d_4x4_3x3(%arg0: tensor<2x6x6x5xf32>, %arg1: tensor<2x3x3x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf32> into tensor<36x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<36x2x2xf32> -// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf32>, tensor<36x5x2xf32>) outs(%[[S6]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> -// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> -// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> -// CHECK-NEXT: return %[[S8]] : tensor<2x4x4x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf32>, tensor<36x5x2xf32>) outs(%[[S7]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> +// CHECK-NEXT: return %[[S9]] : tensor<2x4x4x2xf32> // CHECK-NEXT: } // ----- @@ -29,6 +31,7 @@ func.func @conv2d_2x2_5x5(%arg0: tensor<2x6x6x5xf32>, %arg1: tensor<2x5x5x5xf32> // CHECK-LABEL: func.func @conv2d_2x2_5x5 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x6x5xf32>, %[[ARG1:.*]]: tensor<2x5x5x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32> { +// CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(2) r(5) ins(%[[ARG1]] : tensor<2x5x5x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x6x1x1x2x5xf32> @@ -36,10 +39,11 @@ func.func @conv2d_2x2_5x5(%arg0: tensor<2x6x6x5xf32>, %arg1: tensor<2x5x5x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf32> into tensor<36x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<36x2x2xf32> -// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf32>, tensor<36x5x2xf32>) outs(%[[S6]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> -// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> -// CHECK-NEXT: %[[S8:.*]] = 
linalg.winograd_output_transform m(2) r(5) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32> -// CHECK-NEXT: return %[[S8]] : tensor<2x2x2x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf32>, tensor<36x5x2xf32>) outs(%[[S7]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(2) r(5) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32> +// CHECK-NEXT: return %[[S9]] : tensor<2x2x2x2xf32> // CHECK-NEXT: } // ----- @@ -51,6 +55,7 @@ func.func @conv2d_1x4_1x3(%arg0: tensor<2x1x6x5xf32>, %arg1: tensor<2x1x3x5xf32> // CHECK-LABEL: func.func @conv2d_1x4_1x3 // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x1x6x5xf32>, %[[ARG1:.*]]: tensor<2x1x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x1x4x2xf32>) -> tensor<2x1x4x2xf32> { +// CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<1x6x5x2xf32> // CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x1x3x5xf32>) outs(%[[S2]] : tensor<1x6x5x2xf32>) -> tensor<1x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<1x6x1x1x2x5xf32> @@ -58,10 +63,11 @@ func.func @conv2d_1x4_1x3(%arg0: tensor<2x1x6x5xf32>, %arg1: tensor<2x1x3x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<1x6x5x2xf32> into tensor<6x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<1x6x1x1x2x5xf32> into tensor<6x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<6x2x2xf32> -// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%[[S6]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> -// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [1, 6, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<1x6x1x1x2x2xf32> -// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<1x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x1x4x2xf32>) -> tensor<2x1x4x2xf32> -// CHECK-NEXT: return %[[S8]] : tensor<2x1x4x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%[[S7]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [1, 6, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<1x6x1x1x2x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<1x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x1x4x2xf32>) -> tensor<2x1x4x2xf32> +// CHECK-NEXT: return %[[S9]] : tensor<2x1x4x2xf32> // CHECK-NEXT: } // ----- @@ -73,6 +79,7 @@ func.func @conv2d_4x1_3x1(%arg0: tensor<2x6x1x5xf32>, %arg1: tensor<2x3x1x5xf32> // CHECK-LABEL: func.func @conv2d_4x1_3x1 // CHECK-SAME: (%[[ARG0:.*]]: 
tensor<2x6x1x5xf32>, %[[ARG1:.*]]: tensor<2x3x1x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> { +// CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x1x5x2xf32> // CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x1x5xf32>) outs(%[[S2]] : tensor<6x1x5x2xf32>) -> tensor<6x1x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x1x1x1x2x5xf32> @@ -80,10 +87,11 @@ func.func @conv2d_4x1_3x1(%arg0: tensor<2x6x1x5xf32>, %arg1: tensor<2x3x1x5xf32> // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<6x1x5x2xf32> into tensor<6x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x1x1x1x2x5xf32> into tensor<6x2x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<6x2x2xf32> -// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%[[S6]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> -// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<6x1x1x1x2x2xf32> -// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x1x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> -// CHECK-NEXT: return %[[S8]] : tensor<2x4x1x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<6x2x5xf32>, tensor<6x5x2xf32>) outs(%[[S7]] : tensor<6x2x2xf32>) -> tensor<6x2x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 1, 1, 1, 2, 2] : tensor<6x2x2xf32> into tensor<6x1x1x1x2x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x1x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x1x2xf32>) -> tensor<2x4x1x2xf32> +// CHECK-NEXT: return %[[S9]] : tensor<2x4x1x2xf32> // CHECK-NEXT: } // ----- @@ -95,6 +103,7 @@ func.func @conv2d_aligned(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf3 // CHECK-LABEL: func.func @conv2d_aligned // CHECK-SAME: (%[[ARG0:.*]]: tensor<2x10x10x5xf32>, %[[ARG1:.*]]: tensor<2x3x3x5xf32>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> { +// CHECK-NEXT: %[[CST:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S3:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf32>) outs(%[[S2]] : tensor<6x6x5x2xf32>) -> tensor<6x6x5x2xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<6x6x2x2x2x5xf32> @@ -102,10 +111,11 @@ func.func @conv2d_aligned(%arg0: tensor<2x10x10x5xf32>, %arg1: tensor<2x3x3x5xf3 // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x2x2x2x5xf32> into tensor<36x8x5xf32> // CHECK-NEXT: %[[S6:.*]] = tensor.empty() : tensor<36x8x2xf32> -// CHECK-NEXT: %[[S7:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x8x5xf32>, tensor<36x5x2xf32>) outs(%[[S6]] : 
tensor<36x8x2xf32>) -> tensor<36x8x2xf32> -// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S7]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 2, 2, 2, 2] : tensor<36x8x2xf32> into tensor<6x6x2x2x2x2xf32> -// CHECK-NEXT: %[[S8:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x2x2x2x2xf32>) outs(%[[ARG3]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> -// CHECK-NEXT: return %[[S8]] : tensor<2x8x8x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S6]] : tensor<36x8x2xf32>) -> tensor<36x8x2xf32> +// CHECK-NEXT: %[[S8:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x8x5xf32>, tensor<36x5x2xf32>) outs(%[[S7]] : tensor<36x8x2xf32>) -> tensor<36x8x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S8]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 2, 2, 2, 2] : tensor<36x8x2xf32> into tensor<6x6x2x2x2x2xf32> +// CHECK-NEXT: %[[S9:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x2x2x2x2xf32>) outs(%[[ARG3]] : tensor<2x8x8x2xf32>) -> tensor<2x8x8x2xf32> +// CHECK-NEXT: return %[[S9]] : tensor<2x8x8x2xf32> // CHECK-NEXT: } // ----- @@ -129,14 +139,15 @@ func.func @conv2d_unaligned(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5x // CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf32> into tensor<36x5x2xf32> // CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %3 {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x3x3x2x5xf32> into tensor<36x18x5xf32> // CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<36x18x2xf32> -// CHECK-NEXT: %[[S5:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%[[S4]] : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> -// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> +// CHECK-NEXT: %[[S5:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S4]] : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> +// CHECK-NEXT: %[[S6:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x18x5xf32>, tensor<36x5x2xf32>) outs(%[[S5]] : tensor<36x18x2xf32>) -> tensor<36x18x2xf32> +// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S6]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 3, 3, 2, 2] : tensor<36x18x2xf32> into tensor<6x6x3x3x2x2xf32> // CHECK-NEXT: %[[PADDED_1:.*]] = tensor.pad %arg3 low[0, 0, 0, 0] high[0, 3, 3, 0] { // CHECK-NEXT: ^bb0 // CHECK-NEXT: tensor.yield %[[CST]] : f32 // CHECK-NEXT: } : tensor<2x9x9x2xf32> to tensor<2x12x12x2xf32> -// CHECK-NEXT: %[[S6:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x3x3x2x2xf32>) outs(%[[PADDED_1]] : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> -// CHECK-NEXT: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[S6]][0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> +// CHECK-NEXT: %[[S7:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x3x3x2x2xf32>) outs(%[[PADDED_1]] : tensor<2x12x12x2xf32>) -> tensor<2x12x12x2xf32> +// CHECK-NEXT: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[S7]][0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32> // CHECK-NEXT: return %[[EXTRACTED_SLICE]] : tensor<2x9x9x2xf32> // CHECK-NEXT: } @@ -149,17 +160,19 @@ func.func @conv2d_type_promotion(%arg0: tensor<2x6x6x5xf16>, %arg1: 
tensor<2x3x3
// CHECK-LABEL: func.func @conv2d_type_promotion
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x6x6x5xf16>, %[[ARG1:.*]]: tensor<2x3x3x5xf16>, %[[ARG2:.*]]: tensor<1xf32>, %[[ARG3:.*]]: tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> {
-// CHECK: %[[S0:.*]] = tensor.empty() : tensor<6x6x5x2xf16>
+// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-NEXT: %[[S0:.*]] = tensor.empty() : tensor<6x6x5x2xf16>
// CHECK-NEXT: %[[S1:.*]] = linalg.winograd_filter_transform m(4) r(3) ins(%[[ARG1]] : tensor<2x3x3x5xf16>) outs(%[[S0]] : tensor<6x6x5x2xf16>) -> tensor<6x6x5x2xf16>
// CHECK-NEXT: %[[S2:.*]] = tensor.empty() : tensor<6x6x1x1x2x5xf16>
// CHECK-NEXT: %[[S3:.*]] = linalg.winograd_input_transform m(4) r(3) ins(%[[ARG0]] : tensor<2x6x6x5xf16>) outs(%[[S2]] : tensor<6x6x1x1x2x5xf16>) -> tensor<6x6x1x1x2x5xf16>
// CHECK-NEXT: %[[COLLAPSED:.*]] = tensor.collapse_shape %[[S1]] {{\[}}[0, 1], [2], [3]] : tensor<6x6x5x2xf16> into tensor<36x5x2xf16>
// CHECK-NEXT: %[[COLLAPSED_0:.*]] = tensor.collapse_shape %[[S3]] {{\[}}[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf16> into tensor<36x2x5xf16>
// CHECK-NEXT: %[[S4:.*]] = tensor.empty() : tensor<36x2x2xf32>
-// CHECK-NEXT: %[[S5:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf16>, tensor<36x5x2xf16>) outs(%[[S4]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
-// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S5]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32>
-// CHECK-NEXT: %[[S6:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32>
-// CHECK-NEXT: return %[[S6]] : tensor<2x4x4x2xf32>
+// CHECK-NEXT: %[[S5:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[S4]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
+// CHECK-NEXT: %[[S6:.*]] = linalg.batch_matmul ins(%[[COLLAPSED_0]], %[[COLLAPSED]] : tensor<36x2x5xf16>, tensor<36x5x2xf16>) outs(%[[S5]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
+// CHECK-NEXT: %[[EXPANDED:.*]] = tensor.expand_shape %[[S6]] {{\[}}[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32>
+// CHECK-NEXT: %[[S7:.*]] = linalg.winograd_output_transform m(4) r(3) ins(%[[EXPANDED]] : tensor<6x6x1x1x2x2xf32>) outs(%[[ARG3]] : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32>
+// CHECK-NEXT: return %[[S7]] : tensor<2x4x4x2xf32>
// CHECK-NEXT: }

// -----
diff --git a/mlir/test/Target/LLVMIR/Import/nvvmir.ll b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
index e4a8773e2dd80..131e9065b2d88 100644
--- a/mlir/test/Target/LLVMIR/Import/nvvmir.ll
+++ b/mlir/test/Target/LLVMIR/Import/nvvmir.ll
@@ -58,6 +58,9 @@ define i32 @nvvm_special_regs() {
  %27 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
  ; CHECK: = nvvm.read.ptx.sreg.cluster.nctarank : i32
  %28 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
+
+  ; CHECK: = nvvm.read.ptx.sreg.tid.x range <0 : i32, 64 : i32> : i32
+  %29 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  ret i32 %1
}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 88ffb1c7bfdf7..7fd082a5eb3c7 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -62,7 +62,10 @@ llvm.func @nvvm_special_regs() -> i32 {
  %29 = nvvm.read.ptx.sreg.clock : i32
  // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64
  %30 = nvvm.read.ptx.sreg.clock64 : i64
-
+
+  // CHECK: %31 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %31 = nvvm.read.ptx.sreg.tid.x range <0 : i32, 64 : i32> : i32
+
  llvm.return %1 : i32
}
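Note on the Linalg test updates above: `linalg.matmul` and `linalg.batch_matmul` accumulate into their `outs` operand (C += A * B), so a destination taken straight from `tensor.empty()` holds undefined values. The Winograd decomposition therefore now seeds every contraction with a `linalg.fill` of 0.0, which is what all the renumbered CHECK lines reflect. A minimal sketch of the pattern (a standalone illustration, not a snippet from the patch; the function and value names are made up):

```mlir
// Zero-initialize the accumulator of a batch contraction before using it.
func.func @matmul_init(%a: tensor<36x2x5xf32>, %b: tensor<36x5x2xf32>) -> tensor<36x2x2xf32> {
  %cst = arith.constant 0.000000e+00 : f32
  // tensor.empty() yields an undefined buffer; it must not be read as-is.
  %empty = tensor.empty() : tensor<36x2x2xf32>
  // linalg.fill seeds the accumulator so the result is A*B, not garbage + A*B.
  %init = linalg.fill ins(%cst : f32) outs(%empty : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
  %res = linalg.batch_matmul ins(%a, %b : tensor<36x2x5xf32>, tensor<36x5x2xf32>)
                             outs(%init : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
  return %res : tensor<36x2x2xf32>
}
```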
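The NVVM test changes exercise the `range` attribute on special-register reads: on export it becomes an LLVM `range(i32 0, 64)` call-site attribute, and the importer maps it back onto the MLIR op. A hedged round-trip sketch, with the attribute syntax taken from the import-side CHECK line in this patch (the function name is hypothetical):

```mlir
llvm.func @tid_with_range() -> i32 {
  // Exports as: %0 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  %0 = nvvm.read.ptx.sreg.tid.x range <0 : i32, 64 : i32> : i32
  llvm.return %0 : i32
}
```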