From e5ffb7c0a353bd7ca2e26e2e565c5ba7d740bfab Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 11 Apr 2025 08:09:52 -0700 Subject: [PATCH 1/7] Fix ARMV9SME target and add support_sme1 code for MacOS --- driver/others/dynamic_arm64.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index 31821ae789..eb7a65c3cd 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -43,6 +43,14 @@ #include #endif +#ifdef __APPLE__ +#include +int32_t value; +size_t length=sizeof(value); +int64_t value64; +size_t length64=sizeof(value64); +#endif + extern gotoblas_t gotoblas_ARMV8; #ifdef DYNAMIC_LIST #ifdef DYN_CORTEXA53 @@ -168,7 +176,7 @@ extern void openblas_warning(int verbose, const char * msg); #define FALLBACK_VERBOSE 1 #define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n" -#define NUM_CORETYPES 18 +#define NUM_CORETYPES 19 /* * In case asm/hwcap.h is outdated on the build system, make sure @@ -207,6 +215,7 @@ static char *corename[] = { "cortexa55", "armv8sve", "a64fx", + "armv9sme", "unknown" }; @@ -229,6 +238,7 @@ char *gotoblas_corename(void) { if (gotoblas == &gotoblas_CORTEXA55) return corename[15]; if (gotoblas == &gotoblas_ARMV8SVE) return corename[16]; if (gotoblas == &gotoblas_A64FX) return corename[17]; + if (gotoblas == &gotoblas_ARMV9SME) return corename[18]; return corename[NUM_CORETYPES]; } @@ -277,6 +287,11 @@ static gotoblas_t *get_coretype(void) { char coremsg[128]; #if defined (OS_DARWIN) +//future #if !defined(NO_SME) +// if (support_sme1) { +// return &gotoblas_ARMV9SME; +// } +// #endif return &gotoblas_NEOVERSEN1; #endif @@ -439,6 +454,7 @@ static gotoblas_t *get_coretype(void) { } break; case 0x61: // Apple +//future if (support_sme1) return &gotoblas_ARMV9SME; return &gotoblas_NEOVERSEN1; break; default: @@ -446,8 +462,8 @@ static gotoblas_t *get_coretype(void) { openblas_warning(1, coremsg); } -#if !defined(NO_SME) && defined(HWCAP2_SME) - if ((getauxval(AT_HWCAP2) & HWCAP2_SME)) { +#if !defined(NO_SME) + if (support_sme1) { return &gotoblas_ARMV9SME; } #endif @@ -511,6 +527,10 @@ int support_sme1(void) { if(getauxval(AT_HWCAP2) & HWCAP2_SME){ ret = 1; } +#endif +#if defined(__APPLE__) + sysctlbyname("hw.optional.arm.FEAT_SME",&value64,&length64,NULL,0); + ret = value64; #endif return ret; } From 27a4084a1c145aa1cc2e9498021a1b3974d94a67 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 8 May 2025 16:30:37 +0200 Subject: [PATCH 2/7] make sgemm_direct unconditionally available on all arm64 --- common_param.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/common_param.h b/common_param.h index d4d5a8eb27..2d771a27da 100644 --- a/common_param.h +++ b/common_param.h @@ -224,10 +224,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K); #endif #ifdef ARCH_ARM64 -#ifdef HAVE_SME void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); #endif -#endif int (*sgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG); From 6e210c6e36f124f27167526de3bdd2afa5017b1f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 8 May 2025 16:40:40 +0200 Subject: [PATCH 3/7] build a (dummy) sgemm_direct kernel on all arm64 --- kernel/Makefile.L3 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 41f16f9c91..2bd6b294fb 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -103,8 +103,8 @@ endif ifeq ($(ARCH), arm64) ifeq ($(TARGET_CORE), ARMV9SME) HAVE_SME = 1 -SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c endif +SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c endif endif endif @@ -143,9 +143,10 @@ SKERNELOBJS += \ sgemm_direct_performant$(TSUFFIX).$(SUFFIX) endif ifeq ($(ARCH), arm64) +SKERNELOBJS += \ + sgemm_direct$(TSUFFIX).$(SUFFIX) ifdef HAVE_SME SKERNELOBJS += \ - sgemm_direct$(TSUFFIX).$(SUFFIX) \ sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) \ sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX) endif @@ -835,9 +836,9 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ endif ifeq ($(ARCH), arm64) -ifdef HAVE_SME $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@ +ifdef HAVE_SME $(KDIR)sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) : $(CC) $(CFLAGS) -c $(KERNELDIR)/sgemm_direct_sme1.S -UDOUBLE -UCOMPLEX -o $@ $(KDIR)sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX) : From f912c4ccf42d253af9143dd38b902471ff96b257 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 8 May 2025 16:44:23 +0200 Subject: [PATCH 4/7] provide a dummy implementation for non-SME targets --- kernel/arm64/sgemm_direct_arm64_sme1.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/arm64/sgemm_direct_arm64_sme1.c b/kernel/arm64/sgemm_direct_arm64_sme1.c index bd7e548894..1b65995998 100644 --- a/kernel/arm64/sgemm_direct_arm64_sme1.c +++ b/kernel/arm64/sgemm_direct_arm64_sme1.c @@ -56,4 +56,10 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ free(A_mod); } +#else + +void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\ + BLASLONG strideA, float * __restrict B, BLASLONG strideB ,\ + float * __restrict R, BLASLONG strideR){} + #endif From c81c1c8b2533ed70f39188478504e6305ebe71f7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 8 May 2025 16:48:13 +0200 Subject: [PATCH 5/7] sgemm_direct has to be unconditionally present on all arm64 --- kernel/setparam-ref.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 24c2855579..5a5045ce23 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -180,9 +180,7 @@ gotoblas_t TABLE_NAME = { sgemm_direct_performantTS, #endif #ifdef ARCH_ARM64 -#ifdef HAVE_SME sgemm_directTS, -#endif #endif sgemm_kernelTS, sgemm_betaTS, From 7241c570d78348b225bf40aeefc1a7a4f594e244 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 9 May 2025 00:06:18 +0200 Subject: [PATCH 6/7] always build sgemm_direct kernel on arm64, even if just as dummy --- kernel/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 81185f6030..48c8955888 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -208,7 +208,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) set(USE_TRMM true) endif () set(USE_DIRECT_SGEMM false) - if (X86_64 OR (ARM64 AND (UC_TARGET_CORE MATCHES ARMV9SME))) + if (X86_64 OR ARM64) set(USE_DIRECT_SGEMM true) endif() @@ -225,9 +225,11 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) set (SGEMMDIRECTSMEKERNEL sgemm_direct_sme1.S) set (SGEMMDIRECTPREKERNEL sgemm_direct_sme1_preprocess.S) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE) + if (HAVE_SME) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTSMEKERNEL}" "" "gemm_direct_sme1" false "" "" false SINGLE) GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPREKERNEL}" "" "gemm_direct_sme1_preprocess" false "" "" false SINGLE) endif () + endif () endif() foreach (float_type SINGLE DOUBLE) From 1e48d04aa50e75525011576d36ce9d0a96dbb8c5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 9 May 2025 11:59:28 +0200 Subject: [PATCH 7/7] Update dynamic_arm64.c --- driver/others/dynamic_arm64.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c index eb7a65c3cd..428c5758be 100644 --- a/driver/others/dynamic_arm64.c +++ b/driver/others/dynamic_arm64.c @@ -128,7 +128,7 @@ extern gotoblas_t gotoblas_ARMV9SME; #else #define gotoblas_ARMV9SME gotoblas_ARMV8 #endif -#ifdef DYN_CORTEX_A55 +#ifdef DYN_CORTEXA55 extern gotoblas_t gotoblas_CORTEXA55; #else #define gotoblas_CORTEXA55 gotoblas_ARMV8 @@ -155,17 +155,17 @@ extern gotoblas_t gotoblas_NEOVERSEV1; extern gotoblas_t gotoblas_NEOVERSEN2; extern gotoblas_t gotoblas_ARMV8SVE; extern gotoblas_t gotoblas_A64FX; +#ifndef NO_SME +extern gotoblas_t gotoblas_ARMV9SME; +#else +#define gotoblas_ARMV9SME gotoblas_ARMV8SVE +#endif #else #define gotoblas_NEOVERSEV1 gotoblas_ARMV8 #define gotoblas_NEOVERSEN2 gotoblas_ARMV8 #define gotoblas_ARMV8SVE gotoblas_ARMV8 #define gotoblas_A64FX gotoblas_ARMV8 -#endif - -#ifndef NO_SME -extern gotoblas_t gotoblas_ARMV9SME; -#else -#define gotoblas_ARMV9SME gotoblas_ARMV8SVE +#define gotoblas_ARMV9SME gotoblas_ARMV8 #endif extern gotoblas_t gotoblas_THUNDERX3T110; @@ -276,6 +276,7 @@ static gotoblas_t *force_coretype(char *coretype) { case 15: return (&gotoblas_CORTEXA55); case 16: return (&gotoblas_ARMV8SVE); case 17: return (&gotoblas_A64FX); + case 18: return (&gotoblas_ARMV9SME); } snprintf(message, 128, "Core not found: %s\n", coretype); openblas_warning(1, message); @@ -288,7 +289,7 @@ static gotoblas_t *get_coretype(void) { #if defined (OS_DARWIN) //future #if !defined(NO_SME) -// if (support_sme1) { +// if (support_sme1()) { // return &gotoblas_ARMV9SME; // } // #endif @@ -454,7 +455,7 @@ static gotoblas_t *get_coretype(void) { } break; case 0x61: // Apple -//future if (support_sme1) return &gotoblas_ARMV9SME; +//future if (support_sme1()) return &gotoblas_ARMV9SME; return &gotoblas_NEOVERSEN1; break; default: @@ -463,7 +464,7 @@ static gotoblas_t *get_coretype(void) { } #if !defined(NO_SME) - if (support_sme1) { + if (support_sme1()) { return &gotoblas_ARMV9SME; } #endif