Fix Arm64 OSS pytorch build with FBGEMM (#4775)

mcfi · facebook-github-bot · commit cdd446a85384 · 2025-09-04T00:11:47.000-07:00
Summary: X-link: facebookresearch/FBGEMM#1796 X-link: pytorch/pytorch#161527 Pull Request resolved: #4775 Without this change, the Arm64 OSS PyTorch build with FBGEMM fails with the following linker error: Undefined symbols for architecture arm64: "fbgemm::FindMinMax(float const*, float*, float*, long long)", referenced from: at::native::fbgemm_linear_int8_weight_fp32_activation(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, c10::Scalar const&, c10::Scalar const&, at::Tensor const&) in QuantizedLinear.cpp.o at::native::fbgemm_linear_quantize_weight(at::Tensor const&) in QuantizedLinear.cpp.o PackedConvWeight<2>::apply_dynamic(at::Tensor const&, bool) in qconv_dynamic.cpp.o PackedConvWeight<3>::apply_dynamic(at::Tensor const&, bool) in qconv_dynamic.cpp.o at::Tensor PackedLinearWeight::apply_dynamic_impl<false>(at::Tensor, bool) in qlinear_dynamic.cpp.o at::Tensor PackedLinearWeight::apply_dynamic_impl<true>(at::Tensor, bool) in qlinear_dynamic.cpp.o ld: symbol(s) not found for architecture arm64 This change fixes the issue by moving FindMinMax's implementation from QuantUtilsAvx2.cc to QuantUtils.cc. FindMinMax is a platform-agnostic function with an optional AVX-accelerated path (guarded by `__AVX__`), so conceptually it belongs in QuantUtils.cc. Reviewed By: q10 Differential Revision: D81052327 fbshipit-source-id: c50ac43329d939433fcf6a1610cbbe5726dc6f6e
diff --git a/src/QuantUtils.cc b/src/QuantUtils.cc
@@ -19,6 +19,11 @@
 #include "fbgemm/FloatConversion.h"
 #include "fbgemm/Types.h"
 
+#if defined(__x86_64__) || defined(__i386__) || \
+    (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)))
+#include <immintrin.h>
+#endif
+
 namespace fbgemm {
 
 using namespace std;
@@ -196,6 +201,43 @@ void ChooseRequantizationMultiplier(
 ////////////////////////////////////////////////////////////////////////////////
 // Utility functions
 
+void FindMinMax(const float* m, float* min, float* max, int64_t len) {  // Scans m[0..len) and writes its smallest / largest value to *min / *max.
+  if (len <= 0) {  // Nothing to scan: both outputs are set to 0.0f.
+    *min = 0.0f;
+    *max = 0.0f;
+    return;
+  }
+
+  float temp_min = *m, temp_max = *m;  // Running extrema, seeded with m[0].
+  int64_t i = 0;  // Shared cursor: the scalar loop below resumes where the vector loop stops.
+
+#ifdef __AVX__
+  __m256 min_v = _mm256_set1_ps(*m), max_v = _mm256_set1_ps(*m);  // Every lane starts at m[0].
+  constexpr int VLEN = 8;  // Number of floats in one __m256.
+  if (len >= VLEN) {
+    for (; i < len / VLEN * VLEN; i += VLEN) {  // Full VLEN-sized chunks only; the tail is left for the scalar loop.
+      min_v = _mm256_min_ps(min_v, _mm256_loadu_ps(m + i));  // Unaligned load, lane-wise minimum.
+      max_v = _mm256_max_ps(max_v, _mm256_loadu_ps(m + i));  // Unaligned load, lane-wise maximum.
+    }
+
+    float min_buf[VLEN], max_buf[VLEN];  // Lanes are spilled here so they can be reduced with scalar code.
+    _mm256_storeu_ps(min_buf, min_v);
+    _mm256_storeu_ps(max_buf, max_v);
+    for (int j = 0; j < VLEN; ++j) {  // Fold the per-lane results into the scalar extrema.
+      temp_min = std::min(temp_min, min_buf[j]);
+      temp_max = std::max(temp_max, max_buf[j]);
+    }
+  }
+#endif
+
+  for (; i < len; i++) {  // Remaining elements; this is the entire input when __AVX__ is not defined or len < 8.
+    temp_min = std::min(temp_min, m[i]);
+    temp_max = std::max(temp_max, m[i]);
+  }
+  *min = temp_min;
+  *max = temp_max;
+}
+
 #define FBGEMM_SPECIALIZED_QUANTIZE(T, LEGACY)                      \
   template <>                                                       \
   FBGEMM_API void Quantize<T, LEGACY>(                              \
diff --git a/src/QuantUtilsAvx2.cc b/src/QuantUtilsAvx2.cc
@@ -278,43 +278,6 @@ SPECIALIZE_FUSEDDQAVX2(int8_t)
 
 #undef SPECIALIZE_FUSEDDQAVX2
 
-void FindMinMax(const float* m, float* min, float* max, int64_t len) {
-  if (len <= 0) {
-    *min = 0.0f;
-    *max = 0.0f;
-    return;
-  }
-
-  float temp_min = *m, temp_max = *m;
-  int64_t i = 0;
-
-#ifdef __AVX__
-  __m256 min_v = _mm256_set1_ps(*m), max_v = _mm256_set1_ps(*m);
-  constexpr int VLEN = 8;
-  if (len >= VLEN) {
-    for (; i < len / VLEN * VLEN; i += VLEN) {
-      min_v = _mm256_min_ps(min_v, _mm256_loadu_ps(m + i));
-      max_v = _mm256_max_ps(max_v, _mm256_loadu_ps(m + i));
-    }
-
-    float min_buf[VLEN], max_buf[VLEN];
-    _mm256_storeu_ps(min_buf, min_v);
-    _mm256_storeu_ps(max_buf, max_v);
-    for (int j = 0; j < VLEN; ++j) {
-      temp_min = std::min(temp_min, min_buf[j]);
-      temp_max = std::max(temp_max, max_buf[j]);
-    }
-  }
-#endif
-
-  for (; i < len; i++) {
-    temp_min = std::min(temp_min, m[i]);
-    temp_max = std::max(temp_max, m[i]);
-  }
-  *min = temp_min;
-  *max = temp_max;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // Requantization (with floats)