@@ -924,174 +924,7 @@ static inline uint32_t shift_crc(uint32_t shift_table[][256], uint32_t crc)
         ^ shift_table[3][crc >> 24];
 }
 
-/* Compute CRC-32C using the Intel hardware instruction. */
-uint32_t crc32c_append_hw(uint32_t crc, buffer buf, size_t len)
-{
-    buffer next = buf;
-    buffer end;
-#ifdef _M_X64
-    uint64_t crc0, crc1, crc2;  /* need to be 64 bits for crc32q */
-#else
-    uint32_t crc0, crc1, crc2;
-#endif
-
-    /* pre-process the crc */
-    crc0 = crc ^ 0xffffffff;
-
-    /* compute the crc for up to seven leading bytes to bring the data pointer
-       to an eight-byte boundary */
-    while (len && ((uintptr_t)next & 7) != 0)
-    {
-        crc0 = _mm_crc32_u8((uint32_t)(crc0), *next);
-        ++next;
-        --len;
-    }
-
-#ifdef _M_X64
-    /* compute the crc on sets of LONG_SHIFT*3 bytes, executing three independent crc
-       instructions, each on LONG_SHIFT bytes -- this is optimized for the Nehalem,
-       Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a
-       throughput of one crc per cycle, but a latency of three cycles */
-    while (len >= 3 * LONG_SHIFT)
-    {
-        crc1 = 0;
-        crc2 = 0;
-        end = next + LONG_SHIFT;
-        do
-        {
-            crc0 = _mm_crc32_u64(crc0, *(const uint64_t *)(next));
-            crc1 = _mm_crc32_u64(crc1, *(const uint64_t *)(next + LONG_SHIFT));
-            crc2 = _mm_crc32_u64(crc2, *(const uint64_t *)(next + 2 * LONG_SHIFT));
-            next += 8;
-        } while (next < end);
-        crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
-        crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
-        next += 2 * LONG_SHIFT;
-        len -= 3 * LONG_SHIFT;
-    }
-
-    /* do the same thing, but now on SHORT_SHIFT*3 blocks for the remaining data less
-       than a LONG_SHIFT*3 block */
-    while (len >= 3 * SHORT_SHIFT)
-    {
-        crc1 = 0;
-        crc2 = 0;
-        end = next + SHORT_SHIFT;
-        do
-        {
-            crc0 = _mm_crc32_u64(crc0, *(const uint64_t *)(next));
-            crc1 = _mm_crc32_u64(crc1, *(const uint64_t *)(next + SHORT_SHIFT));
-            crc2 = _mm_crc32_u64(crc2, *(const uint64_t *)(next + 2 * SHORT_SHIFT));
-            next += 8;
-        } while (next < end);
-        crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
-        crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
-        next += 2 * SHORT_SHIFT;
-        len -= 3 * SHORT_SHIFT;
-    }
-
-    /* compute the crc on the remaining eight-byte units less than a SHORT_SHIFT*3
-       block */
-    end = next + (len - (len & 7));
-    while (next < end)
-    {
-        crc0 = _mm_crc32_u64(crc0, *(const uint64_t *)(next));
-        next += 8;
-    }
-#else
-    /* compute the crc on sets of LONG_SHIFT*3 bytes, executing three independent crc
-       instructions, each on LONG_SHIFT bytes -- this is optimized for the Nehalem,
-       Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a
-       throughput of one crc per cycle, but a latency of three cycles */
-    while (len >= 3 * LONG_SHIFT)
-    {
-        crc1 = 0;
-        crc2 = 0;
-        end = next + LONG_SHIFT;
-        do
-        {
-            crc0 = _mm_crc32_u32(crc0, *(const uint32_t *)(next));
-            crc1 = _mm_crc32_u32(crc1, *(const uint32_t *)(next + LONG_SHIFT));
-            crc2 = _mm_crc32_u32(crc2, *(const uint32_t *)(next + 2 * LONG_SHIFT));
-            next += 4;
-        } while (next < end);
-        crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
-        crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
-        next += 2 * LONG_SHIFT;
-        len -= 3 * LONG_SHIFT;
-    }
-
-    /* do the same thing, but now on SHORT_SHIFT*3 blocks for the remaining data less
-       than a LONG_SHIFT*3 block */
-    while (len >= 3 * SHORT_SHIFT)
-    {
-        crc1 = 0;
-        crc2 = 0;
-        end = next + SHORT_SHIFT;
-        do
-        {
-            crc0 = _mm_crc32_u32(crc0, *(const uint32_t *)(next));
-            crc1 = _mm_crc32_u32(crc1, *(const uint32_t *)(next + SHORT_SHIFT));
-            crc2 = _mm_crc32_u32(crc2, *(const uint32_t *)(next + 2 * SHORT_SHIFT));
-            next += 4;
-        } while (next < end);
-        crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
-        crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
-        next += 2 * SHORT_SHIFT;
-        len -= 3 * SHORT_SHIFT;
-    }
-
-    /* compute the crc on the remaining eight-byte units less than a SHORT_SHIFT*3
-       block */
-    end = next + (len - (len & 7));
-    while (next < end)
-    {
-        crc0 = _mm_crc32_u32(crc0, *(const uint32_t *)(next));
-        next += 4;
-    }
-#endif
-    len &= 7;
-
-    /* compute the crc for up to seven trailing bytes */
-    while (len)
-    {
-        crc0 = _mm_crc32_u8((uint32_t)(crc0), *next);
-        ++next;
-        --len;
-    }
-
-    /* return a post-processed crc */
-    return (uint32_t)(crc0) ^ 0xffffffff;
-}
-
-int crc32c_hw_available()
-{
-    int info[4];
-#ifdef CRC32C_GCC
-    __cpuid(1, info[0], info[1], info[2], info[3]);
-#else
-    __cpuid(info, 1);
-#endif
-    return (info[2] & (1 << 20)) != 0;
-}
-
-uint32_t (*append_func)(uint32_t, buffer, size_t)
-#ifdef __cplusplus
-    = crc32c_hw_available() ? crc32c_append_hw : crc32c_append_sw;
-#else
-    = crc32c_append_sw;
-#endif
-
-#ifndef __cplusplus
-void crc32c_init()
-{
-    if (crc32c_hw_available()) {
-        append_func = crc32c_append_hw;
-    }
-}
-#endif
-
 uint32_t crc32c_append(uint32_t crc, buffer input, size_t length)
 {
-    return append_func(crc, input, length);
+    return crc32c_append_sw(crc, input, length);
 }
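
For context, below is a minimal sketch (not part of this commit) of the single-stream form of the hardware loop removed above: pre- and post-invert the CRC, fold eight bytes at a time through _mm_crc32_u64, and finish the tail byte-wise with _mm_crc32_u8. It assumes an x86-64 compiler with SSE4.2 enabled (e.g. -msse4.2); the function name is illustrative, and it omits the three-way interleaving and shift tables the removed crc32c_append_hw used.

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <nmmintrin.h>  /* SSE4.2 CRC32 intrinsics */

/* Illustrative single-stream CRC-32C; not the removed three-stream code. */
static uint32_t crc32c_hw_single(uint32_t crc, const unsigned char *buf, size_t len)
{
    uint64_t c = crc ^ 0xffffffff;  /* pre-process the crc */
    while (len >= 8)
    {
        uint64_t word;
        memcpy(&word, buf, 8);      /* safe unaligned eight-byte load */
        c = _mm_crc32_u64(c, word); /* crc32q: eight bytes per step */
        buf += 8;
        len -= 8;
    }
    while (len--)                   /* up to seven trailing bytes */
        c = _mm_crc32_u8((uint32_t)c, *buf++);
    return (uint32_t)c ^ 0xffffffff; /* post-process the crc */
}

The removed code ran three such streams in parallel over LONG_SHIFT- and SHORT_SHIFT-byte blocks and merged them with shift_crc, hiding the three-cycle latency of the crc32 instruction behind its one-per-cycle throughput.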