Skip to content

Commit 2877164

Browse files
author
Niklas Gustafsson
committed
Modified C file to build on Linux
1 parent 2815422 commit 2877164

File tree

2 files changed

+9
-182
lines changed

2 files changed

+9
-182
lines changed

src/Native/LibTorchSharp/crc32c.c

Lines changed: 1 addition & 168 deletions
Original file line number | Diff line number | Diff line change
@@ -924,174 +924,7 @@ static inline uint32_t shift_crc(uint32_t shift_table[][256], uint32_t crc)
924924
^ shift_table[3][crc >> 24];
925925
}
926926

927-
/* Compute CRC-32C using the Intel hardware instruction. */
928-
uint32_t crc32c_append_hw(uint32_t crc, buffer buf, size_t len)
929-
{
930-
buffer next = buf;
931-
buffer end;
932-
#ifdef _M_X64
933-
uint64_t crc0, crc1, crc2; /* need to be 64 bits for crc32q */
934-
#else
935-
uint32_t crc0, crc1, crc2;
936-
#endif
937-
938-
/* pre-process the crc */
939-
crc0 = crc ^ 0xffffffff;
940-
941-
/* compute the crc for up to seven leading bytes to bring the data pointer
942-
to an eight-byte boundary */
943-
while (len && ((uintptr_t)next & 7) != 0)
944-
{
945-
crc0 = _mm_crc32_u8((uint32_t)(crc0), *next);
946-
++next;
947-
--len;
948-
}
949-
950-
#ifdef _M_X64
951-
/* compute the crc on sets of LONG_SHIFT*3 bytes, executing three independent crc
952-
instructions, each on LONG_SHIFT bytes -- this is optimized for the Nehalem,
953-
Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a
954-
throughput of one crc per cycle, but a latency of three cycles */
955-
while (len >= 3 * LONG_SHIFT)
956-
{
957-
crc1 = 0;
958-
crc2 = 0;
959-
end = next + LONG_SHIFT;
960-
do
961-
{
962-
crc0 = _mm_crc32_u64(crc0, *(const uint64_t *)(next));
963-
crc1 = _mm_crc32_u64(crc1, *(const uint64_t *)(next + LONG_SHIFT));
964-
crc2 = _mm_crc32_u64(crc2, *(const uint64_t *)(next + 2 * LONG_SHIFT));
965-
next += 8;
966-
} while (next < end);
967-
crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
968-
crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
969-
next += 2 * LONG_SHIFT;
970-
len -= 3 * LONG_SHIFT;
971-
}
972-
973-
/* do the same thing, but now on SHORT_SHIFT*3 blocks for the remaining data less
974-
than a LONG_SHIFT*3 block */
975-
while (len >= 3 * SHORT_SHIFT)
976-
{
977-
crc1 = 0;
978-
crc2 = 0;
979-
end = next + SHORT_SHIFT;
980-
do
981-
{
982-
crc0 = _mm_crc32_u64(crc0, *(const uint64_t *)(next));
983-
crc1 = _mm_crc32_u64(crc1, *(const uint64_t *)(next + SHORT_SHIFT));
984-
crc2 = _mm_crc32_u64(crc2, *(const uint64_t *)(next + 2 * SHORT_SHIFT));
985-
next += 8;
986-
} while (next < end);
987-
crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
988-
crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
989-
next += 2 * SHORT_SHIFT;
990-
len -= 3 * SHORT_SHIFT;
991-
}
992-
993-
/* compute the crc on the remaining eight-byte units less than a SHORT_SHIFT*3
994-
block */
995-
end = next + (len - (len & 7));
996-
while (next < end)
997-
{
998-
crc0 = _mm_crc32_u64(crc0, *(const uint64_t *)(next));
999-
next += 8;
1000-
}
1001-
#else
1002-
/* compute the crc on sets of LONG_SHIFT*3 bytes, executing three independent crc
1003-
instructions, each on LONG_SHIFT bytes -- this is optimized for the Nehalem,
1004-
Westmere, Sandy Bridge, and Ivy Bridge architectures, which have a
1005-
throughput of one crc per cycle, but a latency of three cycles */
1006-
while (len >= 3 * LONG_SHIFT)
1007-
{
1008-
crc1 = 0;
1009-
crc2 = 0;
1010-
end = next + LONG_SHIFT;
1011-
do
1012-
{
1013-
crc0 = _mm_crc32_u32(crc0, *(const uint32_t *)(next));
1014-
crc1 = _mm_crc32_u32(crc1, *(const uint32_t *)(next + LONG_SHIFT));
1015-
crc2 = _mm_crc32_u32(crc2, *(const uint32_t *)(next + 2 * LONG_SHIFT));
1016-
next += 4;
1017-
} while (next < end);
1018-
crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
1019-
crc0 = shift_crc(long_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
1020-
next += 2 * LONG_SHIFT;
1021-
len -= 3 * LONG_SHIFT;
1022-
}
1023-
1024-
/* do the same thing, but now on SHORT_SHIFT*3 blocks for the remaining data less
1025-
than a LONG_SHIFT*3 block */
1026-
while (len >= 3 * SHORT_SHIFT)
1027-
{
1028-
crc1 = 0;
1029-
crc2 = 0;
1030-
end = next + SHORT_SHIFT;
1031-
do
1032-
{
1033-
crc0 = _mm_crc32_u32(crc0, *(const uint32_t *)(next));
1034-
crc1 = _mm_crc32_u32(crc1, *(const uint32_t *)(next + SHORT_SHIFT));
1035-
crc2 = _mm_crc32_u32(crc2, *(const uint32_t *)(next + 2 * SHORT_SHIFT));
1036-
next += 4;
1037-
} while (next < end);
1038-
crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc1;
1039-
crc0 = shift_crc(short_shifts.dword_table, (uint32_t)(crc0)) ^ crc2;
1040-
next += 2 * SHORT_SHIFT;
1041-
len -= 3 * SHORT_SHIFT;
1042-
}
1043-
1044-
/* compute the crc on the remaining eight-byte units less than a SHORT_SHIFT*3
1045-
block */
1046-
end = next + (len - (len & 7));
1047-
while (next < end)
1048-
{
1049-
crc0 = _mm_crc32_u32(crc0, *(const uint32_t *)(next));
1050-
next += 4;
1051-
}
1052-
#endif
1053-
len &= 7;
1054-
1055-
/* compute the crc for up to seven trailing bytes */
1056-
while (len)
1057-
{
1058-
crc0 = _mm_crc32_u8((uint32_t)(crc0), *next);
1059-
++next;
1060-
--len;
1061-
}
1062-
1063-
/* return a post-processed crc */
1064-
return (uint32_t)(crc0) ^ 0xffffffff;
1065-
}
1066-
1067-
/*
 * Report whether the processor advertises the hardware crc32 instruction.
 *
 * Executes CPUID leaf 1 and tests bit 20 of the third output register
 * (ECX), which is the SSE4.2 feature flag.
 *
 * returns  1 when the bit is set, 0 otherwise
 */
int crc32c_hw_available(void)
{
    int info[4] = { 0, 0, 0, 0 };
#ifdef CRC32C_GCC
    /* this __cpuid form takes the leaf first, then the four output registers */
    __cpuid(1, info[0], info[1], info[2], info[3]);
#else
    /* this __cpuid form fills a four-element array for the requested leaf */
    __cpuid(info, 1);
#endif
    return (info[2] & (1 << 20)) != 0;
}
1077-
1078-
/*
 * Pointer to the append implementation currently selected.
 *
 * When compiled as C++ the choice between the hardware and software routine
 * is made by the initializer itself.  When compiled as C the pointer starts
 * out at the software routine.
 */
#ifdef __cplusplus
uint32_t (*append_func)(uint32_t, buffer, size_t) =
    crc32c_hw_available() ? crc32c_append_hw : crc32c_append_sw;
#else
uint32_t (*append_func)(uint32_t, buffer, size_t) = crc32c_append_sw;
#endif
1084-
1085-
#ifndef __cplusplus
1086-
void crc32c_init()
1087-
{
1088-
if (crc32c_hw_available()) {
1089-
append_func = crc32c_append_hw;
1090-
}
1091-
}
1092-
#endif
1093-
1094927
/*
 * Extend a CRC-32C value over additional input.
 *
 * crc      crc of the data processed so far
 * input    start of the bytes to append
 * length   number of bytes at input
 * returns  whatever crc32c_append_sw() returns for the same arguments
 */
uint32_t crc32c_append(uint32_t crc, buffer input, size_t length)
{
    const uint32_t updated = crc32c_append_sw(crc, input, length);

    return updated;
}

src/Native/LibTorchSharp/crc32c.h

Lines changed: 8 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -28,23 +28,17 @@
2828
#define CRC32C_MSC
2929
#endif
3030

31-
// ALTERED SOURCE VERSION
//
// Per #2 in the copyright notice above:
//
// The definition of CRC32C_API has been altered from the original.

// CRC32C_API marks the library's public symbols: __declspec(dllexport) when
// _WIN32 is defined, default symbol visibility otherwise.
#ifdef _WIN32
#define CRC32C_API __declspec(dllexport)
#else
#define CRC32C_API __attribute__((visibility("default")))
#endif
4842

4943
#include <stdint.h>
5044
#include <stddef.h>

0 commit comments

Comments
 (0)