From 0bba402cea0abf699a2d64fc74b9ad80c314ec01 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 18 Apr 2019 14:21:09 +0200 Subject: [PATCH 1/3] Minor performance improvements for System.Globalization.Native --- .../System.Globalization.Native/pal_calendarData.c | 9 ++++++--- .../System.Globalization.Native/pal_collation.c | 12 ++++++------ .../pal_localeNumberData.c | 9 ++++++--- .../pal_localeStringData.c | 6 ++++-- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/corefx/System.Globalization.Native/pal_calendarData.c b/src/corefx/System.Globalization.Native/pal_calendarData.c index 07f8a6f290de..852e33874dec 100644 --- a/src/corefx/System.Globalization.Native/pal_calendarData.c +++ b/src/corefx/System.Globalization.Native/pal_calendarData.c @@ -224,7 +224,7 @@ static int InvokeCallbackForDatePattern(const char* locale, UErrorCode ignore = U_ZERO_ERROR; int32_t patternLen = udat_toPattern(pFormat, FALSE, NULL, 0, &ignore) + 1; - UChar* pattern = calloc(patternLen, sizeof(UChar)); + UChar* pattern = malloc(patternLen * sizeof(UChar)); if (pattern == NULL) { udat_close(pFormat); @@ -232,6 +232,7 @@ static int InvokeCallbackForDatePattern(const char* locale, } udat_toPattern(pFormat, FALSE, pattern, patternLen, &err); + pattern[patternLen - 1] = 0; udat_close(pFormat); if (U_SUCCESS(err)) @@ -264,7 +265,7 @@ static int InvokeCallbackForDateTimePattern(const char* locale, UErrorCode ignore = U_ZERO_ERROR; int32_t patternLen = udatpg_getBestPattern(pGenerator, patternSkeleton, -1, NULL, 0, &ignore) + 1; - UChar* bestPattern = calloc(patternLen, sizeof(UChar)); + UChar* bestPattern = malloc(patternLen * sizeof(UChar)); if (bestPattern == NULL) { udatpg_close(pGenerator); @@ -272,6 +273,7 @@ static int InvokeCallbackForDateTimePattern(const char* locale, } udatpg_getBestPattern(pGenerator, patternSkeleton, -1, bestPattern, patternLen, &err); + bestPattern[patternLen - 1] = 0; udatpg_close(pGenerator); if (U_SUCCESS(err)) @@ -332,7 +334,7 @@ static int32_t EnumSymbols(const char* locale, } else { - symbolBuf = calloc(symbolLen, sizeof(UChar)); + symbolBuf = malloc(symbolLen * sizeof(UChar)); if (symbolBuf == NULL) { err = U_MEMORY_ALLOCATION_ERROR; @@ -341,6 +343,7 @@ static int32_t EnumSymbols(const char* locale, } udat_getSymbols(pFormat, type, i, symbolBuf, symbolLen, &err); + symbolBuf[symbolLen - 1] = 0; if (U_SUCCESS(err)) { diff --git a/src/corefx/System.Globalization.Native/pal_collation.c b/src/corefx/System.Globalization.Native/pal_collation.c index a8c94cfb71fa..67308317c85d 100644 --- a/src/corefx/System.Globalization.Native/pal_collation.c +++ b/src/corefx/System.Globalization.Native/pal_collation.c @@ -188,15 +188,14 @@ static UCharList* GetCustomRules(int32_t options, UColAttributeValue strength, i // Use 512 as the starting size, so the customRules won't have to grow if we are just // doing the KanaType custom rule. customRules->capacity = 512; - customRules->items = calloc(customRules->capacity, sizeof(UChar)); + customRules->size = 0; + customRules->items = malloc(customRules->capacity * sizeof(UChar)); if (customRules->items == NULL) { free(customRules); return NULL; } - customRules->size = 0; - if (needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule) { UChar compareChar = needsIgnoreKanaTypeCustomRule ? '=' : '<'; @@ -280,7 +279,7 @@ UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UCollator* pClonedCollator; UCharList* customRules = GetCustomRules(options, strength, isIgnoreSymbols); - if (customRules == NULL) + if (customRules == NULL || customRules->size == 0) { pClonedCollator = ucol_safeClone(pCollator, NULL, NULL, pErr); } @@ -292,7 +291,7 @@ UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength); int32_t completeRulesLength = localeRulesLength + customRuleLength + 1; - UChar* completeRules = calloc(completeRulesLength, sizeof(UChar)); + UChar* completeRules = malloc(completeRulesLength * sizeof(UChar)); for (int i = 0; i < localeRulesLength; i++) { @@ -302,11 +301,12 @@ UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, { completeRules[localeRulesLength + i] = customRules->items[i]; } + completeRules[completeRulesLength - 1] = 0; pClonedCollator = ucol_openRules(completeRules, completeRulesLength, UCOL_DEFAULT, strength, NULL, pErr); free(completeRules); - free(customRules); } + free(customRules); if (isIgnoreSymbols) { diff --git a/src/corefx/System.Globalization.Native/pal_localeNumberData.c b/src/corefx/System.Globalization.Native/pal_localeNumberData.c index 5663c63d8251..2fb1559cb77d 100644 --- a/src/corefx/System.Globalization.Native/pal_localeNumberData.c +++ b/src/corefx/System.Globalization.Native/pal_localeNumberData.c @@ -86,13 +86,13 @@ static char* NormalizeNumericPattern(const UChar* srcPattern, int isNegative) if (isNegative && !minusAdded) { size_t length = (iEnd - iStart) + 2; - destPattern = calloc(length, sizeof(char)); + destPattern = malloc(length * sizeof(char)); destPattern[index++] = '-'; } else { size_t length = (iEnd - iStart) + 1; - destPattern = calloc(length, sizeof(char)); + destPattern = malloc(length * sizeof(char)); } for (int i = iStart; i <= iEnd; i++) @@ -142,6 +142,8 @@ static char* NormalizeNumericPattern(const UChar* srcPattern, int isNegative) } } + destPattern[index] = 0; + return destPattern; } @@ -164,7 +166,7 @@ static int GetNumericPattern(const UNumberFormat* pNumberFormat, UErrorCode ignore = U_ZERO_ERROR; int32_t icuPatternLength = unum_toPattern(pNumberFormat, FALSE, NULL, 0, &ignore) + 1; - UChar* icuPattern = calloc(icuPatternLength, sizeof(UChar)); + UChar* icuPattern = malloc(icuPatternLength * sizeof(UChar)); if (icuPattern == NULL) { return U_MEMORY_ALLOCATION_ERROR; @@ -173,6 +175,7 @@ static int GetNumericPattern(const UNumberFormat* pNumberFormat, UErrorCode err = U_ZERO_ERROR; unum_toPattern(pNumberFormat, FALSE, icuPattern, icuPatternLength, &err); + icuPattern[icuPatternLength - 1] = 0; assert(U_SUCCESS(err)); diff --git a/src/corefx/System.Globalization.Native/pal_localeStringData.c b/src/corefx/System.Globalization.Native/pal_localeStringData.c index a989b57f7946..cac4777d2e92 100644 --- a/src/corefx/System.Globalization.Native/pal_localeStringData.c +++ b/src/corefx/System.Globalization.Native/pal_localeStringData.c @@ -77,13 +77,14 @@ static UErrorCode GetLocaleIso639LanguageTwoLetterName(const char* locale, UChar UErrorCode status = U_ZERO_ERROR, ignore = U_ZERO_ERROR; int32_t length = uloc_getLanguage(locale, NULL, 0, &ignore) + 1; - char* buf = calloc(length, sizeof(char)); + char* buf = malloc(length * sizeof(char)); if (buf == NULL) { return U_MEMORY_ALLOCATION_ERROR; } uloc_getLanguage(locale, buf, length, &status); + buf[length - 1] = 0; u_charsToUChars_safe(buf, value, valueLength, &status); free(buf); @@ -120,13 +121,14 @@ static UErrorCode GetLocaleIso3166CountryName(const char* locale, UChar* value, UErrorCode status = U_ZERO_ERROR, ignore = U_ZERO_ERROR; int32_t length = uloc_getCountry(locale, NULL, 0, &ignore) + 1; - char* buf = calloc(length, sizeof(char)); + char* buf = malloc(length * sizeof(char)); if (buf == NULL) { return U_MEMORY_ALLOCATION_ERROR; } uloc_getCountry(locale, buf, length, &status); + buf[length - 1] = 0; u_charsToUChars_safe(buf, value, valueLength, &status); free(buf); From 9f611bfb4b76fcd9bb0ba0ffefa1c6e94b099f98 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 18 Apr 2019 18:59:13 +0200 Subject: [PATCH 2/3] Add guards against integer overflows, fix one allocation size and limit array resizing to stable increments --- .../System.Globalization.Native/pal_alloc.h | 26 +++++++++++++++++++ .../pal_calendarData.c | 7 ++--- .../pal_collation.c | 13 +++++++--- .../pal_localeNumberData.c | 7 ++--- .../pal_localeStringData.c | 5 ++-- 5 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 src/corefx/System.Globalization.Native/pal_alloc.h diff --git a/src/corefx/System.Globalization.Native/pal_alloc.h b/src/corefx/System.Globalization.Native/pal_alloc.h new file mode 100644 index 000000000000..ce33c0576027 --- /dev/null +++ b/src/corefx/System.Globalization.Native/pal_alloc.h @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#pragma once + +#include +#include + +static void *pal_allocarray(size_t numMembers, size_t count) +{ + if (numMembers == 0 || count == 0) + return NULL; + if (SIZE_MAX / numMembers < count) + return NULL; + return malloc(numMembers * count); +} + +static void *pal_reallocarray(void *ptr, size_t numMembers, size_t count) +{ + if (numMembers == 0 || count == 0) + return NULL; + if (SIZE_MAX / numMembers < count) + return NULL; + return realloc(ptr, numMembers * count); +} diff --git a/src/corefx/System.Globalization.Native/pal_calendarData.c b/src/corefx/System.Globalization.Native/pal_calendarData.c index 852e33874dec..4d6ddd35a9bb 100644 --- a/src/corefx/System.Globalization.Native/pal_calendarData.c +++ b/src/corefx/System.Globalization.Native/pal_calendarData.c @@ -7,6 +7,7 @@ #include #include +#include "pal_alloc.h" #include "pal_calendarData.h" #define GREGORIAN_NAME "gregorian" @@ -224,7 +225,7 @@ static int InvokeCallbackForDatePattern(const char* locale, UErrorCode ignore = U_ZERO_ERROR; int32_t patternLen = udat_toPattern(pFormat, FALSE, NULL, 0, &ignore) + 1; - UChar* pattern = malloc(patternLen * sizeof(UChar)); + UChar* pattern = pal_allocarray(patternLen, sizeof(UChar)); if (pattern == NULL) { udat_close(pFormat); @@ -265,7 +266,7 @@ static int InvokeCallbackForDateTimePattern(const char* locale, UErrorCode ignore = U_ZERO_ERROR; int32_t patternLen = udatpg_getBestPattern(pGenerator, patternSkeleton, -1, NULL, 0, &ignore) + 1; - UChar* bestPattern = malloc(patternLen * sizeof(UChar)); + UChar* bestPattern = pal_allocarray(patternLen, sizeof(UChar)); if (bestPattern == NULL) { udatpg_close(pGenerator); @@ -334,7 +335,7 @@ static int32_t EnumSymbols(const char* locale, } else { - symbolBuf = malloc(symbolLen * sizeof(UChar)); + symbolBuf = pal_allocarray(symbolLen, sizeof(UChar)); if (symbolBuf == NULL) { err = U_MEMORY_ALLOCATION_ERROR; diff --git a/src/corefx/System.Globalization.Native/pal_collation.c b/src/corefx/System.Globalization.Native/pal_collation.c index 67308317c85d..0ae2c226a914 100644 --- a/src/corefx/System.Globalization.Native/pal_collation.c +++ b/src/corefx/System.Globalization.Native/pal_collation.c @@ -10,6 +10,7 @@ #include #include +#include "pal_alloc.h" #include "pal_collation.h" c_static_assert_msg(UCOL_EQUAL == 0, "managed side requires 0 for equal strings"); @@ -142,8 +143,12 @@ static int AddItem(UCharList* list, const UChar item) size_t size = list->size++; if (size >= list->capacity) { - list->capacity *= 2; - UChar* ptr = (UChar*)realloc(list->items, list->capacity * sizeof(UChar*)); + list->capacity += 512; + if (list->capacity < size) + { + return FALSE; + } + UChar* ptr = (UChar*)pal_reallocarray(list->items, list->capacity, sizeof(UChar)); if (ptr == NULL) { return FALSE; @@ -189,7 +194,7 @@ static UCharList* GetCustomRules(int32_t options, UColAttributeValue strength, i // doing the KanaType custom rule. customRules->capacity = 512; customRules->size = 0; - customRules->items = malloc(customRules->capacity * sizeof(UChar)); + customRules->items = pal_allocarray(customRules->capacity, sizeof(UChar)); if (customRules->items == NULL) { free(customRules); @@ -291,7 +296,7 @@ UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength); int32_t completeRulesLength = localeRulesLength + customRuleLength + 1; - UChar* completeRules = malloc(completeRulesLength * sizeof(UChar)); + UChar* completeRules = pal_allocarray(completeRulesLength, sizeof(UChar)); for (int i = 0; i < localeRulesLength; i++) { diff --git a/src/corefx/System.Globalization.Native/pal_localeNumberData.c b/src/corefx/System.Globalization.Native/pal_localeNumberData.c index 2fb1559cb77d..420097041f45 100644 --- a/src/corefx/System.Globalization.Native/pal_localeNumberData.c +++ b/src/corefx/System.Globalization.Native/pal_localeNumberData.c @@ -7,6 +7,7 @@ #include #include +#include "pal_alloc.h" #include "pal_localeNumberData.h" // invariant character definitions used by ICU @@ -86,13 +87,13 @@ static char* NormalizeNumericPattern(const UChar* srcPattern, int isNegative) if (isNegative && !minusAdded) { size_t length = (iEnd - iStart) + 2; - destPattern = malloc(length * sizeof(char)); + destPattern = pal_allocarray(length, sizeof(char)); destPattern[index++] = '-'; } else { size_t length = (iEnd - iStart) + 1; - destPattern = malloc(length * sizeof(char)); + destPattern = pal_allocarray(length, sizeof(char)); } for (int i = iStart; i <= iEnd; i++) @@ -166,7 +167,7 @@ static int GetNumericPattern(const UNumberFormat* pNumberFormat, UErrorCode ignore = U_ZERO_ERROR; int32_t icuPatternLength = unum_toPattern(pNumberFormat, FALSE, NULL, 0, &ignore) + 1; - UChar* icuPattern = malloc(icuPatternLength * sizeof(UChar)); + UChar* icuPattern = pal_allocarray(icuPatternLength, sizeof(UChar)); if (icuPattern == NULL) { return U_MEMORY_ALLOCATION_ERROR; diff --git a/src/corefx/System.Globalization.Native/pal_localeStringData.c b/src/corefx/System.Globalization.Native/pal_localeStringData.c index cac4777d2e92..cb9614ab304d 100644 --- a/src/corefx/System.Globalization.Native/pal_localeStringData.c +++ b/src/corefx/System.Globalization.Native/pal_localeStringData.c @@ -7,6 +7,7 @@ #include #include +#include "pal_alloc.h" #include "pal_localeStringData.h" /* @@ -77,7 +78,7 @@ static UErrorCode GetLocaleIso639LanguageTwoLetterName(const char* locale, UChar UErrorCode status = U_ZERO_ERROR, ignore = U_ZERO_ERROR; int32_t length = uloc_getLanguage(locale, NULL, 0, &ignore) + 1; - char* buf = malloc(length * sizeof(char)); + char* buf = pal_allocarray(length, sizeof(char)); if (buf == NULL) { return U_MEMORY_ALLOCATION_ERROR; @@ -121,7 +122,7 @@ static UErrorCode GetLocaleIso3166CountryName(const char* locale, UChar* value, UErrorCode status = U_ZERO_ERROR, ignore = U_ZERO_ERROR; int32_t length = uloc_getCountry(locale, NULL, 0, &ignore) + 1; - char* buf = malloc(length * sizeof(char)); + char* buf = pal_allocarray(length, sizeof(char)); if (buf == NULL) { return U_MEMORY_ALLOCATION_ERROR; From 0ee0cd65fc7f9234c42d6eb130d2efb9af1263fd Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 18 Apr 2019 21:29:49 +0200 Subject: [PATCH 3/3] Fix performance regression from #22390 on Linux --- src/corefx/System.Globalization.Native/pal_collation.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/corefx/System.Globalization.Native/pal_collation.c b/src/corefx/System.Globalization.Native/pal_collation.c index 0ae2c226a914..35cde1c8cb61 100644 --- a/src/corefx/System.Globalization.Native/pal_collation.c +++ b/src/corefx/System.Globalization.Native/pal_collation.c @@ -444,11 +444,15 @@ const UCollator* GetCollatorFromSortHandle(SortHandle* pSortHandle, int32_t opti TCollatorMap* map = (TCollatorMap*)malloc(sizeof(TCollatorMap)); map->key = options; - void* entry = tsearch(map, &pSortHandle->collatorsPerOptionRoot, TreeComparer); - if ((*(TCollatorMap**)entry) == map) + // tfind on glibc is significantly faster than tsearch and we expect + // to hit the cache here often so it's benefitial to prefer lookup time + // over addition time + void* entry = tfind(map, &pSortHandle->collatorsPerOptionRoot, TreeComparer); + if (entry == NULL) { pCollator = CloneCollatorWithOptions(pSortHandle->regular, options, pErr); map->UCollator = pCollator; + tsearch(map, &pSortHandle->collatorsPerOptionRoot, TreeComparer); } else {