Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/libraries/Common/src/Interop/Interop.Collation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ internal static partial class Globalization

[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_StartsWith")]
[return: MarshalAs(UnmanagedType.Bool)]
internal static extern unsafe bool StartsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options);
internal static extern unsafe bool StartsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options, int* matchedLength);

[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_EndsWith")]
[return: MarshalAs(UnmanagedType.Bool)]
internal static extern unsafe bool EndsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options);
internal static extern unsafe bool EndsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options, int* matchedLength);

[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_StartsWith")]
[return: MarshalAs(UnmanagedType.Bool)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ int32_t GlobalizationNative_IndexOf(
result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options);
if (result == UCOL_EQUAL && pMatchedLength != NULL)
{
*pMatchedLength = cwTargetLength;
*pMatchedLength = cwSourceLength;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was incorrectly returning the target length instead of the source length and was causing some newly introduced unit tests to fail.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great catch @GrabYourPitchforks ! it's not a common case code path:

// It's possible somebody passed us (source = <empty>, target = <non-empty>).

but should we consider backporting this fix to older 3.1/2.1?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't know if it requires backporting, to be honest. Here's a simplified repro:

var newString = "".Replace("\u200d", "x", StringComparison.InvariantCulture);

Before this fix, this throws an ArgumentOutOfRangeException (due to attempting to slice a span in an invalid fashion). After the fix, this correctly returns "".

To trigger the bug, the "this" string must be empty, and the "old" string must be non-empty and consist only of zero-weight chars, and the caller must be passing a linguistic comparison such as InvariantCulture. I think the likelihood of all three happening at the same time are low enough where we couldn't justify the backport.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree we don't have to service this till we get a case need it.

}

return (result == UCOL_EQUAL) ? 0 : -1;
Expand Down Expand Up @@ -551,7 +551,7 @@ int32_t GlobalizationNative_LastIndexOf(
result = GlobalizationNative_CompareString(pSortHandle, lpTarget, cwTargetLength, lpSource, cwSourceLength, options);
if (result == UCOL_EQUAL && pMatchedLength != NULL)
{
*pMatchedLength = cwTargetLength;
*pMatchedLength = cwSourceLength;
}

return (result == UCOL_EQUAL) ? 0 : -1;
Expand Down Expand Up @@ -689,13 +689,14 @@ static int32_t GetCollationElementMask(UColAttributeValue strength)
}
}

static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator, UCollationElements* pSourceIterator, UColAttributeValue strength, int32_t forwardSearch)
static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator, UCollationElements* pSourceIterator, UColAttributeValue strength, int32_t forwardSearch, int32_t* pCapturedOffset)
{
assert(strength >= UCOL_SECONDARY);

UErrorCode errorCode = U_ZERO_ERROR;
int32_t movePattern = TRUE, moveSource = TRUE;
int32_t patternElement = UCOL_IGNORABLE, sourceElement = UCOL_IGNORABLE;
int32_t capturedOffset = 0;

int32_t collationElementMask = GetCollationElementMask(strength);

Expand All @@ -707,6 +708,10 @@ static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator
}
if (moveSource)
{
if (pCapturedOffset != NULL)
{
capturedOffset = ucol_getOffset(pSourceIterator); // need to capture offset before advancing iterator
}
sourceElement = forwardSearch ? ucol_next(pSourceIterator, &errorCode) : ucol_previous(pSourceIterator, &errorCode);
}
movePattern = TRUE; moveSource = TRUE;
Expand All @@ -715,19 +720,19 @@ static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator
{
if (sourceElement == UCOL_NULLORDER)
{
return TRUE; // source is equal to pattern, we have reached both ends|beginnings at the same time
goto ReturnTrue; // source is equal to pattern, we have reached both ends|beginnings at the same time
}
else if (sourceElement == UCOL_IGNORABLE)
{
return TRUE; // the next|previous character in source is an ignorable character, an example: "o\u0000".StartsWith("o")
goto ReturnTrue; // the next|previous character in source is an ignorable character, an example: "o\u0000".StartsWith("o")
}
else if (forwardSearch && ((sourceElement & UCOL_PRIMARYORDERMASK) == 0) && (sourceElement & UCOL_SECONDARYORDERMASK) != 0)
{
return FALSE; // the next character in source text is a combining character, an example: "o\u0308".StartsWith("o")
}
else
{
return TRUE;
goto ReturnTrue;
}
}
else if (patternElement == UCOL_IGNORABLE)
Expand All @@ -743,9 +748,16 @@ static int32_t inline SimpleAffix_Iterators(UCollationElements* pPatternIterator
return FALSE;
}
}

ReturnTrue:
if (pCapturedOffset != NULL)
{
*pCapturedOffset = capturedOffset;
}
return TRUE;
}

static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t forwardSearch)
static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t forwardSearch, int32_t* pMatchedLength)
{
int32_t result = FALSE;

Expand All @@ -757,7 +769,15 @@ static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, c
{
UColAttributeValue strength = ucol_getStrength(pCollator);

result = SimpleAffix_Iterators(pPatternIterator, pSourceIterator, strength, forwardSearch);
int32_t capturedOffset = 0;
result = SimpleAffix_Iterators(pPatternIterator, pSourceIterator, strength, forwardSearch, (pMatchedLength != NULL) ? &capturedOffset : NULL);

if (result && pMatchedLength != NULL)
{
// depending on whether we're searching forward or backward, the matching substring
// is [start of source string .. curIdx] or [curIdx .. end of source string]
*pMatchedLength = (forwardSearch) ? capturedOffset : (textLength - capturedOffset);
}

ucol_closeElements(pSourceIterator);
}
Expand All @@ -768,7 +788,7 @@ static int32_t SimpleAffix(const UCollator* pCollator, UErrorCode* pErrorCode, c
return result;
}

static int32_t ComplexStartsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength)
static int32_t ComplexStartsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t* pMatchedLength)
{
int32_t result = FALSE;

Expand All @@ -786,6 +806,12 @@ static int32_t ComplexStartsWith(const UCollator* pCollator, UErrorCode* pErrorC
{
result = CanIgnoreAllCollationElements(pCollator, pText, idx);
}

if (result && pMatchedLength != NULL)
{
// adjust matched length to account for all the elements we implicitly consumed at beginning of string
*pMatchedLength = idx + usearch_getMatchedLength(pSearch);
}
}

usearch_close(pSearch);
Expand All @@ -803,9 +829,9 @@ int32_t GlobalizationNative_StartsWith(
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options)
int32_t options,
int32_t* pMatchedLength)
{

UErrorCode err = U_ZERO_ERROR;
const UCollator* pCollator = GetCollatorFromSortHandle(pSortHandle, options, &err);

Expand All @@ -815,15 +841,15 @@ int32_t GlobalizationNative_StartsWith(
}
else if (options > CompareOptionsIgnoreCase)
{
return ComplexStartsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength);
return ComplexStartsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, pMatchedLength);
}
else
{
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, TRUE);
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, TRUE, pMatchedLength);
}
}

static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength)
static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCode, const UChar* pPattern, int32_t patternLength, const UChar* pText, int32_t textLength, int32_t* pMatchedLength)
{
int32_t result = FALSE;

Expand All @@ -846,6 +872,12 @@ static int32_t ComplexEndsWith(const UCollator* pCollator, UErrorCode* pErrorCod

result = CanIgnoreAllCollationElements(pCollator, pText + matchEnd, remainingStringLength);
}

if (result && pMatchedLength != NULL)
{
// adjust matched length to account for all the elements we implicitly consumed at end of string
*pMatchedLength = textLength - idx;
}
}

usearch_close(pSearch);
Expand All @@ -863,7 +895,8 @@ int32_t GlobalizationNative_EndsWith(
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options)
int32_t options,
int32_t* pMatchedLength)
{
UErrorCode err = U_ZERO_ERROR;
const UCollator* pCollator = GetCollatorFromSortHandle(pSortHandle, options, &err);
Expand All @@ -874,11 +907,11 @@ int32_t GlobalizationNative_EndsWith(
}
else if (options > CompareOptionsIgnoreCase)
{
return ComplexEndsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength);
return ComplexEndsWith(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, pMatchedLength);
}
else
{
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, FALSE);
return SimpleAffix(pCollator, &err, lpTarget, cwTargetLength, lpSource, cwSourceLength, FALSE, pMatchedLength);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,16 @@ PALEXPORT int32_t GlobalizationNative_StartsWith(SortHandle* pSortHandle,
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options);
int32_t options,
int32_t* pMatchedLength);

PALEXPORT int32_t GlobalizationNative_EndsWith(SortHandle* pSortHandle,
const UChar* lpTarget,
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t options);
int32_t options,
int32_t* pMatchedLength);

PALEXPORT int32_t GlobalizationNative_GetSortKey(SortHandle* pSortHandle,
const UChar* lpStr,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
PER_FUNCTION_BLOCK(ucal_set, libicui18n) \
PER_FUNCTION_BLOCK(ucol_close, libicui18n) \
PER_FUNCTION_BLOCK(ucol_closeElements, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getOffset, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getRules, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getSortKey, libicui18n) \
PER_FUNCTION_BLOCK(ucol_getStrength, libicui18n) \
Expand Down Expand Up @@ -199,6 +200,7 @@ FOR_ALL_ICU_FUNCTIONS
#define ucal_set(...) ucal_set_ptr(__VA_ARGS__)
#define ucol_close(...) ucol_close_ptr(__VA_ARGS__)
#define ucol_closeElements(...) ucol_closeElements_ptr(__VA_ARGS__)
#define ucol_getOffset(...) ucol_getOffset_ptr(__VA_ARGS__)
#define ucol_getRules(...) ucol_getRules_ptr(__VA_ARGS__)
#define ucol_getSortKey(...) ucol_getSortKey_ptr(__VA_ARGS__)
#define ucol_getStrength(...) ucol_getStrength_ptr(__VA_ARGS__)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ UCalendar * ucal_open(const UChar * zoneID, int32_t len, const char * locale, UC
void ucal_set(UCalendar * cal, UCalendarDateFields field, int32_t value);
void ucol_close(UCollator * coll);
void ucol_closeElements(UCollationElements * elems);
int32_t ucol_getOffset(const UCollationElements *elems);
const UChar * ucol_getRules(const UCollator * coll, int32_t * length);
int32_t ucol_getSortKey(const UCollator * coll, const UChar * source, int32_t sourceLength, uint8_t * result, int32_t resultLength);
UCollationStrength ucol_getStrength(const UCollator * coll);
Expand Down
Loading