@@ -65,13 +65,14 @@ const UChar g_HalfFullLowerChars[] = {
6565
6666 // fullwidth characters
6767 0x3002 , 0x300c , 0x300d , 0x3001 , 0x30fb , 0x30f2 , 0x30a1 , 0x30a3 , 0x30a5 , 0x30a7 , 0x30a9 , 0x30e3 , 0x30e5 , 0x30e7 , 0x30c3 ,
68- 0x30fc , 0x30a2 , 0x30a4 , 0x30a6 , 0x30a8 , 0x30aa , 0x30ab , 0x30ad , 0x30af , 0x30b1 , 0x30b3 , 0x30b5 , 0x30b7 , 0x30b9 , 0x30bb ,
69- 0x30bd , 0x30bf , 0x30c1 , 0x30c4 , 0x30c6 , 0x30c8 , 0x30ca , 0x30cb , 0x30cc , 0x30cd , 0x30ce , 0x30cf , 0x30d2 , 0x30d5 , 0x30d8 ,
70- 0x30db , 0x30de , 0x30df , 0x30e0 , 0x30e1 , 0x30e2 , 0x30e4 , 0x30e6 , 0x30e8 , 0x30e9 , 0x30ea , 0x30eb , 0x30ec , 0x30ed , 0x30ef ,
71- 0x30f3 , 0x3099 , 0x309a , 0x3164 , 0x3131 , 0x3132 , 0x3133 , 0x3134 , 0x3135 , 0x3136 , 0x3137 , 0x3138 , 0x3139 , 0x313a , 0x313b ,
72- 0x313c , 0x313d , 0x313e , 0x313f , 0x3140 , 0x3141 , 0x3142 , 0x3143 , 0x3144 , 0x3145 , 0x3146 , 0x3147 , 0x3148 , 0x3149 , 0x314a ,
73- 0x314b , 0x314c , 0x314d , 0x314e , 0x314f , 0x3150 , 0x3151 , 0x3152 , 0x3153 , 0x3154 , 0x3155 , 0x3156 , 0x3157 , 0x3158 , 0x3159 ,
74- 0x315a , 0x315b , 0x315c , 0x315d , 0x315e , 0x315f , 0x3160 , 0x3161 , 0x3162 , 0x3163
68+ 0x30a2 , 0x30a4 , 0x30a6 , 0x30a8 , 0x30aa , 0x30ab , 0x30ad , 0x30af , 0x30b1 , 0x30b3 , 0x30b5 , 0x30b7 , 0x30b9 , 0x30bb , 0x30bd ,
69+ 0x30bf , 0x30c1 , 0x30c4 , 0x30c6 , 0x30c8 , 0x30ca , 0x30cb , 0x30cc , 0x30cd , 0x30ce , 0x30cf , 0x30d2 , 0x30d5 , 0x30d8 , 0x30db ,
70+ 0x30de , 0x30df , 0x30e0 , 0x30e1 , 0x30e2 , 0x30e4 , 0x30e6 , 0x30e8 , 0x30e9 , 0x30ea , 0x30eb , 0x30ec , 0x30ed , 0x30ef , 0x30f3 ,
71+ 0x3164 , 0x3131 , 0x3132 , 0x3133 , 0x3134 , 0x3135 , 0x3136 , 0x3137 , 0x3138 , 0x3139 , 0x313a , 0x313b , 0x313c , 0x313d , 0x313e ,
72+ 0x313f , 0x3140 , 0x3141 , 0x3142 , 0x3143 , 0x3144 , 0x3145 , 0x3146 , 0x3147 , 0x3148 , 0x3149 , 0x314a , 0x314b , 0x314c , 0x314d ,
73+ 0x314e , 0x314f , 0x3150 , 0x3151 , 0x3152 , 0x3153 , 0x3154 , 0x3155 , 0x3156 , 0x3157 , 0x3158 , 0x3159 , 0x315a , 0x315b , 0x315c ,
74+ 0x315d , 0x315e , 0x315f , 0x3160 , 0x3161 , 0x3162 , 0x3163
75+
7576};
7677const UChar g_HalfFullHigherChars[] = {
7778 // fullwidth characters
@@ -85,13 +86,13 @@ const UChar g_HalfFullHigherChars[] = {
8586
8687 // halfwidth characters
8788 0xff61 , 0xff62 , 0xff63 , 0xff64 , 0xff65 , 0xff66 , 0xff67 , 0xff68 , 0xff69 , 0xff6a , 0xff6b , 0xff6c , 0xff6d , 0xff6e , 0xff6f ,
88- 0xff70 , 0xff71 , 0xff72 , 0xff73 , 0xff74 , 0xff75 , 0xff76 , 0xff77 , 0xff78 , 0xff79 , 0xff7a , 0xff7b , 0xff7c , 0xff7d , 0xff7e ,
89- 0xff7f , 0xff80 , 0xff81 , 0xff82 , 0xff83 , 0xff84 , 0xff85 , 0xff86 , 0xff87 , 0xff88 , 0xff89 , 0xff8a , 0xff8b , 0xff8c , 0xff8d ,
90- 0xff8e , 0xff8f , 0xff90 , 0xff91 , 0xff92 , 0xff93 , 0xff94 , 0xff95 , 0xff96 , 0xff97 , 0xff98 , 0xff99 , 0xff9a , 0xff9b , 0xff9c ,
91- 0xff9d , 0xff9e , 0xff9f , 0xffa0 , 0xffa1 , 0xffa2 , 0xffa3 , 0xffa4 , 0xffa5 , 0xffa6 , 0xffa7 , 0xffa8 , 0xffa9 , 0xffaa , 0xffab ,
92- 0xffac , 0xffad , 0xffae , 0xffaf , 0xffb0 , 0xffb1 , 0xffb2 , 0xffb3 , 0xffb4 , 0xffb5 , 0xffb6 , 0xffb7 , 0xffb8 , 0xffb9 , 0xffba ,
93- 0xffbb , 0xffbc , 0xffbd , 0xffbe , 0xffc2 , 0xffc3 , 0xffc4 , 0xffc5 , 0xffc6 , 0xffc7 , 0xffca , 0xffcb , 0xffcc , 0xffcd , 0xffce ,
94- 0xffcf , 0xffd2 , 0xffd3 , 0xffd4 , 0xffd5 , 0xffd6 , 0xffd7 , 0xffda , 0xffdb , 0xffdc
89+ 0xff71 , 0xff72 , 0xff73 , 0xff74 , 0xff75 , 0xff76 , 0xff77 , 0xff78 , 0xff79 , 0xff7a , 0xff7b , 0xff7c , 0xff7d , 0xff7e , 0xff7f ,
90+ 0xff80 , 0xff81 , 0xff82 , 0xff83 , 0xff84 , 0xff85 , 0xff86 , 0xff87 , 0xff88 , 0xff89 , 0xff8a , 0xff8b , 0xff8c , 0xff8d , 0xff8e ,
91+ 0xff8f , 0xff90 , 0xff91 , 0xff92 , 0xff93 , 0xff94 , 0xff95 , 0xff96 , 0xff97 , 0xff98 , 0xff99 , 0xff9a , 0xff9b , 0xff9c , 0xff9d ,
92+ 0xffa0 , 0xffa1 , 0xffa2 , 0xffa3 , 0xffa4 , 0xffa5 , 0xffa6 , 0xffa7 , 0xffa8 , 0xffa9 , 0xffaa , 0xffab , 0xffac , 0xffad , 0xffae ,
93+ 0xffaf , 0xffb0 , 0xffb1 , 0xffb2 , 0xffb3 , 0xffb4 , 0xffb5 , 0xffb6 , 0xffb7 , 0xffb8 , 0xffb9 , 0xffba , 0xffbb , 0xffbc , 0xffbd ,
94+ 0xffbe , 0xffc2 , 0xffc3 , 0xffc4 , 0xffc5 , 0xffc6 , 0xffc7 , 0xffca , 0xffcb , 0xffcc , 0xffcd , 0xffce , 0xffcf , 0xffd2 , 0xffd3 ,
95+ 0xffd4 , 0xffd5 , 0xffd6 , 0xffd7 , 0xffda , 0xffdb , 0xffdc
9596};
9697const int32_t g_HalfFullCharsLength = (sizeof (g_HalfFullHigherChars) / sizeof (UChar)); // element count of g_HalfFullHigherChars; g_HalfFullLowerChars is indexed with the same bound, so both tables must stay the same length
9798
@@ -109,20 +110,36 @@ bool NeedsEscape(UChar character)
109110 || (0x7b <= character && character <= 0x7e ));
110111}
111112
113+ /*
114+ Gets a value indicating whether the HalfFullHigher character is considered a symbol character (true for 0xff61-0xff65 and 0xffe0-0xffe6).
115+
116+ The ranges specified here only cover characters in the g_HalfFullHigherChars list and need
117+ to be combined with NeedsEscape above, applied to the matching g_HalfFullLowerChars entry, to cover all the IgnoreSymbols characters.
118+ This is done so we can use range checks instead of comparing individual characters.
119+
120+ These ranges were obtained by running the above characters through .NET CompareInfo.Compare
121+ with CompareOptions.IgnoreSymbols on Windows.
122+ */
123+ bool IsHalfFullHigherSymbol (UChar character)
124+ {
125+ return (0xffe0 <= character && character <= 0xffe6 )
126+ || (0xff61 <= character && character <= 0xff65 );
127+ }
128+
112129/*
113130Gets a string of custom collation rules, if necessary.
114131
115132Since the CompareOptions flags don't map 1:1 with ICU default functionality, we need to fall back to using
116133custom rules in order to support IgnoreKanaType and IgnoreWidth CompareOptions correctly.
117134*/
118- std::vector<UChar> GetCustomRules (int32_t options, UColAttributeValue strength)
135+ std::vector<UChar> GetCustomRules (int32_t options, UColAttributeValue strength, bool isIgnoreSymbols )
119136{
120137 bool isIgnoreKanaType = (options & CompareOptionsIgnoreKanaType) == CompareOptionsIgnoreKanaType;
121138 bool isIgnoreWidth = (options & CompareOptionsIgnoreWidth) == CompareOptionsIgnoreWidth;
122139
123- // kana differs at the quaternary level
124- bool needsIgnoreKanaTypeCustomRule = isIgnoreKanaType && strength >= UCOL_QUATERNARY ;
125- bool needsNotIgnoreKanaTypeCustomRule = !isIgnoreKanaType && strength < UCOL_QUATERNARY ;
140+ // kana differs at the tertiary level
141+ bool needsIgnoreKanaTypeCustomRule = isIgnoreKanaType && strength >= UCOL_TERTIARY ;
142+ bool needsNotIgnoreKanaTypeCustomRule = !isIgnoreKanaType && strength < UCOL_TERTIARY ;
126143
127144 // character width differs at the tertiary level
128145 bool needsIgnoreWidthCustomRule = isIgnoreWidth && strength >= UCOL_TERTIARY;
@@ -157,22 +174,32 @@ std::vector<UChar> GetCustomRules(int32_t options, UColAttributeValue strength)
157174 if (needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule)
158175 {
159176 UChar compareChar = needsIgnoreWidthCustomRule ? ' =' : ' <' ;
160-
177+
161178 UChar lowerChar;
179+ UChar higherChar;
180+ bool needsEscape;
162181 for (int i = 0 ; i < g_HalfFullCharsLength; i++)
163182 {
164- customRules.push_back (' &' );
165-
166- // the lower chars need to be checked for escaping since they contain ASCII punctuation
167183 lowerChar = g_HalfFullLowerChars[i];
168- if (NeedsEscape (lowerChar))
184+ higherChar = g_HalfFullHigherChars[i];
185+ // the lower chars need to be checked for escaping since they contain ASCII punctuation
186+ needsEscape = NeedsEscape (lowerChar);
187+
188+ // when isIgnoreSymbols is true and we are not ignoring width, check to see if
189+ // this character is a symbol, and if so skip it
190+ if (!(isIgnoreSymbols && needsNotIgnoreWidthCustomRule && (needsEscape || IsHalfFullHigherSymbol (higherChar))))
169191 {
170- customRules.push_back (' \\ ' );
171- }
172- customRules.push_back (lowerChar);
192+ customRules.push_back (' &' );
173193
174- customRules.push_back (compareChar);
175- customRules.push_back (g_HalfFullHigherChars[i]);
194+ if (needsEscape)
195+ {
196+ customRules.push_back (' \\ ' );
197+ }
198+ customRules.push_back (lowerChar);
199+
200+ customRules.push_back (compareChar);
201+ customRules.push_back (higherChar);
202+ }
176203 }
177204 }
178205 }
@@ -205,7 +232,7 @@ UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options,
205232 }
206233
207234 UCollator* pClonedCollator;
208- std::vector<UChar> customRules = GetCustomRules (options, strength);
235+ std::vector<UChar> customRules = GetCustomRules (options, strength, isIgnoreSymbols );
209236 if (customRules.empty ())
210237 {
211238 pClonedCollator = ucol_safeClone (pCollator, nullptr , nullptr , pErr);
0 commit comments