@@ -101,11 +101,8 @@ PW32CP wchar_t *php_win32_cp_conv_to_w(DWORD cp, DWORD flags, const char* in, si
101
101
return NULL;
102
102
PW32CP wchar_t * php_win32_cp_conv_ascii_to_w (const char * in , size_t in_len , size_t * out_len )
103
103
{/*{{{*/
104
- wchar_t * ret = NULL ;
105
- const char * idx = in , * end ;
106
- size_t i = 0 ;
107
- wchar_t * ret_idx ;
108
-
104
+ wchar_t * ret , * ret_idx ;
105
+ const char * idx = in , * end , * aidx ;
109
106
110
107
assert (in && in_len ? in [in_len ] == '\0' : 1 );
111
108
@@ -118,16 +115,29 @@ PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size
118
115
}
119
116
120
117
end = in + in_len ;
118
+ aidx = (const char * )ZEND_SLIDE_TO_ALIGNED16 (in );
121
119
122
- while (end - idx > 16 ) {
123
- const __m128i block = _mm_loadu_si128 ((__m128i * )idx );
124
- if (_mm_movemask_epi8 (block )) {
125
- ASCII_FAIL_RETURN ()
120
+ if (in_len > 15 ) {
121
+ /* Process unaligned chunk. */
122
+ while (idx < aidx ) {
123
+ if (!__isascii (* idx ) && '\0' != * idx ) {
124
+ ASCII_FAIL_RETURN ()
125
+ }
126
+ idx ++ ;
127
+ }
128
+
129
+ /* Process aligned chunk. */
130
+ while (end - idx > 15 ) {
131
+ const __m128i block = _mm_load_si128 ((__m128i * )idx );
132
+ if (_mm_movemask_epi8 (block )) {
133
+ ASCII_FAIL_RETURN ()
134
+ }
135
+ idx += 16 ;
126
136
}
127
- idx += 16 ;
128
137
}
129
- /* Finish the job on remaining chars. */
130
- while (idx != end ) {
138
+
139
+ /* Process the trailing part, or otherwise process string < 16 bytes. */
140
+ while (idx < end ) {
131
141
if (!__isascii (* idx ) && '\0' != * idx ) {
132
142
ASCII_FAIL_RETURN ()
133
143
}
@@ -140,29 +150,46 @@ PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size
140
150
return NULL ;
141
151
}
142
152
143
-
144
153
ret_idx = ret ;
145
154
idx = in ;
146
- const __m128i mask = _mm_set1_epi32 (0 );
147
- while (i < in_len && in_len - i > 16 ) {
148
- __m128i hl ;
149
- const __m128i block = _mm_loadu_si128 ((__m128i * )idx );
150
155
151
- hl = _mm_unpacklo_epi8 (block , mask );
152
- _mm_storeu_si128 ((__m128i * )ret_idx , hl );
156
+ /* Check and conversion could be merged. This however would
157
+ be more expensive if a non-ASCII string was passed.
158
+ TODO check whether the impact is acceptable. */
159
+ if (in_len > 15 ) {
160
+ /* Process unaligned chunk. */
161
+ while (idx < aidx ) {
162
+ * ret_idx ++ = (wchar_t )* idx ++ ;
163
+ }
153
164
154
- ret_idx += 8 ;
155
- hl = _mm_unpackhi_epi8 (block , mask );
156
- _mm_storeu_si128 ((__m128i * )ret_idx , hl );
165
+ /* Process aligned chunk. */
166
+ if (end - idx > 15 ) {
167
+ const __m128i mask = _mm_set1_epi32 (0 );
168
+ while (end - idx > 15 ) {
169
+ const __m128i block = _mm_load_si128 ((__m128i * )idx );
157
170
158
- i += 16 ;
159
- idx += 16 ;
160
- ret_idx += 8 ;
171
+ {
172
+ const __m128i lo = _mm_unpacklo_epi8 (block , mask );
173
+ _mm_storeu_si128 ((__m128i * )ret_idx , lo );
174
+ }
175
+
176
+ ret_idx += 8 ;
177
+ {
178
+ const __m128i hi = _mm_unpackhi_epi8 (block , mask );
179
+ _mm_storeu_si128 ((__m128i * )ret_idx , hi );
180
+ }
181
+
182
+ idx += 16 ;
183
+ ret_idx += 8 ;
184
+ }
185
+ }
161
186
}
162
- /* Finish the job on remaining chars. */
163
- while (in_len > i ++ ) {
187
+
188
+ /* Process the trailing part, or otherwise process string < 16 bytes. */
189
+ while (idx < end ) {
164
190
* ret_idx ++ = (wchar_t )* idx ++ ;
165
191
}
192
+
166
193
ret [in_len ] = L'\0' ;
167
194
168
195
assert (ret ? wcslen (ret ) == in_len : 1 );
0 commit comments