Skip to content

Commit b6f66f1

Browse files
committed
Reimplement ASCII conversion using aligned routine
1 parent 92f9fde commit b6f66f1

File tree

1 file changed

+54
-27
lines changed

1 file changed

+54
-27
lines changed

win32/codepage.c

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,8 @@ PW32CP wchar_t *php_win32_cp_conv_to_w(DWORD cp, DWORD flags, const char* in, si
101101
return NULL;
102102
PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size_t *out_len)
103103
{/*{{{*/
104-
wchar_t *ret = NULL;
105-
const char *idx = in, *end;
106-
size_t i = 0;
107-
wchar_t *ret_idx;
108-
104+
wchar_t *ret, *ret_idx;
105+
const char *idx = in, *end, *aidx;
109106

110107
assert(in && in_len ? in[in_len] == '\0' : 1);
111108

@@ -118,16 +115,29 @@ PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size
118115
}
119116

120117
end = in + in_len;
118+
aidx = (const char *)ZEND_SLIDE_TO_ALIGNED16(in);
121119

122-
while (end - idx > 16) {
123-
const __m128i block = _mm_loadu_si128((__m128i *)idx);
124-
if (_mm_movemask_epi8(block)) {
125-
ASCII_FAIL_RETURN()
120+
if (in_len > 15) {
121+
/* Process unaligned chunk. */
122+
while (idx < aidx) {
123+
if (!__isascii(*idx) && '\0' != *idx) {
124+
ASCII_FAIL_RETURN()
125+
}
126+
idx++;
127+
}
128+
129+
/* Process aligned chunk. */
130+
while (end - idx > 15) {
131+
const __m128i block = _mm_load_si128((__m128i *)idx);
132+
if (_mm_movemask_epi8(block)) {
133+
ASCII_FAIL_RETURN()
134+
}
135+
idx += 16;
126136
}
127-
idx += 16;
128137
}
129-
/* Finish the job on remaining chars. */
130-
while (idx != end) {
138+
139+
/* Process the trailing part, or otherwise process string < 16 bytes. */
140+
while (idx < end) {
131141
if (!__isascii(*idx) && '\0' != *idx) {
132142
ASCII_FAIL_RETURN()
133143
}
@@ -140,29 +150,46 @@ PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size
140150
return NULL;
141151
}
142152

143-
144153
ret_idx = ret;
145154
idx = in;
146-
const __m128i mask = _mm_set1_epi32(0);
147-
while (i < in_len && in_len - i > 16) {
148-
__m128i hl;
149-
const __m128i block = _mm_loadu_si128((__m128i *)idx);
150155

151-
hl = _mm_unpacklo_epi8(block, mask);
152-
_mm_storeu_si128((__m128i *)ret_idx, hl);
156+
/* Check and conversion could be merged. This however would
157+
be more expensive, if a non ASCII string was passed.
158+
TODO check whether the impact is acceptable. */
159+
if (in_len > 15) {
160+
/* Process unaligned chunk. */
161+
while (idx < aidx) {
162+
*ret_idx++ = (wchar_t)*idx++;
163+
}
153164

154-
ret_idx += 8;
155-
hl = _mm_unpackhi_epi8(block, mask);
156-
_mm_storeu_si128((__m128i *)ret_idx, hl);
165+
/* Process aligned chunk. */
166+
if (end - idx > 15) {
167+
const __m128i mask = _mm_set1_epi32(0);
168+
while (end - idx > 15) {
169+
const __m128i block = _mm_load_si128((__m128i *)idx);
157170

158-
i += 16;
159-
idx += 16;
160-
ret_idx += 8;
171+
{
172+
const __m128i lo = _mm_unpacklo_epi8(block, mask);
173+
_mm_storeu_si128((__m128i *)ret_idx, lo);
174+
}
175+
176+
ret_idx += 8;
177+
{
178+
const __m128i hi = _mm_unpackhi_epi8(block, mask);
179+
_mm_storeu_si128((__m128i *)ret_idx, hi);
180+
}
181+
182+
idx += 16;
183+
ret_idx += 8;
184+
}
185+
}
161186
}
162-
/* Finish the job on remaining chars. */
163-
while (in_len > i++) {
187+
188+
/* Process the trailing part, or otherwise process string < 16 bytes. */
189+
while (idx < end) {
164190
*ret_idx++ = (wchar_t)*idx++;
165191
}
192+
166193
ret[in_len] = L'\0';
167194

168195
assert(ret ? wcslen(ret) == in_len : 1);

0 commit comments

Comments
 (0)