Skip to content

Commit 6a500cb

Browse files
committed
SSE2 str_tolower
1 parent f578d57 commit 6a500cb

File tree

1 file changed

+37
-23
lines changed

1 file changed

+37
-23
lines changed

Zend/zend_operators.c

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@
3030
#include "zend_exceptions.h"
3131
#include "zend_closures.h"
3232

33+
#ifdef __SSE2__
34+
#include <emmintrin.h>
35+
#endif
36+
3337
#if ZEND_USE_TOLOWER_L
3438
#include <locale.h>
3539
static _locale_t current_locale = NULL;
@@ -2456,17 +2460,38 @@ ZEND_API void zend_update_current_locale(void) /* {{{ */
24562460
/* }}} */
24572461
#endif
24582462

2459-
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length) /* {{{ */
2460-
{
2461-
register unsigned char *str = (unsigned char*)source;
2462-
register unsigned char *result = (unsigned char*)dest;
2463-
register unsigned char *end = str + length;
2464-
2465-
while (str < end) {
2466-
*result++ = zend_tolower_ascii(*str++);
2463+
/* Lowercase `length` bytes from `str` into `dest`.
 *
 * dest   - output buffer with room for at least `length` bytes
 * str    - input bytes; may be the same pointer as `dest` (in-place use),
 *          because every 16-byte chunk is fully loaded before it is stored
 * length - number of bytes to convert
 *
 * No terminating NUL is written; callers that need one append it themselves.
 * When SSE2 is available, inputs of 16 bytes or more are processed 16 bytes
 * at a time and the remaining tail goes through zend_tolower_ascii().
 */
static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str, size_t length) /* {{{ */
{
	const unsigned char *p = (const unsigned char*)str;
	unsigned char *q = (unsigned char*)dest;
	const unsigned char *end = p + length;
#ifdef __SSE2__
	if (length >= 16) {
		/* Exclusive bounds of the 'A'..'Z' range, broadcast to all 16 lanes. */
		const __m128i below_upper = _mm_set1_epi8('A' - 1);
		const __m128i above_upper = _mm_set1_epi8('Z' + 1);
		const __m128i delta = _mm_set1_epi8('a' - 'A');

		do {
			__m128i op = _mm_loadu_si128((const __m128i*)p);
			/* The byte compares are signed: bytes >= 0x80 are negative, fail
			 * the "greater than" test and are therefore left unchanged. */
			__m128i gt = _mm_cmpgt_epi8(op, below_upper);
			__m128i lt = _mm_cmplt_epi8(op, above_upper);
			__m128i is_upper = _mm_and_si128(gt, lt);
			/* delta in upper-case lanes, zero everywhere else */
			__m128i add = _mm_and_si128(is_upper, delta);

			_mm_storeu_si128((__m128i*)q, _mm_add_epi8(op, add));
			p += 16;
			q += 16;
			/* Compare the remaining length rather than forming p + 16, which
			 * could point more than one past the end of the buffer. */
		} while ((size_t)(end - p) >= 16);
	}
#endif
	/* Scalar path: the whole input without SSE2, otherwise the < 16 byte tail. */
	while (p < end) {
		*q++ = zend_tolower_ascii(*p++);
	}
}
2489+
/* }}} */
24692490

2491+
/* Lowercase `length` bytes of `source` into `dest`, NUL-terminate the result
 * and return `dest`. `dest` must have room for `length + 1` bytes. */
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length) /* {{{ */
{
	char *terminator = dest + length;

	zend_str_tolower_impl(dest, source, length);
	/* The helper does not write a terminator, so add it here. */
	*terminator = '\0';

	return dest;
}
24722497
/* }}} */
@@ -2479,13 +2504,7 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t len
24792504

24802505
/* Lowercase the first `length` bytes of `str` in place. No terminator is
 * written; the buffer keeps whatever follows those bytes. */
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length) /* {{{ */
{
	const char *src = str;

	/* Source and destination are the same buffer. */
	zend_str_tolower_impl(str, src, length);
}
24902509
/* }}} */
24912510

@@ -2521,7 +2540,6 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, int
25212540
{
25222541
register unsigned char *p = (unsigned char*)ZSTR_VAL(str);
25232542
register unsigned char *end = p + ZSTR_LEN(str);
2524-
25252543
while (p < end) {
25262544
if (*p != zend_tolower_ascii(*p)) {
25272545
zend_string *res = zend_string_alloc(ZSTR_LEN(str), persistent);
@@ -2531,12 +2549,8 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, int
25312549
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char*)ZSTR_VAL(str));
25322550
}
25332551
r = p + (ZSTR_VAL(res) - ZSTR_VAL(str));
2534-
while (p < end) {
2535-
*r = zend_tolower_ascii(*p);
2536-
p++;
2537-
r++;
2538-
}
2539-
*r = '\0';
2552+
zend_str_tolower_impl((char*)r, (const char*)p, end - p);
2553+
ZSTR_VAL(res)[ZSTR_LEN(res)] = '\0';
25402554
return res;
25412555
}
25422556
p++;

0 commit comments

Comments
 (0)