Skip to content

Commit d5d99ce

Browse files
committed
Merge branch 'sse2_urlencode' of https://github.com/laruence/php-src
* 'sse2_urlencode' of https://github.com/laruence/php-src: drop use of extract_epi16; use SSE2 instructions to do url_encode
2 parents 74f3bfc + e97a679 commit d5d99ce

File tree

1 file changed

+79
-31
lines changed

1 file changed

+79
-31
lines changed

ext/standard/url.c

Lines changed: 79 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,10 @@
1919
#include <ctype.h>
2020
#include <sys/types.h>
2121

22+
#ifdef __SSE2__
23+
#include <emmintrin.h>
24+
#endif
25+
2226
#include "php.h"
2327

2428
#include "url.h"
@@ -444,10 +448,7 @@ static int php_htoi(char *s)
444448

445449
static unsigned char hexchars[] = "0123456789ABCDEF";
446450

447-
/* {{{ php_url_encode
448-
*/
449-
PHPAPI zend_string *php_url_encode(char const *s, size_t len)
450-
{
451+
static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t len, zend_bool raw) /* {{{ */ {
451452
register unsigned char c;
452453
unsigned char *to;
453454
unsigned char const *from, *end;
@@ -458,15 +459,76 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
458459
start = zend_string_safe_alloc(3, len, 0, 0);
459460
to = (unsigned char*)ZSTR_VAL(start);
460461

462+
#ifdef __SSE2__
463+
while (from + 16 < end) {
464+
__m128i mask;
465+
uint32_t bits;
466+
const __m128i _A = _mm_set1_epi8('A' - 1);
467+
const __m128i Z_ = _mm_set1_epi8('Z' + 1);
468+
const __m128i _a = _mm_set1_epi8('a' - 1);
469+
const __m128i z_ = _mm_set1_epi8('z' + 1);
470+
const __m128i _zero = _mm_set1_epi8('0' - 1);
471+
const __m128i nine_ = _mm_set1_epi8('9' + 1);
472+
const __m128i dot = _mm_set1_epi8('.');
473+
const __m128i minus = _mm_set1_epi8('-');
474+
const __m128i under = _mm_set1_epi8('_');
475+
476+
__m128i in = _mm_loadu_si128((__m128i *)from);
477+
478+
__m128i gt = _mm_cmpgt_epi8(in, _A);
479+
__m128i lt = _mm_cmplt_epi8(in, Z_);
480+
mask = _mm_and_si128(lt, gt); /* upper */
481+
gt = _mm_cmpgt_epi8(in, _a);
482+
lt = _mm_cmplt_epi8(in, z_);
483+
mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* lower */
484+
gt = _mm_cmpgt_epi8(in, _zero);
485+
lt = _mm_cmplt_epi8(in, nine_);
486+
mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* number */
487+
mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, dot));
488+
mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, minus));
489+
mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, under));
490+
491+
if (!raw) {
492+
const __m128i blank = _mm_set1_epi8(' ');
493+
__m128i eq = _mm_cmpeq_epi8(in, blank);
494+
if (_mm_movemask_epi8(eq)) {
495+
in = _mm_add_epi8(in, _mm_and_si128(eq, _mm_set1_epi8('+' - ' ')));
496+
mask = _mm_or_si128(mask, eq);
497+
}
498+
}
499+
if (raw) {
500+
const __m128i wavy = _mm_set1_epi8('~');
501+
mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, wavy));
502+
}
503+
if (((bits = _mm_movemask_epi8(mask)) & 0xffff) == 0xffff) {
504+
_mm_storeu_si128((__m128i*)to, in);
505+
to += 16;
506+
} else {
507+
int i;
508+
unsigned char xmm[16];
509+
_mm_storeu_si128((__m128i*)xmm, in);
510+
for (i = 0; i < sizeof(xmm); i++) {
511+
if ((bits & (0x1 << i))) {
512+
*to++ = xmm[i];
513+
} else {
514+
*to++ = '%';
515+
*to++ = hexchars[xmm[i] >> 4];
516+
*to++ = hexchars[xmm[i] & 0xf];
517+
}
518+
}
519+
}
520+
from += 16;
521+
}
522+
#endif
461523
while (from < end) {
462524
c = *from++;
463525

464-
if (c == ' ') {
526+
if (!raw && c == ' ') {
465527
*to++ = '+';
466528
} else if ((c < '0' && c != '-' && c != '.') ||
467-
(c < 'A' && c > '9') ||
468-
(c > 'Z' && c < 'a' && c != '_') ||
469-
(c > 'z')) {
529+
(c < 'A' && c > '9') ||
530+
(c > 'Z' && c < 'a' && c != '_') ||
531+
(c > 'z' && (!raw || c != '~'))) {
470532
to[0] = '%';
471533
to[1] = hexchars[c >> 4];
472534
to[2] = hexchars[c & 15];
@@ -483,6 +545,14 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
483545
}
484546
/* }}} */
485547

548+
/* {{{ php_url_encode
549+
*/
550+
PHPAPI zend_string *php_url_encode(char const *s, size_t len)
551+
{
552+
return php_url_encode_impl(s, len, 0);
553+
}
554+
/* }}} */
555+
486556
/* {{{ proto string urlencode(string str)
487557
URL-encodes string */
488558
PHP_FUNCTION(urlencode)
@@ -545,29 +615,7 @@ PHPAPI size_t php_url_decode(char *str, size_t len)
545615
*/
546616
PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len)
547617
{
548-
register size_t x, y;
549-
zend_string *str;
550-
char *ret;
551-
552-
str = zend_string_safe_alloc(3, len, 0, 0);
553-
ret = ZSTR_VAL(str);
554-
for (x = 0, y = 0; len--; x++, y++) {
555-
char c = s[x];
556-
557-
ret[y] = c;
558-
if ((c < '0' && c != '-' && c != '.') ||
559-
(c < 'A' && c > '9') ||
560-
(c > 'Z' && c < 'a' && c != '_') ||
561-
(c > 'z' && c != '~')) {
562-
ret[y++] = '%';
563-
ret[y++] = hexchars[(unsigned char) c >> 4];
564-
ret[y] = hexchars[(unsigned char) c & 15];
565-
}
566-
}
567-
ret[y] = '\0';
568-
str = zend_string_truncate(str, y, 0);
569-
570-
return str;
618+
return php_url_encode_impl(s, len, 1);
571619
}
572620
/* }}} */
573621

0 commit comments

Comments
 (0)