19
19
#include <ctype.h>
20
20
#include <sys/types.h>
21
21
22
+ #ifdef __SSE2__
23
+ #include <emmintrin.h>
24
+ #endif
25
+
22
26
#include "php.h"
23
27
24
28
#include "url.h"
@@ -444,10 +448,7 @@ static int php_htoi(char *s)
444
448
445
449
static unsigned char hexchars [] = "0123456789ABCDEF" ;
446
450
447
- /* {{{ php_url_encode
448
- */
449
- PHPAPI zend_string * php_url_encode (char const * s , size_t len )
450
- {
451
+ static zend_always_inline zend_string * php_url_encode_impl (const char * s , size_t len , zend_bool raw ) /* {{{ */ {
451
452
register unsigned char c ;
452
453
unsigned char * to ;
453
454
unsigned char const * from , * end ;
@@ -458,15 +459,76 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
458
459
start = zend_string_safe_alloc (3 , len , 0 , 0 );
459
460
to = (unsigned char * )ZSTR_VAL (start );
460
461
462
+ #ifdef __SSE2__
463
+ while (from + 16 < end ) {
464
+ __m128i mask ;
465
+ uint32_t bits ;
466
+ const __m128i _A = _mm_set1_epi8 ('A' - 1 );
467
+ const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
468
+ const __m128i _a = _mm_set1_epi8 ('a' - 1 );
469
+ const __m128i z_ = _mm_set1_epi8 ('z' + 1 );
470
+ const __m128i _zero = _mm_set1_epi8 ('0' - 1 );
471
+ const __m128i nine_ = _mm_set1_epi8 ('9' + 1 );
472
+ const __m128i dot = _mm_set1_epi8 ('.' );
473
+ const __m128i minus = _mm_set1_epi8 ('-' );
474
+ const __m128i under = _mm_set1_epi8 ('_' );
475
+
476
+ __m128i in = _mm_loadu_si128 ((__m128i * )from );
477
+
478
+ __m128i gt = _mm_cmpgt_epi8 (in , _A );
479
+ __m128i lt = _mm_cmplt_epi8 (in , Z_ );
480
+ mask = _mm_and_si128 (lt , gt ); /* upper */
481
+ gt = _mm_cmpgt_epi8 (in , _a );
482
+ lt = _mm_cmplt_epi8 (in , z_ );
483
+ mask = _mm_or_si128 (mask , _mm_and_si128 (lt , gt )); /* lower */
484
+ gt = _mm_cmpgt_epi8 (in , _zero );
485
+ lt = _mm_cmplt_epi8 (in , nine_ );
486
+ mask = _mm_or_si128 (mask , _mm_and_si128 (lt , gt )); /* number */
487
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , dot ));
488
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , minus ));
489
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , under ));
490
+
491
+ if (!raw ) {
492
+ const __m128i blank = _mm_set1_epi8 (' ' );
493
+ __m128i eq = _mm_cmpeq_epi8 (in , blank );
494
+ if (_mm_movemask_epi8 (eq )) {
495
+ in = _mm_add_epi8 (in , _mm_and_si128 (eq , _mm_set1_epi8 ('+' - ' ' )));
496
+ mask = _mm_or_si128 (mask , eq );
497
+ }
498
+ }
499
+ if (raw ) {
500
+ const __m128i wavy = _mm_set1_epi8 ('~' );
501
+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , wavy ));
502
+ }
503
+ if (((bits = _mm_movemask_epi8 (mask )) & 0xffff ) == 0xffff ) {
504
+ _mm_storeu_si128 ((__m128i * )to , in );
505
+ to += 16 ;
506
+ } else {
507
+ int i ;
508
+ unsigned char xmm [16 ];
509
+ _mm_storeu_si128 ((__m128i * )xmm , in );
510
+ for (i = 0 ; i < sizeof (xmm ); i ++ ) {
511
+ if ((bits & (0x1 << i ))) {
512
+ * to ++ = xmm [i ];
513
+ } else {
514
+ * to ++ = '%' ;
515
+ * to ++ = hexchars [xmm [i ] >> 4 ];
516
+ * to ++ = hexchars [xmm [i ] & 0xf ];
517
+ }
518
+ }
519
+ }
520
+ from += 16 ;
521
+ }
522
+ #endif
461
523
while (from < end ) {
462
524
c = * from ++ ;
463
525
464
- if (c == ' ' ) {
526
+ if (! raw && c == ' ' ) {
465
527
* to ++ = '+' ;
466
528
} else if ((c < '0' && c != '-' && c != '.' ) ||
467
- (c < 'A' && c > '9' ) ||
468
- (c > 'Z' && c < 'a' && c != '_' ) ||
469
- ( c > 'z' )) {
529
+ (c < 'A' && c > '9' ) ||
530
+ (c > 'Z' && c < 'a' && c != '_' ) ||
531
+ ( c > 'z' && (! raw || c != '~' ) )) {
470
532
to [0 ] = '%' ;
471
533
to [1 ] = hexchars [c >> 4 ];
472
534
to [2 ] = hexchars [c & 15 ];
@@ -483,6 +545,14 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
483
545
}
484
546
/* }}} */
485
547
548
+ /* {{{ php_url_encode
549
+ */
550
+ PHPAPI zend_string * php_url_encode (char const * s , size_t len )
551
+ {
552
+ return php_url_encode_impl (s , len , 0 ); /* raw = 0: the shared encoder emits ' ' as '+' and percent-encodes '~' along with every other byte above 'z' */
553
+ }
554
+ /* }}} */
555
+
486
556
/* {{{ proto string urlencode(string str)
487
557
URL-encodes string */
488
558
PHP_FUNCTION (urlencode )
@@ -545,29 +615,7 @@ PHPAPI size_t php_url_decode(char *str, size_t len)
545
615
*/
546
616
PHPAPI zend_string * php_raw_url_encode (char const * s , size_t len )
547
617
{
548
- register size_t x , y ;
549
- zend_string * str ;
550
- char * ret ;
551
-
552
- str = zend_string_safe_alloc (3 , len , 0 , 0 );
553
- ret = ZSTR_VAL (str );
554
- for (x = 0 , y = 0 ; len -- ; x ++ , y ++ ) {
555
- char c = s [x ];
556
-
557
- ret [y ] = c ;
558
- if ((c < '0' && c != '-' && c != '.' ) ||
559
- (c < 'A' && c > '9' ) ||
560
- (c > 'Z' && c < 'a' && c != '_' ) ||
561
- (c > 'z' && c != '~' )) {
562
- ret [y ++ ] = '%' ;
563
- ret [y ++ ] = hexchars [(unsigned char ) c >> 4 ];
564
- ret [y ] = hexchars [(unsigned char ) c & 15 ];
565
- }
566
- }
567
- ret [y ] = '\0' ;
568
- str = zend_string_truncate (str , y , 0 );
569
-
570
- return str ;
618
+ return php_url_encode_impl (s , len , 1 ); /* raw = 1: the shared encoder percent-encodes ' ' (no '+' substitution) and leaves '~' unescaped, the same character rules as the removed hand-written loop */
571
619
}
572
620
/* }}} */
573
621
0 commit comments