@@ -167,7 +167,7 @@ static PHP_MINFO_FUNCTION(json)
167
167
}
168
168
/* }}} */
169
169
170
- static void json_escape_string (smart_str * buf , char * s , int len , int options TSRMLS_DC );
170
+ static void json_escape_string (smart_str * buf , char * s , size_t len , int options TSRMLS_DC );
171
171
172
172
static int json_determine_array_type (zval * val TSRMLS_DC ) /* {{{ */
173
173
{
@@ -380,12 +380,11 @@ static int json_utf8_to_utf16(unsigned short *utf16, char utf8[], int len) /* {{
380
380
}
381
381
/* }}} */
382
382
383
- static void json_escape_string (smart_str * buf , char * s , int len , int options TSRMLS_DC ) /* {{{ */
383
+ static void json_escape_string (smart_str * buf , char * s , size_t len , int options TSRMLS_DC ) /* {{{ */
384
384
{
385
- int pos = 0 , ulen = 0 ;
386
- unsigned short us ;
387
- unsigned short * utf16 ;
388
- size_t newlen ;
385
+ int status ;
386
+ unsigned int us , next_us = 0 ;
387
+ size_t pos , checkpoint , newlen ;
389
388
390
389
if (len == 0 ) {
391
390
smart_str_appendl (buf , "\"\"" , 2 );
@@ -416,34 +415,51 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR
416
415
417
416
}
418
417
419
- utf16 = (options & PHP_JSON_UNESCAPED_UNICODE ) ? NULL : (unsigned short * ) safe_emalloc (len , sizeof (unsigned short ), 0 );
420
- ulen = json_utf8_to_utf16 (utf16 , s , len );
421
- if (ulen <= 0 ) {
422
- if (utf16 ) {
423
- efree (utf16 );
424
- }
425
- if (ulen < 0 ) {
418
+ if (options & PHP_JSON_UNESCAPED_UNICODE ) {
419
+ /* validate UTF-8 string first */
420
+ if (json_utf8_to_utf16 (NULL , s , len ) < 0 ) {
426
421
JSON_G (error_code ) = PHP_JSON_ERROR_UTF8 ;
427
422
smart_str_appendl (buf , "null" , 4 );
428
- } else {
429
- smart_str_appendl (buf , "\"\"" , 2 );
423
+ return ;
430
424
}
431
- return ;
432
- }
433
- if (!(options & PHP_JSON_UNESCAPED_UNICODE )) {
434
- len = ulen ;
435
425
}
436
426
427
+ pos = 0 ;
428
+ checkpoint = buf -> s ? buf -> s -> len : 0 ;
429
+
437
430
/* pre-allocate for string length plus 2 quotes */
438
431
smart_str_alloc (buf , len + 2 , 0 );
439
432
smart_str_appendc (buf , '"' );
440
433
441
- while (pos < len )
442
- {
443
- us = (options & PHP_JSON_UNESCAPED_UNICODE ) ? s [pos ++ ] : utf16 [pos ++ ];
434
+ do {
435
+ if (UNEXPECTED (next_us )) {
436
+ us = next_us ;
437
+ next_us = 0 ;
438
+ } else {
439
+ us = (unsigned char )s [pos ];
440
+ if (!(options & PHP_JSON_UNESCAPED_UNICODE ) && us >= 0x80 ) {
441
+ /* UTF-8 character */
442
+ us = php_next_utf8_char ((const unsigned char * )s , len , & pos , & status );
443
+ if (status != SUCCESS ) {
444
+ if (buf -> s ) {
445
+ buf -> s -> len = checkpoint ;
446
+ }
447
+ JSON_G (error_code ) = PHP_JSON_ERROR_UTF8 ;
448
+ smart_str_appendl (buf , "null" , 4 );
449
+ return ;
450
+ }
451
+ /* From http://en.wikipedia.org/wiki/UTF16 */
452
+ if (us >= 0x10000 ) {
453
+ us -= 0x10000 ;
454
+ next_us = (unsigned short )((us & 0x3ff ) | 0xdc00 );
455
+ us = (unsigned short )((us >> 10 ) | 0xd800 );
456
+ }
457
+ } else {
458
+ pos ++ ;
459
+ }
460
+ }
444
461
445
- switch (us )
446
- {
462
+ switch (us ) {
447
463
case '"' :
448
464
if (options & PHP_JSON_HEX_QUOT ) {
449
465
smart_str_appendl (buf , "\\u0022" , 6 );
@@ -528,12 +544,9 @@ static void json_escape_string(smart_str *buf, char *s, int len, int options TSR
528
544
}
529
545
break ;
530
546
}
531
- }
532
-
547
+ } while ( pos < len || next_us );
548
+
533
549
smart_str_appendc (buf , '"' );
534
- if (utf16 ) {
535
- efree (utf16 );
536
- }
537
550
}
538
551
/* }}} */
539
552
0 commit comments