File tree 1 file changed +9
-1
lines changed
1 file changed +9
-1
lines changed Original file line number Diff line number Diff line change @@ -517,8 +517,16 @@ static bool dom_decode_encode_fast_path(
517
517
const lxb_char_t * buf_ref = * buf_ref_ref ;
518
518
const lxb_char_t * last_output = buf_ref ;
519
519
while (buf_ref != buf_end ) {
520
- const lxb_char_t * buf_ref_backup = buf_ref ;
521
520
/* Fast path converts non-validated UTF-8 -> validated UTF-8 */
521
+ if (decoding_encoding_ctx -> decode .u .utf_8 .need == 0 && * buf_ref < 0x80 ) {
522
+ /* Fast path within the fast path: try to skip non-multibyte (ASCII) bytes in bulk if we are not in a state where we
523
+ * need more UTF-8 bytes to complete a sequence.
524
+ * It might be tempting to use SIMD here, but it turns out that this is less efficient because
525
* we sometimes need to process the same byte multiple times when mixing ASCII with multibyte sequences. */
526
+ buf_ref ++ ;
527
+ continue ;
528
+ }
529
+ const lxb_char_t * buf_ref_backup = buf_ref ;
522
530
lxb_codepoint_t codepoint = lxb_encoding_decode_utf_8_single (& decoding_encoding_ctx -> decode , & buf_ref , buf_end );
523
531
if (UNEXPECTED (codepoint > LXB_ENCODING_MAX_CODEPOINT )) {
524
532
size_t skip = buf_ref - buf_ref_backup ; /* Skip invalid data, it's replaced by the UTF-8 replacement bytes */
You can’t perform that action at this time.
0 commit comments