Skip to content

Commit 1584352

Browse files
committed
Add fuzzer for mb_convert_encoding
This uses the php-fuzz-mbstring name, moving the existing fuzzer to php-fuzz-mbregex.
1 parent be9adc4 commit 1584352

File tree

6 files changed

+124
-43
lines changed

6 files changed

+124
-43
lines changed

sapi/fuzzer/Makefile.frag

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,6 @@ $(SAPI_FUZZER_PATH)/php-fuzz-exif: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZ
2828

2929
$(SAPI_FUZZER_PATH)/php-fuzz-mbstring: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZZER_MBSTRING_OBJS)
3030
$(FUZZER_BUILD) $(PHP_FUZZER_MBSTRING_OBJS) -o $@
31+
32+
$(SAPI_FUZZER_PATH)/php-fuzz-mbregex: $(PHP_GLOBAL_OBJS) $(PHP_SAPI_OBJS) $(PHP_FUZZER_MBREGEX_OBJS)
33+
$(FUZZER_BUILD) $(PHP_FUZZER_MBREGEX_OBJS) -o $@

sapi/fuzzer/README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ When running `make` it creates these binaries in `sapi/fuzzer/`:
2828
* `php-fuzz-unserializehash`: Fuzzing unserialize() for HashContext objects
2929
* `php-fuzz-json`: Fuzzing JSON parser (requires --enable-json)
3030
* `php-fuzz-exif`: Fuzzing `exif_read_data()` function (requires --enable-exif)
31-
* `php-fuzz-mbstring`: Fuzzing `mb_ereg[i]()` (requires --enable-mbstring)
31+
* `php-fuzz-mbstring`: Fuzzing `mb_convert_encoding()` (requires `--enable-mbstring`)
32+
* `php-fuzz-mbregex`: Fuzzing `mb_ereg[i]()` (requires `--enable-mbstring` with mbregex enabled)
3233
* `php-fuzz-execute`: Fuzzing the executor
3334
* `php-fuzz-function-jit`: Fuzzing the function JIT (requires --enable-opcache)
3435
* `php-fuzz-tracing-jit`: Fuzzing the tracing JIT (requires --enable-opcache)
@@ -72,7 +73,14 @@ sapi/cli/php sapi/fuzzer/generate_execute_corpus.php ./execute-corpus Zend/tests
7273
sapi/fuzzer/php-fuzz-function-jit ./execute-corpus
7374
```
7475

75-
For the mbstring fuzzer, you may want to build the libonig dependency with instrumentation. At this time, libonig is not clean under ubsan, so only the fuzzer and address sanitizers may be used.
76+
For the mbstring fuzzer, a dictionary of encodings should be generated first:
77+
78+
```sh
79+
sapi/cli/php sapi/fuzzer/generate_mbstring_dict.php
80+
sapi/fuzzer/php-fuzz-mbstring -dict=$PWD/sapi/fuzzer/dict/mbstring ./my-mbstring-corpus
81+
```
82+
83+
For the mbregex fuzzer, you may want to build the libonig dependency with instrumentation. At this time, libonig is not clean under ubsan, so only the fuzzer and address sanitizers may be used.
7684

7785
```sh
7886
git clone https://github.com/kkos/oniguruma.git

sapi/fuzzer/config.m4

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ if test "$PHP_FUZZER" != "no"; then
6565
fi
6666
if test -n "$enable_mbstring" && test "$enable_mbstring" != "no"; then
6767
PHP_FUZZER_TARGET([mbstring], PHP_FUZZER_MBSTRING_OBJS)
68+
if test -n "$enable_mbregex" && test "$enable_mbregex" != "no"; then
69+
PHP_FUZZER_TARGET([mbregex], PHP_FUZZER_MBREGEX_OBJS)
70+
fi
6871
fi
6972

7073
PHP_SUBST(PHP_FUZZER_BINARIES)

sapi/fuzzer/fuzzer-mbregex.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
+----------------------------------------------------------------------+
3+
| Copyright (c) The PHP Group |
4+
+----------------------------------------------------------------------+
5+
| This source file is subject to version 3.01 of the PHP license, |
6+
| that is bundled with this package in the file LICENSE, and is |
7+
| available through the world-wide-web at the following url: |
8+
| https://www.php.net/license/3_01.txt |
9+
| If you did not receive a copy of the PHP license and are unable to |
10+
| obtain it through the world-wide-web, please send a note to |
11+
| license@php.net so we can mail you a copy immediately. |
12+
+----------------------------------------------------------------------+
13+
| Authors: Stanislav Malyshev <stas@php.net> |
14+
+----------------------------------------------------------------------+
15+
*/
16+
17+
18+
#include "fuzzer.h"
19+
20+
#include "Zend/zend.h"
21+
#include "main/php_config.h"
22+
#include "main/php_main.h"
23+
#include "oniguruma.h"
24+
25+
#include <stdio.h>
26+
#include <stdint.h>
27+
#include <stdlib.h>
28+
29+
#include "fuzzer-sapi.h"
30+
31+
/*
 * libFuzzer entry point for the mbregex fuzzer.
 *
 * Copies the fuzz input into a NUL-terminated buffer and passes it to
 * mb_ereg() and mb_eregi(): first as argument one with the fixed string
 * "test123" as argument two, then as both arguments.
 *
 * Data: raw fuzz input (not NUL-terminated). Size: its length in bytes.
 * Returns 0 in every case. When built without HAVE_MBREGEX, prints an
 * error to stderr and exits with status 1 instead.
 */
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
#ifdef HAVE_MBREGEX
	char *args[2];
	char *data = malloc(Size+1);

	if (data == NULL) {
		/* No memory for the input copy: skip this input. */
		return 0;
	}
	if (Size > 0) {
		/* Nothing to copy for an empty input, so memcpy() is skipped. */
		memcpy(data, Data, Size);
	}
	data[Size] = '\0';

	if (fuzzer_request_startup() == FAILURE) {
		/* The input copy must not leak when the request cannot start. */
		free(data);
		return 0;
	}

	fuzzer_setup_dummy_frame();

	/* Fuzz input as the first argument, fixed string as the second. */
	args[0] = data;
	args[1] = "test123";
	fuzzer_call_php_func("mb_ereg", 2, args);

	args[0] = data;
	args[1] = "test123";
	fuzzer_call_php_func("mb_eregi", 2, args);

	/* Fuzz input as both arguments. */
	args[0] = data;
	args[1] = data;
	fuzzer_call_php_func("mb_ereg", 2, args);

	args[0] = data;
	args[1] = data;
	fuzzer_call_php_func("mb_eregi", 2, args);

	fuzzer_request_shutdown();

	free(data);
#else
	fprintf(stderr, "\n\nERROR:\nPHP built without mbstring, recompile with --enable-mbstring to use this fuzzer\n");
	exit(1);
#endif
	return 0;
}
69+
70+
/*
 * libFuzzer one-time initialisation hook for the mbregex fuzzer.
 *
 * Initialises PHP through fuzzer_init_php(NULL) and sets the Oniguruma
 * parse depth limit. argc and argv are not used. Always returns 0.
 */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
	/* With the default parse depth limit, stack overflows occur under asan. */
	enum { MBREGEX_PARSE_DEPTH_LIMIT = 512 };

	fuzzer_init_php(NULL);
	onig_set_parse_depth_limit(MBREGEX_PARSE_DEPTH_LIMIT);

	/* fuzzer_shutdown_php() is not called here. */
	return 0;
}

sapi/fuzzer/fuzzer-mbstring.c

Lines changed: 29 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -15,64 +15,52 @@
1515
*/
1616

1717

18+
#include "zend.h"
1819
#include "fuzzer.h"
19-
20-
#include "Zend/zend.h"
21-
#include "main/php_config.h"
22-
#include "main/php_main.h"
23-
#include "oniguruma.h"
24-
25-
#include <stdio.h>
26-
#include <stdint.h>
27-
#include <stdlib.h>
28-
2920
#include "fuzzer-sapi.h"
21+
#include "ext/mbstring/mbstring.h"
3022

3123
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
32-
#ifdef HAVE_MBREGEX
33-
char *args[2];
34-
char *data = malloc(Size+1);
35-
memcpy(data, Data, Size);
36-
data[Size] = '\0';
37-
38-
if (fuzzer_request_startup() == FAILURE) {
24+
const uint8_t *Comma1 = memchr(Data, ',', Size);
25+
if (!Comma1) {
3926
return 0;
4027
}
4128

42-
fuzzer_setup_dummy_frame();
29+
size_t ToEncodingNameLen = Comma1 - Data;
30+
char *ToEncodingName = estrndup((char *) Data, ToEncodingNameLen);
31+
Data = Comma1 + 1;
32+
Size -= ToEncodingNameLen + 1;
33+
34+
const uint8_t *Comma2 = memchr(Data, ',', Size);
35+
if (!Comma2) {
36+
efree(ToEncodingName);
37+
return 0;
38+
}
4339

44-
args[0] = data;
45-
args[1] = "test123";
46-
fuzzer_call_php_func("mb_ereg", 2, args);
40+
size_t FromEncodingNameLen = Comma2 - Data;
41+
char *FromEncodingName = estrndup((char *) Data, FromEncodingNameLen);
42+
Data = Comma2 + 1;
43+
Size -= FromEncodingNameLen + 1;
4744

48-
args[0] = data;
49-
args[1] = "test123";
50-
fuzzer_call_php_func("mb_eregi", 2, args);
45+
const mbfl_encoding *ToEncoding = mbfl_name2encoding(ToEncodingName);
46+
const mbfl_encoding *FromEncoding = mbfl_name2encoding(FromEncodingName);
5147

52-
args[0] = data;
53-
args[1] = data;
54-
fuzzer_call_php_func("mb_ereg", 2, args);
48+
if (!ToEncoding || !FromEncoding || fuzzer_request_startup() == FAILURE) {
49+
efree(ToEncodingName);
50+
efree(FromEncodingName);
51+
return 0;
52+
}
5553

56-
args[0] = data;
57-
args[1] = data;
58-
fuzzer_call_php_func("mb_eregi", 2, args);
54+
char *Result = php_mb_convert_encoding_ex((char *) Data, Size, ToEncoding, FromEncoding, NULL);
55+
efree(Result);
56+
efree(ToEncodingName);
57+
efree(FromEncodingName);
5958

6059
fuzzer_request_shutdown();
61-
62-
free(data);
63-
#else
64-
fprintf(stderr, "\n\nERROR:\nPHP built without mbstring, recompile with --enable-mbstring to use this fuzzer\n");
65-
exit(1);
66-
#endif
6760
return 0;
6861
}
6962

7063
int LLVMFuzzerInitialize(int *argc, char ***argv) {
7164
fuzzer_init_php(NULL);
72-
73-
/* The default parse depth limit allows stack overflows under asan. */
74-
onig_set_parse_depth_limit(512);
75-
76-
/* fuzzer_shutdown_php(); */
7765
return 0;
7866
}

sapi/fuzzer/generate_all.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<?php
22
require __DIR__ . '/generate_unserialize_dict.php';
3+
require __DIR__ . '/generate_mbstring_dict.php';
34
require __DIR__ . '/generate_unserializehash_corpus.php';
45
require __DIR__ . '/generate_parser_corpus.php';
56

0 commit comments

Comments
 (0)