Skip to content

Commit 3394efc

Browse files
alexdowadramsey
authored andcommitted
Fix infinite loop in mb_encode_mimeheader
1 parent 3d9941f commit 3394efc

File tree

2 files changed

+61
-27
lines changed

2 files changed

+61
-27
lines changed

ext/mbstring/mbstring.c

+31-24
Original file line numberDiff line numberDiff line change
@@ -5858,6 +5858,9 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin
58585858
unsigned char *in = (unsigned char*)ZSTR_VAL(input);
58595859
size_t in_len = ZSTR_LEN(input);
58605860

5861+
ZEND_ASSERT(outcode->mime_name != NULL);
5862+
ZEND_ASSERT(outcode->mime_name[0] != '\0');
5863+
58615864
if (!in_len) {
58625865
return zend_empty_string;
58635866
}
@@ -5880,7 +5883,8 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin
58805883
unsigned int state = 0;
58815884
/* wchar_buf should be big enough that when it is full, we definitely have enough
58825885
* wchars to fill an entire line of output */
5883-
uint32_t wchar_buf[80];
5886+
const size_t wchar_buf_len = 90;
5887+
uint32_t wchar_buf[wchar_buf_len];
58845888
uint32_t *p, *e;
58855889
/* What part of wchar_buf is filled with still-unprocessed data which should not
58865890
* be overwritten? */
@@ -5891,7 +5895,7 @@ static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encodin
58915895
* spaces), just pass it through unchanged */
58925896
bool checking_leading_spaces = true;
58935897
while (in_len) {
5894-
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, 80, &state);
5898+
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf, wchar_buf_len, &state);
58955899
p = wchar_buf;
58965900
e = wchar_buf + out_len;
58975901

@@ -5925,9 +5929,9 @@ no_passthrough: ;
59255929
* do so all the way to the end of the string */
59265930
while (in_len) {
59275931
/* Decode part of the input string, refill wchar_buf */
5928-
ZEND_ASSERT(offset < 80);
5929-
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state);
5930-
ZEND_ASSERT(out_len <= 80 - offset);
5932+
ZEND_ASSERT(offset + MBSTRING_MIN_WCHAR_BUFSIZE <= wchar_buf_len);
5933+
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, wchar_buf_len - offset, &state);
5934+
ZEND_ASSERT(out_len <= wchar_buf_len - offset);
59315935
p = wchar_buf;
59325936
e = wchar_buf + offset + out_len;
59335937
/* ASCII output is broken into space-delimited 'words'
@@ -5948,6 +5952,7 @@ no_passthrough: ;
59485952
* If we are already too far along on a line to include Base64/QPrint encoded data
59495953
* on the same line (without overrunning max line length), then add a line feed
59505954
* right now */
5955+
feed_and_mime_encode:
59515956
if (mb_convert_buf_len(&buf) - line_start + indent + strlen(outcode->mime_name) > 55) {
59525957
MB_CONVERT_BUF_ENSURE(&buf, buf.out, buf.limit, (e - word_start) + linefeed_len + 1);
59535958
buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
@@ -5985,7 +5990,13 @@ no_passthrough: ;
59855990

59865991
if (in_len) {
59875992
/* Copy chars which are part of an incomplete 'word' to the beginning
5988-
* of wchar_buf and reprocess them on the next iteration */
5993+
* of wchar_buf and reprocess them on the next iteration.
5994+
* But first make sure that the incomplete 'word' isn't so big that
5995+
* there will be no space to add any more decoded wchars in the buffer
5996+
* (which could lead to an infinite loop) */
5997+
if ((word_start - wchar_buf) < MBSTRING_MIN_WCHAR_BUFSIZE) {
5998+
goto feed_and_mime_encode;
5999+
}
59896000
offset = e - word_start;
59906001
if (offset) {
59916002
memmove(wchar_buf, word_start, offset * sizeof(uint32_t));
@@ -6027,17 +6038,17 @@ mime_encoding_needed: ;
60276038

60286039
/* Do we need to refill wchar_buf to make sure we don't run out of wchars
60296040
* in the middle of a line? */
6030-
if (p == wchar_buf) {
6041+
offset = e - p;
6042+
if (wchar_buf_len - offset < MBSTRING_MIN_WCHAR_BUFSIZE) {
60316043
goto start_new_line;
60326044
}
6033-
offset = e - p;
60346045
memmove(wchar_buf, p, offset * sizeof(uint32_t));
60356046

60366047
while(true) {
60376048
refill_wchar_buf: ;
6038-
ZEND_ASSERT(offset < 80);
6039-
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, 80 - offset, &state);
6040-
ZEND_ASSERT(out_len <= 80 - offset);
6049+
ZEND_ASSERT(offset + MBSTRING_MIN_WCHAR_BUFSIZE <= wchar_buf_len);
6050+
size_t out_len = incode->to_wchar(&in, &in_len, wchar_buf + offset, wchar_buf_len - offset, &state);
6051+
ZEND_ASSERT(out_len <= wchar_buf_len - offset);
60416052
p = wchar_buf;
60426053
e = wchar_buf + offset + out_len;
60436054

@@ -6112,22 +6123,18 @@ start_new_line: ;
61126123

61136124
indent = 0; /* Indent argument must only affect the first line */
61146125

6115-
if (in_len) {
6116-
/* We still have more of input string remaining to decode */
6126+
if (in_len || p < e) {
6127+
/* We still have more input to process */
61176128
buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
61186129
buf.out = mb_convert_buf_add(buf.out, ' ');
61196130
line_start = mb_convert_buf_len(&buf);
6120-
/* Copy remaining wchars to beginning of buffer so they will be
6121-
* processed on the next iteration of outer 'do' loop */
61226131
offset = e - p;
6123-
memmove(wchar_buf, p, offset * sizeof(uint32_t));
6124-
goto refill_wchar_buf;
6125-
} else if (p < e) {
6126-
/* Input string is finished, but we still have trailing wchars
6127-
* remaining to be processed in wchar_buf */
6128-
buf.out = mb_convert_buf_appendn(buf.out, linefeed, linefeed_len);
6129-
buf.out = mb_convert_buf_add(buf.out, ' ');
6130-
line_start = mb_convert_buf_len(&buf);
6132+
if (in_len && (wchar_buf_len - offset >= MBSTRING_MIN_WCHAR_BUFSIZE)) {
6133+
/* Copy any remaining wchars to beginning of buffer and refill
6134+
* the rest of the buffer */
6135+
memmove(wchar_buf, p, offset * sizeof(uint32_t));
6136+
goto refill_wchar_buf;
6137+
}
61316138
goto start_new_line;
61326139
} else {
61336140
/* We are done! */
@@ -6165,7 +6172,7 @@ PHP_FUNCTION(mb_encode_mimeheader)
61656172
charset = php_mb_get_encoding(charset_name, 2);
61666173
if (!charset) {
61676174
RETURN_THROWS();
6168-
} else if (charset->mime_name == NULL || charset->mime_name[0] == '\0') {
6175+
} else if (charset->mime_name == NULL || charset->mime_name[0] == '\0' || charset == &mbfl_encoding_qprint) {
61696176
zend_argument_value_error(2, "\"%s\" cannot be used for MIME header encoding", ZSTR_VAL(charset_name));
61706177
RETURN_THROWS();
61716178
}

ext/mbstring/tests/mb_encode_mimeheader_basic4.phpt

+30-3
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,29 @@ var_dump(mb_encode_mimeheader("\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
115115
// In the general case, matching the old implementation's decision to transfer-encode or not
116116
// perfectly would require allocating potentially unbounded scratch memory (up to the size of
117117
// the input string), but we aim to only use a constant amount of temporarily allocated memory
118-
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
118+
var_dump(mb_encode_mimeheader("2\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20!3", "GB18030", "Q", ""));
119+
120+
// Regression test for infinite loop which was unintentionally caused when refactoring
121+
var_dump(mb_encode_mimeheader(",9868949,9868978,9869015,9689100,9869121,9869615,9870690,9867116,98558119861183. ", "utf-8", "B"));
122+
var_dump(mb_encode_mimeheader('xx ' . str_repeat("A", 81) . " ", "utf-8", "B"));
123+
124+
// Regression test for problem where MIME encoding loop would not leave enough space in wchar
125+
// buffer for the next iteration, causing an assertion failure
126+
mb_internal_encoding('MacJapanese');
127+
var_dump(mb_encode_mimeheader("ne\xf6\xff\xff\xffs\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff1", 'CP50220', 'B', "A", 44));
128+
129+
// Regression test for failing assertion caused by the fact that QPrint deliberately generates no
130+
// wchars for CR (0x0D) bytes
131+
try {
132+
mb_internal_encoding('Quoted-Printable');
133+
var_dump(mb_encode_mimeheader("=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=00=00=00=00=01=00=00=00=00=00=00=00850r=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=0D=00=00=00=0050r=08=0DCP850r850r0r", "Quoted-Printable", "B", "", 184));
134+
} catch (\ValueError $e) {
135+
echo $e->getMessage() . \PHP_EOL;
136+
}
119137

120138
echo "Done";
121139
?>
122-
--EXPECT--
140+
--EXPECTF--
123141
string(0) ""
124142
string(21) "=?UTF-8?Q?abc=00abc?="
125143
string(16) "=?UTF-8?B?Pw==?="
@@ -156,5 +174,14 @@ string(75) " 111111111111111111111111111111111111111111111111111111111111111111
156174
string(33) "=?HZ-GB-2312?Q?=7E=7Bs=5B=7E=7D?="
157175
string(77) "2 !3"
158176
string(282) "=?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20!=33=20?="
159-
string(296) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20!=33?="
177+
string(344) "2 =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20?= =?GB18030?Q?=20=20=20=20=20=20=20=20=20!=33?="
178+
string(135) "=?UTF-8?B?LDk4Njg5NDksOTg2ODk3OCw5ODY5MDE1LDk2ODkxMDAsOTg2OTEyMSw5ODY5?=
179+
=?UTF-8?B?NjE1LDk4NzA2OTAsOTg2NzExNiw5ODU1ODExOTg2MTE4My4g?="
180+
string(142) "xx =?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB?=
181+
=?UTF-8?B?QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBIA==?="
182+
string(690) "=?ISO-2022-JP?B?bmU/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/cxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MRskQiFEGyhCPxskQiFEGyhCPw==?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/GyRCIUQbKEI/?=A =?ISO-2022-JP?B?GyRCIUQbKEI/GyRCIUQbKEI/MQ==?="
183+
184+
185+
Deprecated: mb_encode_mimeheader(): Handling QPrint via mbstring is deprecated; use quoted_printable_encode/quoted_printable_decode instead in %s on line %d
186+
mb_encode_mimeheader(): Argument #2 ($charset) "Quoted-Printable" cannot be used for MIME header encoding
160187
Done

0 commit comments

Comments
 (0)