diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb3f475..c6de5e2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,7 +4,7 @@ jobs: ubuntu: strategy: matrix: - version: ['7.3', '7.4', '8.0', '8.1', '8.2'] + version: ['7.3', '7.4', '8.0', '8.1', '8.2', '8.3'] runs-on: ubuntu-latest steps: - name: Install re2c diff --git a/package.xml b/package.xml index 8a0b8cb..728c969 100644 --- a/package.xml +++ b/package.xml @@ -36,9 +36,9 @@ It can deal with rfc822 and rfc2045 (MIME) compliant messages. remi@php.net yes - 2022-09-15 + 2023-08-22 - 3.1.5dev + 3.1.7dev 3.0 @@ -47,10 +47,7 @@ It can deal with rfc822 and rfc2045 (MIME) compliant messages. PHP-3.01 -- drop usage of removed mbfl APIs in PHP 8.3 -- fix GH-27 MimeMessage::__construct() throws TypeError with $mode=stream -- fix GH-21, GH-22, GH-24 segfault in mailparse_msg_parse without mime-version -- fix #81403 mailparse_rfc822_parse_addresses drops escaped quotes +- @@ -96,6 +93,8 @@ It can deal with rfc822 and rfc2045 (MIME) compliant messages. + + @@ -131,6 +130,41 @@ It can deal with rfc822 and rfc2045 (MIME) compliant messages. mailparse + + 2023-08-22 + + 3.1.6 + 3.0 + + + stable + stable + + PHP-3.01 + +- fix #29 Segmentation fault with ISO-2022-JP Subject header +- fix #30 Segmentation fault with UTF-8 encoded X-MS-Iris-MetaData header +- revert fix #81403 mailparse_rfc822_parse_addresses drops escaped quotes + + + + 2023-07-27 + + 3.1.5 + 3.0 + + + stable + stable + + PHP-3.01 + +- drop usage of removed mbfl APIs in PHP 8.3 +- fix GH-27 MimeMessage::__construct() throws TypeError with $mode=stream +- fix GH-21, GH-22, GH-24 segfault in mailparse_msg_parse without mime-version +- fix #81403 mailparse_rfc822_parse_addresses drops escaped quotes + + 2022-09-15 diff --git a/php_mailparse.h b/php_mailparse.h index 8fd8aca..0dd8066 100644 --- a/php_mailparse.h +++ b/php_mailparse.h @@ -22,7 +22,7 @@ extern zend_module_entry mailparse_module_entry; #define phpext_mailparse_ptr &mailparse_module_entry -#define PHP_MAILPARSE_VERSION "3.1.5-dev" +#define PHP_MAILPARSE_VERSION "3.1.7-dev" #ifdef PHP_WIN32 #define PHP_MAILPARSE_API __declspec(dllexport) diff --git a/php_mailparse_rfc822.c b/php_mailparse_rfc822.c index 619cccb..3be2915 100644 --- a/php_mailparse_rfc822.c +++ b/php_mailparse_rfc822.c @@ -1,5 +1,5 @@ -/* Generated by re2c 2.1.1 on Tue Jul 25 10:20:24 2023 */ -#line 1 "php_mailparse_rfc822.re" +/* Generated by re2c 2.1.1 on Tue Aug 29 09:13:53 2023 */ +#line 1 "/home/php/git/mailparse/php_mailparse_rfc822.re" /* +----------------------------------------------------------------------+ | Copyright (c) The PHP Group | @@ -21,10 +21,10 @@ #include "php_mailparse_rfc822.h" #include "ext/standard/php_string.h" #include "ext/standard/php_smart_string.h" -#line 36 "php_mailparse_rfc822.re" +#line 36 "/home/php/git/mailparse/php_mailparse_rfc822.re" -#line 45 "php_mailparse_rfc822.re" +#line 45 "/home/php/git/mailparse/php_mailparse_rfc822.re" #define YYFILL(n) if (YYCURSOR == YYLIMIT) goto stop @@ -49,7 +49,6 @@ static void tokenize(const char *header, php_rfc822_token_t *tokens, int *ntokens, int report_errors) { register const char *p, *q, *start; - const char *r = NULL; int in_bracket = 0; /* NB: parser assumes that the header has two bytes of NUL terminator */ @@ -67,73 +66,105 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO #endif -#line 71 "" +#line 70 "" { YYCTYPE yych; + static const unsigned char yybm[] = { + 0, 160, 160, 160, 160, 160, 160, 160, + 160, 192, 192, 160, 160, 192, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 192, 128, 0, 160, 160, 128, 160, 160, + 128, 128, 160, 160, 128, 160, 128, 128, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 128, 128, 128, 128, 128, 128, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 160, 128, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + }; if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; - switch (yych) { - case 0x00: goto yy2; - case '\t': - case '\n': - case '\r': - case ' ': goto yy7; - case '!': - case '%': - case ',': - case '.': - case '/': - case ':': - case ';': - case '=': - case '?': - case '@': - case '[': - case ']': goto yy10; - case '"': goto yy12; - case '(': goto yy15; - case ')': goto yy17; - case '<': goto yy19; - case '>': goto yy21; - case '\\': goto yy23; - default: goto yy4; + if (yybm[0+yych] & 64) { + goto yy7; + } + if (yych <= '-') { + if (yych <= '%') { + if (yych <= '!') { + if (yych <= 0x00) goto yy2; + if (yych <= 0x1F) goto yy4; + goto yy10; + } else { + if (yych <= '"') goto yy12; + if (yych <= '$') goto yy4; + goto yy10; + } + } else { + if (yych <= ')') { + if (yych <= '\'') goto yy4; + if (yych <= '(') goto yy15; + goto yy17; + } else { + if (yych == ',') goto yy10; + goto yy4; + } + } + } else { + if (yych <= '>') { + if (yych <= ';') { + if (yych <= '/') goto yy10; + if (yych <= '9') goto yy4; + goto yy10; + } else { + if (yych <= '<') goto yy19; + if (yych <= '=') goto yy10; + goto yy21; + } + } else { + if (yych <= '[') { + if (yych <= '@') goto yy10; + if (yych <= 'Z') goto yy4; + goto yy10; + } else { + if (yych <= '\\') goto yy23; + if (yych <= ']') goto yy10; + goto yy4; + } + } } yy2: ++YYCURSOR; -#line 87 "php_mailparse_rfc822.re" +#line 86 "/home/php/git/mailparse/php_mailparse_rfc822.re" { goto stop; } -#line 106 "" +#line 160 "" yy4: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = *YYCURSOR; - switch (yych) { - case 0x00: - case '\t': - case '\n': - case '\r': - case ' ': - case '!': - case '"': - case '%': - case '(': - case ')': - case ',': - case '.': - case '/': - case ':': - case ';': - case '<': - case '=': - case '>': - case '?': - case '@': - case '[': - case ']': goto yy6; - default: goto yy4; + if (yybm[0+yych] & 32) { + goto yy4; } -yy6: -#line 141 "php_mailparse_rfc822.re" +#line 140 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("ANY"); if (tokens) { tokens->token = 0; @@ -144,43 +175,33 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO ++*ntokens; goto state_ground; } -#line 148 "" +#line 179 "" yy7: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = *YYCURSOR; - switch (yych) { - case '\t': - case '\n': - case '\r': - case ' ': goto yy7; - default: goto yy9; + if (yybm[0+yych] & 64) { + goto yy7; } -yy9: -#line 88 "php_mailparse_rfc822.re" +#line 87 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("SPACE"); goto state_ground; } -#line 163 "" +#line 189 "" yy10: ++YYCURSOR; -#line 140 "php_mailparse_rfc822.re" +#line 139 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("ATOM"); ADD_ATOM_TOKEN(); goto state_ground; } -#line 168 "" +#line 194 "" yy12: ++YYCURSOR; if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = *YYCURSOR; - switch (yych) { - case 0x00: goto yy14; - case '"': goto yy24; - case '\\': goto yy26; - default: goto yy12; + if (yybm[0+yych] & 128) { + goto yy12; } -yy14: - YYCURSOR = YYMARKER; - goto yy25; + if (yych >= 0x01) goto yy24; yy15: ++YYCURSOR; -#line 90 "php_mailparse_rfc822.re" +#line 89 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("START COMMENT"); if (tokens) { tokens->token = '('; @@ -189,21 +210,17 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO } goto state_comment; } -#line 193 "" +#line 214 "" yy17: ++YYCURSOR; yy18: -#line 89 "php_mailparse_rfc822.re" +#line 88 "/home/php/git/mailparse/php_mailparse_rfc822.re" { REPORT_ERR("token not valid in ground state"); goto state_ground; } -#line 199 "" +#line 220 "" yy19: yych = *++YYCURSOR; - switch (yych) { - case '>': goto yy28; - default: goto yy20; - } -yy20: -#line 122 "php_mailparse_rfc822.re" + if (yych == '>') goto yy26; +#line 121 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("LANGLE"); if (in_bracket) { REPORT_ERR("already in < bracket"); @@ -213,10 +230,10 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO ADD_ATOM_TOKEN(); goto state_ground; } -#line 217 "" +#line 234 "" yy21: ++YYCURSOR; -#line 131 "php_mailparse_rfc822.re" +#line 130 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("RANGLE"); if (!in_bracket) { REPORT_ERR("not in < bracket"); @@ -226,38 +243,16 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO ADD_ATOM_TOKEN(); goto state_ground; } -#line 230 "" +#line 247 "" yy23: yych = *++YYCURSOR; - switch (yych) { - case 0x00: - case '\t': - case '\n': - case '\r': - case ' ': - case '!': - case '"': - case '%': - case '(': - case ')': - case ',': - case '.': - case '/': - case ':': - case ';': - case '<': - case '=': - case '>': - case '?': - case '@': - case '[': - case ']': goto yy18; - default: goto yy4; + if (yybm[0+yych] & 32) { + goto yy4; } + goto yy18; yy24: ++YYCURSOR; -yy25: -#line 98 "php_mailparse_rfc822.re" +#line 97 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("QUOTE STRING"); if (tokens) { tokens->token = '"'; @@ -269,20 +264,10 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO goto state_ground; } -#line 273 "" +#line 268 "" yy26: ++YYCURSOR; - if (YYLIMIT <= YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - switch (yych) { - case 0x00: goto yy14; - case '"': goto yy30; - case '\\': goto yy26; - default: goto yy12; - } -yy28: - ++YYCURSOR; -#line 109 "php_mailparse_rfc822.re" +#line 108 "/home/php/git/mailparse/php_mailparse_rfc822.re" { DBG_STATE("NULL <>"); ADD_ATOM_TOKEN(); if (tokens) { @@ -296,19 +281,9 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO ADD_ATOM_TOKEN(); goto state_ground; } -#line 300 "" -yy30: - YYMARKER = ++YYCURSOR; - if (YYLIMIT <= YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - switch (yych) { - case 0x00: goto yy25; - case '"': goto yy24; - case '\\': goto yy26; - default: goto yy12; - } +#line 285 "" } -#line 151 "php_mailparse_rfc822.re" +#line 150 "/home/php/git/mailparse/php_mailparse_rfc822.re" state_comment: @@ -512,7 +487,7 @@ static void parse_address_tokens(php_rfc822_tokenized_t *toks, /* the stuff from start_tok to i - 1 is the display name part */ if (addrs && !in_group && i - start_tok > 0) { int j, has_comments = 0, has_strings = 0; - switch(toks->tokens[i].token) { + switch(i < toks->ntokens ? toks->tokens[i].token : 0) { case ';': case ',': case '<': addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, i - start_tok, PHP_RFC822_RECOMBINE_SPACE_ATOMS); diff --git a/php_mailparse_rfc822.re b/php_mailparse_rfc822.re index 628dd6f..3f4ca53 100644 --- a/php_mailparse_rfc822.re +++ b/php_mailparse_rfc822.re @@ -66,7 +66,6 @@ other = any\allspecials; static void tokenize(const char *header, php_rfc822_token_t *tokens, int *ntokens, int report_errors) { register const char *p, *q, *start; - const char *r = NULL; int in_bracket = 0; /* NB: parser assumes that the header has two bytes of NUL terminator */ @@ -95,7 +94,7 @@ printf("ground: start=%p limit=%p cursor=%p: [%d] %s\n", start, YYLIMIT, YYCURSO } goto state_comment; } - ["] (any\["]|"\\\"")* ["] { DBG_STATE("QUOTE STRING"); + ["] (any\["])* ["] { DBG_STATE("QUOTE STRING"); if (tokens) { tokens->token = '"'; tokens->value = start + 1; @@ -351,7 +350,7 @@ mailbox: /* addr-spec / phrase route-addr */ /* the stuff from start_tok to i - 1 is the display name part */ if (addrs && !in_group && i - start_tok > 0) { int j, has_comments = 0, has_strings = 0; - switch(toks->tokens[i].token) { + switch(i < toks->ntokens ? toks->tokens[i].token : 0) { case ';': case ',': case '<': addrs->addrs[iaddr].name = php_rfc822_recombine_tokens(toks, start_tok, i - start_tok, PHP_RFC822_RECOMBINE_SPACE_ATOMS); diff --git a/tests/bug81403.phpt b/tests/bug81403.phpt index bf81f0c..800f798 100644 --- a/tests/bug81403.phpt +++ b/tests/bug81403.phpt @@ -4,6 +4,8 @@ Bug #81403 (mailparse_rfc822_parse_addresses drops escaped quotes) +--XFAIL-- +Fix reverted see GH-29 and GH-30 --FILE-- '; diff --git a/tests/gh29.phpt b/tests/gh29.phpt new file mode 100644 index 0000000..9d474f2 --- /dev/null +++ b/tests/gh29.phpt @@ -0,0 +1,32 @@ +--TEST-- +GH issue #29 Segmentation fault with ISO-2022-JP Subject header +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +ok diff --git a/tests/gh30.phpt b/tests/gh30.phpt new file mode 100644 index 0000000..ce112cb --- /dev/null +++ b/tests/gh30.phpt @@ -0,0 +1,34 @@ +--TEST-- +GH issue #30 Segmentation fault with UTF-8 encoded X-MS-Iris-MetaData header +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +ok