From dbf5e7f3146e3b8c4d7b8c7e71b3493ef051686a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 18 Nov 2024 13:46:57 -0700 Subject: [PATCH] utf8n_to_uvchr_msgs(): Macroize a common paradigm Each of the cases in the loop had a somewhat obscure conditional that had a few variants. This commit creates a macro that puts the complication in one place. --- utf8.c | 108 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 49 deletions(-) diff --git a/utf8.c b/utf8.c index 42cbaaeff895..a44bd96586db 100644 --- a/utf8.c +++ b/utf8.c @@ -1671,6 +1671,39 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, *msgs = NULL; } + + /* Returns 0 if no message needs to be generated for this problem even + * if everything else says to. Otherwise returns the warning category + * to use for the message. + * + * No message need be generated if the UTF8_CHECK_ONLY flag has been + * set by the caller. Otherwise, a message should be generated if + * either: + * 1) the caller has furnished a structure into which messages should + * be returned to it (so it itself can decide what to do); or + * 2) warnings are enabled for either of the category parameters to the + * macro. + * + * The 'warning' parameter is the higher priority warning category to + * check. The macro calls ckWARN_d(warning), so warnings for it are + * considered to be on by default. + * + * The second, lower priority category is optional. To specify not to + * use one, call the macro + * like: NEED_MESSAGE(WARN_FOO,,) + * Otherwise like: NEED_MESSAGE(WARN_FOO, ckWARN_d, WARN_BAR) + * + * The second parameter could also have been ckWARN to specify that the + * second category isn't on by default. + * + * When called without a second category, the macro outputs a bunch of + * zeroes that the compiler should fold to nothing */ +#define NEED_MESSAGE(warning, extra_ckWARN, extra_category) \ + ((flags & UTF8_CHECK_ONLY) ? 0 : \ + ((ckWARN_d(warning)) ? warning : \ + ((extra_ckWARN(extra_category +0)) ? extra_category +0 : \ + ((msgs) ? warning : 0)))) + while (possible_problems) { /* Handle each possible problem */ U32 pack_warn = 0; char * message = NULL; @@ -1720,29 +1753,24 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, || (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED))) { - /* The warnings code explicitly says it doesn't handle the - * case of packWARN2 and two categories which have - * parent-child relationship. Even if it works now to - * raise the warning if either is enabled, it wouldn't - * necessarily do so in the future. We output (only) the - * most dire warning */ - if (! (flags & UTF8_CHECK_ONLY)) { - if (ckWARN_d(WARN_UTF8)) { - pack_warn = packWARN(WARN_UTF8); - } - else if (ckWARN_d(WARN_NON_UNICODE)) { - pack_warn = packWARN(WARN_NON_UNICODE); - } - else if (msgs) { - pack_warn = packWARN(WARN_UTF8); - } - - if (pack_warn) { - message = Perl_form(aTHX_ "%s: %s (overflows)", + /* Overflow is a hybrid. If the word size on this platform + * were wide enough for this to not overflow, a non-Unicode + * code point would have been generated. If the caller + * wanted warnings for such code points, the warning + * category would be WARN_NON_UNICODE, On the other hand, + * overflow is considered a malformation, which is serious, + * and the category would be just WARN_UTF8. We clearly + * should warn if either category is enabled, but which + * category to use? Historically, we've used 'utf8' if it + * is enabled; and that seems like the more severe + * category, more befitting a malformation. */ + pack_warn = NEED_MESSAGE(WARN_UTF8, + ckWARN_d, WARN_NON_UNICODE); + if (pack_warn) { + message = Perl_form(aTHX_ "%s: %s (overflows)", malformed_text, _byte_dump_string(s0, curlen, 0)); - this_flag_bit = UTF8_GOT_OVERFLOW; - } + this_flag_bit = UTF8_GOT_OVERFLOW; } } @@ -1759,9 +1787,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, assert(0); disallowed = TRUE; - if ( (msgs - || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY)) - { + if (NEED_MESSAGE(WARN_UTF8,,)) { pack_warn = packWARN(WARN_UTF8); message = Perl_form(aTHX_ "%s (empty string)", malformed_text); @@ -1776,9 +1802,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, if (! (flags & UTF8_ALLOW_CONTINUATION)) { disallowed = TRUE; - if (( msgs - || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY)) - { + if (NEED_MESSAGE(WARN_UTF8,,)) { pack_warn = packWARN(WARN_UTF8); message = Perl_form(aTHX_ "%s: %s (unexpected continuation byte 0x%02x," @@ -1797,9 +1821,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, if (! (flags & UTF8_ALLOW_SHORT)) { disallowed = TRUE; - if (( msgs - || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY)) - { + if (NEED_MESSAGE(WARN_UTF8,,)) { pack_warn = packWARN(WARN_UTF8); message = Perl_form(aTHX_ "%s: %s (too short; %d byte%s available, need %d)", @@ -1820,9 +1842,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, if (! (flags & UTF8_ALLOW_NON_CONTINUATION)) { disallowed = TRUE; - if (( msgs - || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY)) - { + if (NEED_MESSAGE(WARN_UTF8,,)) { /* If we don't know for sure that the input length is * valid, avoid as much as possible reading past the @@ -1847,9 +1867,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, if (flags & UTF8_WARN_SURROGATE) { *errors |= UTF8_GOT_SURROGATE; - if ( ! (flags & UTF8_CHECK_ONLY) - && (msgs || ckWARN_d(WARN_SURROGATE))) - { + if (NEED_MESSAGE(WARN_SURROGATE,,)) { pack_warn = packWARN(WARN_SURROGATE); /* These are the only errors that can occur with a @@ -1879,9 +1897,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, if (flags & UTF8_WARN_SUPER) { *errors |= UTF8_GOT_SUPER; - if ( ! (flags & UTF8_CHECK_ONLY) - && (msgs || ckWARN_d(WARN_NON_UNICODE))) - { + if (NEED_MESSAGE(WARN_NON_UNICODE,,)) { pack_warn = packWARN(WARN_NON_UNICODE); if (orig_problems & UTF8_GOT_TOO_SHORT) { @@ -1902,10 +1918,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, * and before possibly bailing out, so that the more dire * warning will override the regular one. */ if (UNLIKELY(UTF8_IS_PERL_EXTENDED(s0))) { - if ( ! (flags & UTF8_CHECK_ONLY) - && (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER)) - && (msgs || ( ckWARN_d(WARN_NON_UNICODE) - || ckWARN(WARN_PORTABLE)))) + if ( (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER)) + && NEED_MESSAGE(WARN_NON_UNICODE, ckWARN, WARN_PORTABLE)) { pack_warn = packWARN2(WARN_NON_UNICODE, WARN_PORTABLE); @@ -1955,9 +1969,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, if (flags & UTF8_WARN_NONCHAR) { *errors |= UTF8_GOT_NONCHAR; - if ( ! (flags & UTF8_CHECK_ONLY) - && (msgs || ckWARN_d(WARN_NONCHAR))) - { + if (NEED_MESSAGE(WARN_NONCHAR,,)) { /* The code above should have guaranteed that we don't * get here with errors other than overlong */ assert (! (orig_problems @@ -1992,9 +2004,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, else { disallowed = TRUE; - if (( msgs - || ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY)) - { + if (NEED_MESSAGE(WARN_UTF8,,)) { pack_warn = packWARN(WARN_UTF8); /* These error types cause 'uv' to be something that