Skip to content

Commit

Permalink
utf8n_to_uvchr_msgs(): Macroize a common paradigm
Browse files Browse the repository at this point in the history
Each of the cases in the loop had a somewhat obscure conditional that
had a few variants.  This commit creates a macro that puts the
complication in one place.
  • Loading branch information
khwilliamson committed Nov 26, 2024
1 parent 6c47c37 commit 8f7d284
Showing 1 changed file with 59 additions and 49 deletions.
108 changes: 59 additions & 49 deletions utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -1671,6 +1671,39 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
*msgs = NULL;
}


/* Returns 0 if no message needs to be generated for this problem even
* if everything else says to. Otherwise returns the warning category
* to use for the message.
*
* No message need be generated if the UTF8_CHECK_ONLY flag has been
* set by the caller. Otherwise, a message should be generated if
* either:
* 1) the caller has furnished a structure into which messages should
* be returned to it (so it itself can decide what to do); or
* 2) warnings are enabled for either of the category parameters to the
* macro.
*
* The 'warning' parameter is the higher priority warning category to
* check. The macro calls ckWARN_d(warning), so warnings for it are
* considered to be on by default.
*
* The second, lower priority category is optional. To specify not to
* use one, call the macro
* like: NEED_MESSAGE(WARN_FOO,,)
* Otherwise like: NEED_MESSAGE(WARN_FOO, ckWARN_d, WARN_BAR)
*
* The second parameter could also have been ckWARN to specify that the
* second category isn't on by default.
*
* When called without a second category, the macro outputs a bunch of
* zeroes that the compiler should fold to nothing */
#define NEED_MESSAGE(warning, extra_ckWARN, extra_category) \
((flags & UTF8_CHECK_ONLY) ? 0 : \
((ckWARN_d(warning)) ? warning : \
((extra_ckWARN(extra_category +0)) ? extra_category +0 : \
((msgs) ? warning : 0))))

while (possible_problems) { /* Handle each possible problem */
U32 pack_warn = 0;
char * message = NULL;
Expand Down Expand Up @@ -1720,29 +1753,24 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
|| (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
{

/* The warnings code explicitly says it doesn't handle the
* case of packWARN2 and two categories which have
* parent-child relationship. Even if it works now to
* raise the warning if either is enabled, it wouldn't
* necessarily do so in the future. We output (only) the
* most dire warning */
if (! (flags & UTF8_CHECK_ONLY)) {
if (ckWARN_d(WARN_UTF8)) {
pack_warn = packWARN(WARN_UTF8);
}
else if (ckWARN_d(WARN_NON_UNICODE)) {
pack_warn = packWARN(WARN_NON_UNICODE);
}
else if (msgs) {
pack_warn = packWARN(WARN_UTF8);
}

if (pack_warn) {
message = Perl_form(aTHX_ "%s: %s (overflows)",
/* Overflow is a hybrid. If the word size on this platform
* were wide enough for this to not overflow, a non-Unicode
* code point would have been generated. If the caller
* wanted warnings for such code points, the warning
* category would be WARN_NON_UNICODE, On the other hand,
* overflow is considered a malformation, which is serious,
* and the category would be just WARN_UTF8. We clearly
* should warn if either category is enabled, but which
* category to use? Historically, we've used 'utf8' if it
* is enabled; and that seems like the more severe
* category, more befitting a malformation. */
pack_warn = NEED_MESSAGE(WARN_UTF8,
ckWARN_d, WARN_NON_UNICODE);
if (pack_warn) {
message = Perl_form(aTHX_ "%s: %s (overflows)",
malformed_text,
_byte_dump_string(s0, curlen, 0));
this_flag_bit = UTF8_GOT_OVERFLOW;
}
this_flag_bit = UTF8_GOT_OVERFLOW;
}
}

Expand All @@ -1759,9 +1787,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
assert(0);

disallowed = TRUE;
if ( (msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);
message = Perl_form(aTHX_ "%s (empty string)",
malformed_text);
Expand All @@ -1776,9 +1802,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,

if (! (flags & UTF8_ALLOW_CONTINUATION)) {
disallowed = TRUE;
if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);
message = Perl_form(aTHX_
"%s: %s (unexpected continuation byte 0x%02x,"
Expand All @@ -1797,9 +1821,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,

if (! (flags & UTF8_ALLOW_SHORT)) {
disallowed = TRUE;
if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);
message = Perl_form(aTHX_
"%s: %s (too short; %d byte%s available, need %d)",
Expand All @@ -1820,9 +1842,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,

if (! (flags & UTF8_ALLOW_NON_CONTINUATION)) {
disallowed = TRUE;
if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {

/* If we don't know for sure that the input length is
* valid, avoid as much as possible reading past the
Expand All @@ -1847,9 +1867,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
if (flags & UTF8_WARN_SURROGATE) {
*errors |= UTF8_GOT_SURROGATE;

if ( ! (flags & UTF8_CHECK_ONLY)
&& (msgs || ckWARN_d(WARN_SURROGATE)))
{
if (NEED_MESSAGE(WARN_SURROGATE,,)) {
pack_warn = packWARN(WARN_SURROGATE);

/* These are the only errors that can occur with a
Expand Down Expand Up @@ -1879,9 +1897,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
if (flags & UTF8_WARN_SUPER) {
*errors |= UTF8_GOT_SUPER;

if ( ! (flags & UTF8_CHECK_ONLY)
&& (msgs || ckWARN_d(WARN_NON_UNICODE)))
{
if (NEED_MESSAGE(WARN_NON_UNICODE,,)) {
pack_warn = packWARN(WARN_NON_UNICODE);

if (orig_problems & UTF8_GOT_TOO_SHORT) {
Expand All @@ -1902,10 +1918,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
* and before possibly bailing out, so that the more dire
* warning will override the regular one. */
if (UNLIKELY(UTF8_IS_PERL_EXTENDED(s0))) {
if ( ! (flags & UTF8_CHECK_ONLY)
&& (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
&& (msgs || ( ckWARN_d(WARN_NON_UNICODE)
|| ckWARN(WARN_PORTABLE))))
if ( (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
&& NEED_MESSAGE(WARN_NON_UNICODE, ckWARN, WARN_PORTABLE))
{
pack_warn = packWARN2(WARN_NON_UNICODE, WARN_PORTABLE);

Expand Down Expand Up @@ -1955,9 +1969,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
if (flags & UTF8_WARN_NONCHAR) {
*errors |= UTF8_GOT_NONCHAR;

if ( ! (flags & UTF8_CHECK_ONLY)
&& (msgs || ckWARN_d(WARN_NONCHAR)))
{
if (NEED_MESSAGE(WARN_NONCHAR,,)) {
/* The code above should have guaranteed that we don't
* get here with errors other than overlong */
assert (! (orig_problems
Expand Down Expand Up @@ -1992,9 +2004,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
else {
disallowed = TRUE;

if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);

/* These error types cause 'uv' to be something that
Expand Down

0 comments on commit 8f7d284

Please sign in to comment.