Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

utf8n_to_uvchr_msgs(): Macroïze a common paradigm #22762

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 59 additions & 49 deletions utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -1671,6 +1671,39 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
*msgs = NULL;
}


/* Returns 0 if no message needs to be generated for this problem even
* if everything else says to. Otherwise returns the warning category
* to use for the message.
*
* No message need be generated if the UTF8_CHECK_ONLY flag has been
* set by the caller. Otherwise, a message should be generated if
* either:
* 1) the caller has furnished a structure into which messages should
* be returned to it (so it itself can decide what to do); or
* 2) warnings are enabled for either of the category parameters to the
* macro.
*
* The 'warning' parameter is the higher priority warning category to
* check. The macro calls ckWARN_d(warning), so warnings for it are
* considered to be on by default.
*
* The second, lower priority category is optional. To specify not to
* use one, call the macro
* like: NEED_MESSAGE(WARN_FOO,,)
* Otherwise like: NEED_MESSAGE(WARN_FOO, ckWARN_d, WARN_BAR)
*
* The second parameter could also have been ckWARN to specify that the
* second category isn't on by default.
*
* When called without a second category, the macro outputs a bunch of
* zeroes that the compiler should fold to nothing */
#define NEED_MESSAGE(warning, extra_ckWARN, extra_category) \
((flags & UTF8_CHECK_ONLY) ? 0 : \
((ckWARN_d(warning)) ? warning : \
((extra_ckWARN(extra_category +0)) ? extra_category +0 : \
((msgs) ? warning : 0))))

while (possible_problems) { /* Handle each possible problem */
U32 pack_warn = 0;
char * message = NULL;
Expand Down Expand Up @@ -1720,29 +1753,24 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
|| (flags & (UTF8_WARN_SUPER|UTF8_WARN_PERL_EXTENDED)))
{

/* The warnings code explicitly says it doesn't handle the
* case of packWARN2 and two categories which have
* parent-child relationship. Even if it works now to
* raise the warning if either is enabled, it wouldn't
* necessarily do so in the future. We output (only) the
* most dire warning */
if (! (flags & UTF8_CHECK_ONLY)) {
if (ckWARN_d(WARN_UTF8)) {
pack_warn = packWARN(WARN_UTF8);
}
else if (ckWARN_d(WARN_NON_UNICODE)) {
pack_warn = packWARN(WARN_NON_UNICODE);
}
else if (msgs) {
pack_warn = packWARN(WARN_UTF8);
}

if (pack_warn) {
message = Perl_form(aTHX_ "%s: %s (overflows)",
/* Overflow is a hybrid. If the word size on this platform
* were wide enough for this to not overflow, a non-Unicode
* code point would have been generated. If the caller
* wanted warnings for such code points, the warning
* category would be WARN_NON_UNICODE, On the other hand,
* overflow is considered a malformation, which is serious,
* and the category would be just WARN_UTF8. We clearly
* should warn if either category is enabled, but which
* category to use? Historically, we've used 'utf8' if it
* is enabled; and that seems like the more severe
* category, more befitting a malformation. */
pack_warn = NEED_MESSAGE(WARN_UTF8,
ckWARN_d, WARN_NON_UNICODE);
Comment on lines +1767 to +1768
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the same logic. In the old version, if msgs was true, ckWARN_d(WARN_UTF8) false, ckWARN_d(WARN_NON_UNICODE) true, we still used WARN_UTF8. With the new macro, we now use WARN_NON_UNICODE.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes; it is a bug fix. I don't know why I thought it not necessary to point that out in the commit message. Now revised

if (pack_warn) {
message = Perl_form(aTHX_ "%s: %s (overflows)",
malformed_text,
_byte_dump_string(s0, curlen, 0));
this_flag_bit = UTF8_GOT_OVERFLOW;
}
this_flag_bit = UTF8_GOT_OVERFLOW;
}
}

Expand All @@ -1759,9 +1787,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
assert(0);

disallowed = TRUE;
if ( (msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);
message = Perl_form(aTHX_ "%s (empty string)",
malformed_text);
Expand All @@ -1776,9 +1802,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,

if (! (flags & UTF8_ALLOW_CONTINUATION)) {
disallowed = TRUE;
if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);
message = Perl_form(aTHX_
"%s: %s (unexpected continuation byte 0x%02x,"
Expand All @@ -1797,9 +1821,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,

if (! (flags & UTF8_ALLOW_SHORT)) {
disallowed = TRUE;
if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);
message = Perl_form(aTHX_
"%s: %s (too short; %d byte%s available, need %d)",
Expand All @@ -1820,9 +1842,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,

if (! (flags & UTF8_ALLOW_NON_CONTINUATION)) {
disallowed = TRUE;
if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {

/* If we don't know for sure that the input length is
* valid, avoid as much as possible reading past the
Expand All @@ -1847,9 +1867,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
if (flags & UTF8_WARN_SURROGATE) {
*errors |= UTF8_GOT_SURROGATE;

if ( ! (flags & UTF8_CHECK_ONLY)
&& (msgs || ckWARN_d(WARN_SURROGATE)))
{
if (NEED_MESSAGE(WARN_SURROGATE,,)) {
pack_warn = packWARN(WARN_SURROGATE);

/* These are the only errors that can occur with a
Expand Down Expand Up @@ -1879,9 +1897,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
if (flags & UTF8_WARN_SUPER) {
*errors |= UTF8_GOT_SUPER;

if ( ! (flags & UTF8_CHECK_ONLY)
&& (msgs || ckWARN_d(WARN_NON_UNICODE)))
{
if (NEED_MESSAGE(WARN_NON_UNICODE,,)) {
pack_warn = packWARN(WARN_NON_UNICODE);

if (orig_problems & UTF8_GOT_TOO_SHORT) {
Expand All @@ -1902,10 +1918,8 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
* and before possibly bailing out, so that the more dire
* warning will override the regular one. */
if (UNLIKELY(UTF8_IS_PERL_EXTENDED(s0))) {
if ( ! (flags & UTF8_CHECK_ONLY)
&& (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
&& (msgs || ( ckWARN_d(WARN_NON_UNICODE)
|| ckWARN(WARN_PORTABLE))))
if ( (flags & (UTF8_WARN_PERL_EXTENDED|UTF8_WARN_SUPER))
&& NEED_MESSAGE(WARN_NON_UNICODE, ckWARN, WARN_PORTABLE))
{
pack_warn = packWARN2(WARN_NON_UNICODE, WARN_PORTABLE);

Expand Down Expand Up @@ -1955,9 +1969,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
if (flags & UTF8_WARN_NONCHAR) {
*errors |= UTF8_GOT_NONCHAR;

if ( ! (flags & UTF8_CHECK_ONLY)
&& (msgs || ckWARN_d(WARN_NONCHAR)))
{
if (NEED_MESSAGE(WARN_NONCHAR,,)) {
/* The code above should have guaranteed that we don't
* get here with errors other than overlong */
assert (! (orig_problems
Expand Down Expand Up @@ -1992,9 +2004,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
else {
disallowed = TRUE;

if (( msgs
|| ckWARN_d(WARN_UTF8)) && ! (flags & UTF8_CHECK_ONLY))
{
if (NEED_MESSAGE(WARN_UTF8,,)) {
pack_warn = packWARN(WARN_UTF8);

/* These error types cause 'uv' to be something that
Expand Down
Loading