Skip to content

Commit

Permalink
Bug 1898882: Test what the DB calls utf8
Browse files Browse the repository at this point in the history
  • Loading branch information
justdave committed Aug 20, 2024
1 parent 39cddb6 commit 77d68f7
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 16 deletions.
7 changes: 5 additions & 2 deletions Bugzilla/Config/Common.pm
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,15 @@ sub check_email {

sub check_utf8 {
my ($utf8, $entry) = @_;

# You cannot turn off the UTF-8 parameter.
my $current_utf8 = Bugzilla->params->{'utf8'};
if (!$utf8) {
return "You cannot disable UTF-8 support.";
}
elsif ($entry eq 'utf8mb4' && $utf8 ne 'utf8mb4') {
elsif ($current_utf8 eq 'utf8mb3' && $utf8 ne 'utf8mb3' && $utf8 ne 'utf8mb4') {
return "You cannot downgrade from utf8mb3 support, only keep it or change to utf8mb4.";
}
elsif ($current_utf8 eq 'utf8mb4' && $utf8 ne 'utf8mb4') {
return "You cannot disable UTF8-MB4 support.";
}

Expand Down
9 changes: 8 additions & 1 deletion Bugzilla/Config/General.pm
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,18 @@ use constant get_param_list => (
{
name => 'utf8',
type => 's',
choices => ['1', 'utf8', 'utf8mb4'],
choices => ['1', 'utf8', 'utf8mb3', 'utf8mb4'],
default => 'utf8',
checker => \&check_utf8
},

{
name => 'utf8_collate',
type => 'r',
no_reset => '1',
default => 'utf8mb4_unicode_520_ci',
},

{name => 'announcehtml', type => 'l', default => ''},

{
Expand Down
31 changes: 26 additions & 5 deletions Bugzilla/DB/MariaDB.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB);

use Bugzilla::Constants;
use Bugzilla::Install::Util qw(install_string);
use Bugzilla::Config;
use Bugzilla::Util;
use Bugzilla::Error;
use Bugzilla::DB::Schema::MariaDB;
Expand Down Expand Up @@ -312,6 +313,24 @@ sub bz_check_server_version {
sub bz_setup_database {
my ($self) = @_;

# Before touching anything else, find out whether this database server
# aliases the character set we plan to use, so that we can correctly detect
# tables that have already been converted. We do this by creating a table
# with our intended charset and collation and then checking how it reads back.
my $db_name = Bugzilla->localconfig->{db_name};
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
$self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate);
my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name);
$self->do("DROP TABLE `utf8_test`");
my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/);
Bugzilla->params->{'utf8'} = $found_charset;
Bugzilla->params->{'utf8_collate'} = $found_collate;
Bugzilla::Config::write_params();
# reload these because they get used later.
$charset = $self->utf8_charset;
$collate = $self->utf8_collate;

# The "comments" field of the bugs_fulltext table could easily exceed
# MySQL's default max_allowed_packet. Also, MySQL should never have
# a max_allowed_packet smaller than our max_attachment_size. So, we
Expand Down Expand Up @@ -404,7 +423,6 @@ sub bz_setup_database {
}

# Upgrade tables from MyISAM to InnoDB
my $db_name = Bugzilla->localconfig->db_name;
my $myisam_tables = $self->selectcol_arrayref(
'SELECT TABLE_NAME FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM'
Expand Down Expand Up @@ -629,8 +647,6 @@ sub bz_setup_database {
# the table charsets.
#
# TABLE_COLLATION IS NOT NULL prevents us from trying to convert views.
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
my $non_utf8_tables = $self->selectrow_array(
"SELECT 1 FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL
Expand Down Expand Up @@ -836,11 +852,16 @@ sub _fix_defaults {
}

# Returns the name of the character set to use for database tables.
#
# The value comes from the 'utf8' parameter. When that parameter is unset
# (or otherwise false), or still holds the legacy value '1', 'utf8mb4' is
# returned instead. Any other value is returned unchanged.
sub utf8_charset {
  # Read the parameter once rather than looking it up for every test.
  my $utf8 = Bugzilla->params->{'utf8'};
  return 'utf8mb4' if !$utf8 || $utf8 eq '1';
  return $utf8;
}

# Returns the name of the collation to use for database tables.
#
# The 'utf8_collate' parameter is used only when it is set and begins with
# the current charset name (as returned by utf8_charset()) followed by an
# underscore. Otherwise the result is "<charset>_unicode_520_ci".
sub utf8_collate {
  my $charset = utf8_charset();
  my $collate = Bugzilla->params->{'utf8_collate'};

  # \Q...\E makes the charset match literally instead of as a regex fragment.
  return $collate if $collate && $collate =~ /^\Q$charset\E_/;
  return $charset . '_unicode_520_ci';
}

sub default_row_format {
Expand Down
31 changes: 26 additions & 5 deletions Bugzilla/DB/Mysql.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB);

use Bugzilla::Constants;
use Bugzilla::Install::Util qw(install_string);
use Bugzilla::Config;
use Bugzilla::Util;
use Bugzilla::Error;
use Bugzilla::DB::Schema::Mysql;
Expand Down Expand Up @@ -313,6 +314,24 @@ sub bz_check_server_version {
sub bz_setup_database {
my ($self) = @_;

# Before touching anything else, find out whether this database server
# aliases the character set we plan to use, so that we can correctly detect
# tables that have already been converted. We do this by creating a table
# with our intended charset and collation and then checking how it reads back.
my $db_name = Bugzilla->localconfig->{db_name};
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
$self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate);
my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name);
$self->do("DROP TABLE `utf8_test`");
my ($found_charset) = ($found_collate =~ m/^([a-z0-9]+)_/);
Bugzilla->params->{'utf8'} = $found_charset;
Bugzilla->params->{'utf8_collate'} = $found_collate;
Bugzilla::Config::write_params();
# reload these because they get used later.
$charset = $self->utf8_charset;
$collate = $self->utf8_collate;

# The "comments" field of the bugs_fulltext table could easily exceed
# MySQL's default max_allowed_packet. Also, MySQL should never have
# a max_allowed_packet smaller than our max_attachment_size. So, we
Expand Down Expand Up @@ -405,7 +424,6 @@ sub bz_setup_database {
}

# Upgrade tables from MyISAM to InnoDB
my $db_name = Bugzilla->localconfig->db_name;
my $myisam_tables = $self->selectcol_arrayref(
'SELECT TABLE_NAME FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM'
Expand Down Expand Up @@ -630,8 +648,6 @@ sub bz_setup_database {
# the table charsets.
#
# TABLE_COLLATION IS NOT NULL prevents us from trying to convert views.
my $charset = $self->utf8_charset;
my $collate = $self->utf8_collate;
my $non_utf8_tables = $self->selectrow_array(
"SELECT 1 FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL
Expand Down Expand Up @@ -837,11 +853,16 @@ sub _fix_defaults {
}

# Returns the name of the character set to use for database tables.
#
# The value comes from the 'utf8' parameter. When that parameter is unset
# (or otherwise false), or still holds the legacy value '1', 'utf8mb4' is
# returned instead. Any other value is returned unchanged.
sub utf8_charset {
  # Read the parameter once rather than looking it up for every test.
  my $utf8 = Bugzilla->params->{'utf8'};
  return 'utf8mb4' if !$utf8 || $utf8 eq '1';
  return $utf8;
}

# Returns the name of the collation to use for database tables.
#
# The 'utf8_collate' parameter is used only when it is set and begins with
# the current charset name (as returned by utf8_charset()) followed by an
# underscore. Otherwise the result is "<charset>_unicode_520_ci".
sub utf8_collate {
  my $charset = utf8_charset();
  my $collate = Bugzilla->params->{'utf8_collate'};

  # \Q...\E makes the charset match literally instead of as a regex fragment.
  return $collate if $collate && $collate =~ /^\Q$charset\E_/;
  return $charset . '_unicode_520_ci';
}

sub default_row_format {
Expand Down
5 changes: 5 additions & 0 deletions template/en/default/admin/params/common.html.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
[% IF param.type == "t" %]
<input type="text" size="80" name="[% param.name FILTER html %]"
id="[% param.name FILTER html %]" value="[% Param(param.name) FILTER html %]">
[% ELSIF param.type == "r" %]
<input type="text" size="80" name="[% param.name FILTER html %]_readonly"
id="[% param.name FILTER html %]_readonly" value="[% Param(param.name) FILTER html %]" disabled>
<input type="hidden" name="[% param.name FILTER html %]" value="[% Param(param.name) FILTER html %]"><br>
This value is read-only and you can't change it.
[% ELSIF param.type == "p" %]
<input type="password" size="80" name="[% param.name FILTER html %]"
id="[% param.name FILTER html %]" value="[% Param(param.name) FILTER html %]"
Expand Down
9 changes: 6 additions & 3 deletions template/en/default/admin/params/general.html.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,12 @@
_ " only after the data has been converted from existing legacy"
_ " character encodings to UTF-8, using the <kbd>contrib/recode.pl</kbd>"
_ " script</strong>."
_ " <p>Note that if you turn this parameter from &quot;off&quot; to"
_ " &quot;on&quot;, you must re-run <kbd>checksetup.pl</kbd> immediately"
_ " afterward.</p>",
_ " <p>Note that if you change this parameter you must re-run"
_ " <kbd>checksetup.pl</kbd> immediately afterward.</p>",

utf8_collate =>
"The collation to use in database tables. This parameter is"
_ " automatically set by checksetup.pl.",

announcehtml =>
"If this field is non-empty, then $terms.Bugzilla will display whatever is"
Expand Down

0 comments on commit 77d68f7

Please sign in to comment.