From 77d68f7e33efdd4616f7a4b5fe6d301684dcd58c Mon Sep 17 00:00:00 2001 From: Dave Miller Date: Tue, 20 Aug 2024 00:50:28 -0400 Subject: [PATCH] Bug 1898882: Test what the DB calls utf8 --- Bugzilla/Config/Common.pm | 7 +++-- Bugzilla/Config/General.pm | 9 +++++- Bugzilla/DB/MariaDB.pm | 31 ++++++++++++++++--- Bugzilla/DB/Mysql.pm | 31 ++++++++++++++++--- .../en/default/admin/params/common.html.tmpl | 5 +++ .../en/default/admin/params/general.html.tmpl | 9 ++++-- 6 files changed, 76 insertions(+), 16 deletions(-) diff --git a/Bugzilla/Config/Common.pm b/Bugzilla/Config/Common.pm index 19bf9c068e..14d72115a5 100644 --- a/Bugzilla/Config/Common.pm +++ b/Bugzilla/Config/Common.pm @@ -88,12 +88,15 @@ sub check_email { sub check_utf8 { my ($utf8, $entry) = @_; - # You cannot turn off the UTF-8 parameter. + my $current_utf8 = Bugzilla->params->{'utf8'} // ''; if (!$utf8) { return "You cannot disable UTF-8 support."; } - elsif ($entry eq 'utf8mb4' && $utf8 ne 'utf8mb4') { + elsif ($current_utf8 eq 'utf8mb3' && $utf8 ne 'utf8mb3' && $utf8 ne 'utf8mb4') { + return "You cannot downgrade from utf8mb3 support, only keep it or change to utf8mb4."; + } + elsif ($current_utf8 eq 'utf8mb4' && $utf8 ne 'utf8mb4') { return "You cannot disable UTF8-MB4 support."; } diff --git a/Bugzilla/Config/General.pm b/Bugzilla/Config/General.pm index 5d8ab09463..f6cd369c62 100644 --- a/Bugzilla/Config/General.pm +++ b/Bugzilla/Config/General.pm @@ -34,11 +34,18 @@ use constant get_param_list => ( { name => 'utf8', type => 's', - choices => ['1', 'utf8', 'utf8mb4'], + choices => ['1', 'utf8', 'utf8mb3', 'utf8mb4'], default => 'utf8', checker => \&check_utf8 }, + { + name => 'utf8_collate', + type => 'r', + no_reset => '1', + default => 'utf8mb4_unicode_520_ci', + }, + {name => 'announcehtml', type => 'l', default => ''}, { diff --git a/Bugzilla/DB/MariaDB.pm b/Bugzilla/DB/MariaDB.pm index 3f99afd001..3edc13474b 100644 --- a/Bugzilla/DB/MariaDB.pm +++ b/Bugzilla/DB/MariaDB.pm @@ -28,6 +28,7 @@ extends 
qw(Bugzilla::DB); use Bugzilla::Constants; use Bugzilla::Install::Util qw(install_string); +use Bugzilla::Config; use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::DB::Schema::MariaDB; @@ -312,6 +313,24 @@ sub bz_check_server_version { sub bz_setup_database { my ($self) = @_; + # Before touching anything else, find out whether this database server does + # any aliasing of the character set we plan to use so we can check for + # already converted tables properly. We create a table in our intended + # charset and read back its collation; params change only on success. + my $db_name = Bugzilla->localconfig->db_name; + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; + $self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate); + my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? AND TABLE_NAME='utf8_test'", undef, $db_name); + $self->do("DROP TABLE `utf8_test`"); + my ($found_charset) = (($found_collate // '') =~ m/^([a-z0-9]+)_/); + Bugzilla->params->{'utf8'} = $found_charset if $found_charset; + Bugzilla->params->{'utf8_collate'} = $found_collate if $found_charset; + Bugzilla::Config::write_params() if $found_charset; + # reload these because they get used later. + $charset = $self->utf8_charset; + $collate = $self->utf8_collate; + # The "comments" field of the bugs_fulltext table could easily exceed # MySQL's default max_allowed_packet. Also, MySQL should never have # a max_allowed_packet smaller than our max_attachment_size. So, we @@ -404,7 +423,6 @@ sub bz_setup_database { } # Upgrade tables from MyISAM to InnoDB - my $db_name = Bugzilla->localconfig->db_name; my $myisam_tables = $self->selectcol_arrayref( 'SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM' @@ -629,8 +647,6 @@ sub bz_setup_database { # the table charsets. # # TABLE_COLLATION IS NOT NULL prevents us from trying to convert views. 
- my $charset = $self->utf8_charset; - my $collate = $self->utf8_collate; my $non_utf8_tables = $self->selectrow_array( "SELECT 1 FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL @@ -836,11 +852,16 @@ sub _fix_defaults { } sub utf8_charset { - return 'utf8mb4'; + return 'utf8mb4' unless Bugzilla->params->{'utf8'}; + return 'utf8mb4' if Bugzilla->params->{'utf8'} eq '1'; + return Bugzilla->params->{'utf8'}; } sub utf8_collate { - return 'utf8mb4_unicode_520_ci'; + my $charset = utf8_charset(); + return $charset . '_unicode_520_ci' unless Bugzilla->params->{'utf8_collate'}; + return $charset . '_unicode_520_ci' unless (Bugzilla->params->{'utf8_collate'} =~ /^\Q${charset}\E_/); + return Bugzilla->params->{'utf8_collate'}; } sub default_row_format { diff --git a/Bugzilla/DB/Mysql.pm b/Bugzilla/DB/Mysql.pm index 1f3a03fd92..c287a0eb95 100644 --- a/Bugzilla/DB/Mysql.pm +++ b/Bugzilla/DB/Mysql.pm @@ -28,6 +28,7 @@ extends qw(Bugzilla::DB); use Bugzilla::Constants; use Bugzilla::Install::Util qw(install_string); +use Bugzilla::Config; use Bugzilla::Util; use Bugzilla::Error; use Bugzilla::DB::Schema::Mysql; @@ -313,6 +314,24 @@ sub bz_check_server_version { sub bz_setup_database { my ($self) = @_; + # Before touching anything else, find out whether this database server does + # any aliasing of the character set we plan to use so we can check for + # already converted tables properly. We create a table in our intended + # charset and read back its collation; params change only on success. + my $db_name = Bugzilla->localconfig->db_name; + my $charset = $self->utf8_charset; + my $collate = $self->utf8_collate; + $self->do("CREATE TABLE `utf8_test` (id tinyint) CHARACTER SET ? COLLATE ?", undef, $charset, $collate); + my ($found_collate) = $self->selectrow_array("SELECT TABLE_COLLATION FROM information_schema.TABLES WHERE TABLE_SCHEMA=? 
AND TABLE_NAME='utf8_test'", undef, $db_name); + $self->do("DROP TABLE `utf8_test`"); + my ($found_charset) = (($found_collate // '') =~ m/^([a-z0-9]+)_/); + Bugzilla->params->{'utf8'} = $found_charset if $found_charset; + Bugzilla->params->{'utf8_collate'} = $found_collate if $found_charset; + Bugzilla::Config::write_params() if $found_charset; + # reload these because they get used later. + $charset = $self->utf8_charset; + $collate = $self->utf8_collate; + # The "comments" field of the bugs_fulltext table could easily exceed # MySQL's default max_allowed_packet. Also, MySQL should never have # a max_allowed_packet smaller than our max_attachment_size. So, we @@ -405,7 +424,6 @@ sub bz_setup_database { } # Upgrade tables from MyISAM to InnoDB - my $db_name = Bugzilla->localconfig->db_name; my $myisam_tables = $self->selectcol_arrayref( 'SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND ENGINE = ?', undef, $db_name, 'MyISAM' @@ -630,8 +648,6 @@ sub bz_setup_database { # the table charsets. # # TABLE_COLLATION IS NOT NULL prevents us from trying to convert views. - my $charset = $self->utf8_charset; - my $collate = $self->utf8_collate; my $non_utf8_tables = $self->selectrow_array( "SELECT 1 FROM information_schema.TABLES WHERE TABLE_SCHEMA = ? AND TABLE_COLLATION IS NOT NULL @@ -837,11 +853,16 @@ sub _fix_defaults { } sub utf8_charset { - return 'utf8mb4'; + return 'utf8mb4' unless Bugzilla->params->{'utf8'}; + return 'utf8mb4' if Bugzilla->params->{'utf8'} eq '1'; + return Bugzilla->params->{'utf8'}; } sub utf8_collate { - return 'utf8mb4_unicode_520_ci'; + my $charset = utf8_charset(); + return $charset . '_unicode_520_ci' unless Bugzilla->params->{'utf8_collate'}; + return $charset . 
'_unicode_520_ci' unless (Bugzilla->params->{'utf8_collate'} =~ /^\Q${charset}\E_/); + return Bugzilla->params->{'utf8_collate'}; } sub default_row_format { diff --git a/template/en/default/admin/params/common.html.tmpl b/template/en/default/admin/params/common.html.tmpl index 011bcda166..0a0bb540b1 100644 --- a/template/en/default/admin/params/common.html.tmpl +++ b/template/en/default/admin/params/common.html.tmpl @@ -44,6 +44,11 @@ [% IF param.type == "t" %] + [% ELSIF param.type == "r" %] + 
+ This value is read-only and you can't change it. [% ELSIF param.type == "p" %] contrib/recode.pl" _ " script." - _ "

Note that if you turn this parameter from "off" to" - _ " "on", you must re-run checksetup.pl immediately" - _ " afterward.

", + _ "

Note that if you change this parameter you must re-run" + _ " checksetup.pl immediately afterward.

", + + utf8_collate => + "The collation to use in database tables. This parameter is" + _ " automatically set by checksetup.pl.", announcehtml => "If this field is non-empty, then $terms.Bugzilla will display whatever is"