From a9c9e8174aac0b740e5be37b9b3ff26414504c06 Mon Sep 17 00:00:00 2001 From: Omar Ramos Date: Mon, 4 Nov 2024 10:01:05 -0800 Subject: [PATCH] Improve view support for pscale database dump / restore-dump (#932) * Added better support for views within dumper.go Currently, `dumper.go` is able to export view definitions without issue, but it unfortunately also runs the view query and includes, or attempts to include, the data associated with the view, which is unnecessary. The changes here add support for collecting the list of views so that processing can be slightly adjusted if a "table" is actually a view. In that situation, the same `-schema-view.sql` file suffix that the MyDumper project uses for views is utilized, and the step that unnecessarily runs the view query is skipped. * Added better support for views within loader.go The changes made to `loader.go` add support for collecting any files ending in the view suffix, `-schema-view.sql`, and processing them slightly differently than in `restoreTableSchema()`. The main change within the new `restoreViews()` method, aside from some variable name adjustments and the use of the `viewSuffix`, is the use of `DROP VIEW` during the overwrite step so that existing views can be properly deleted. Additionally, the views are created after the tables to help avoid dependency issues where a view references a table that has not been created yet. * Adding test fixes for dumper_test.go Some additional cases for the new `information_schema` query needed to be accounted for in the tests. 
--- internal/dumper/dumper.go | 40 ++++++- internal/dumper/dumper_test.go | 187 +++++++++++++++++++++++++++++++++ internal/dumper/loader.go | 71 +++++++++++++ 3 files changed, 296 insertions(+), 2 deletions(-) diff --git a/internal/dumper/dumper.go b/internal/dumper/dumper.go index ad04c378..7742257c 100644 --- a/internal/dumper/dumper.go +++ b/internal/dumper/dumper.go @@ -107,11 +107,19 @@ func (d *Dumper) Run(ctx context.Context) error { } tables := make([][]string, len(databases)) + views := make([]map[string]bool, len(databases)) for i, database := range databases { if d.cfg.Table != "" { tables[i] = strings.Split(d.cfg.Table, ",") } else { tables[i], err = d.allTables(conn, database) + + if err != nil { + return err + } + + views[i], err = d.allViews(conn, database) + if err != nil { return err } @@ -135,13 +143,18 @@ func (d *Dumper) Run(ctx context.Context) error { } conn := initPool.Get() - err := d.dumpTableSchema(conn, database, table) + err := d.dumpTableSchema(conn, database, table, views[i]) if err != nil { return err } initPool.Put(conn) + if _, ok := views[i][table]; ok { + // If we just processed a view we don't want to dump it so the next part is skipped: + continue + } + conn = pool.Get() wg.Add(1) go func(conn *Connection, database string, table string) { @@ -200,7 +213,7 @@ func writeMetaData(outdir string) error { return writeFile(file, "") } -func (d *Dumper) dumpTableSchema(conn *Connection, database string, table string) error { +func (d *Dumper) dumpTableSchema(conn *Connection, database string, table string, views map[string]bool) error { qr, err := conn.Fetch(fmt.Sprintf("SHOW CREATE TABLE `%s`.`%s`", database, table)) if err != nil { return err @@ -209,6 +222,11 @@ func (d *Dumper) dumpTableSchema(conn *Connection, database string, table string schema := qr.Rows[0][1].String() + ";\n" file := fmt.Sprintf("%s/%s.%s-schema.sql", d.cfg.Outdir, database, table) + if _, ok := views[table]; ok { + // 
https://github.com/mydumper/mydumper/blob/e55612616d17281a45eed0a60a9b054cdd1fe064/src/myloader_common.c#L374 + file = fmt.Sprintf("%s/%s.%s-schema-view.sql", d.cfg.Outdir, database, table) + } + err = writeFile(file, schema) if err != nil { return err @@ -379,6 +397,24 @@ func (d *Dumper) allTables(conn *Connection, database string) ([]string, error) return tables, nil } +func (d *Dumper) allViews(conn *Connection, database string) (map[string]bool, error) { + query := `SELECT TABLE_NAME + FROM information_schema.TABLES + WHERE TABLE_SCHEMA LIKE '%s' + AND TABLE_TYPE = 'VIEW' + ` + qr, err := conn.Fetch(fmt.Sprintf(query, database)) + if err != nil { + return nil, err + } + + views := make(map[string]bool) + for _, t := range qr.Rows { + views[t[0].String()] = true + } + return views, nil +} + func (d *Dumper) allDatabases(conn *Connection) ([]string, error) { qr, err := conn.Fetch("SHOW DATABASES") if err != nil { diff --git a/internal/dumper/dumper_test.go b/internal/dumper/dumper_test.go index c90558a4..268d4242 100644 --- a/internal/dumper/dumper_test.go +++ b/internal/dumper/dumper_test.go @@ -115,6 +115,23 @@ func TestDumper(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + fieldsResult := &sqltypes.Result{ Fields: []*querypb.Field{ {Name: "Field", Type: querypb.Type_VARCHAR}, @@ -140,6 +157,7 @@ func TestDumper(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test' \n\t\t\t and table_type = 
'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test`\\..* .*", selectResult) fakedbs.AddQueryPattern("set .*", &sqltypes.Result{}) @@ -261,6 +279,23 @@ func TestDumperUseUseReplica(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + fieldsResult := &sqltypes.Result{ Fields: []*querypb.Field{ {Name: "Field", Type: querypb.Type_VARCHAR}, @@ -286,6 +321,7 @@ func TestDumperUseUseReplica(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* FROM `test@replica`\\..* .*", selectResult) fakedbs.AddQueryPattern("set .*", &sqltypes.Result{}) @@ -408,6 +444,23 @@ func TestDumperGeneratedFields(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + fieldsResult := &sqltypes.Result{ Fields: []*querypb.Field{ {Name: "Field", Type: querypb.Type_VARCHAR}, @@ -433,6 +486,7 @@ func TestDumperGeneratedFields(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show 
create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test`\\..* .*", selectResult) fakedbs.AddQueryPattern("set .*", &sqltypes.Result{}) @@ -571,6 +625,23 @@ func TestDumperAll(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + databasesResult := &sqltypes.Result{ Fields: []*querypb.Field{ { @@ -614,6 +685,8 @@ func TestDumperAll(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test1' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test2' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test1`\\..* .*", selectResult1) fakedbs.AddQueryPattern("select .* from `test2`\\..* .*", selectResult2) @@ -758,6 +831,23 @@ func TestDumperAllUseReplica(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, 
[]byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + databasesResult := &sqltypes.Result{ Fields: []*querypb.Field{ { @@ -801,6 +891,8 @@ func TestDumperAllUseReplica(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test1' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test2' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test1@replica`\\..* .*", selectResult1) fakedbs.AddQueryPattern("select .* from `test2@replica`\\..* .*", selectResult2) @@ -946,6 +1038,23 @@ func TestDumperMultiple(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + databasesResult := &sqltypes.Result{ Fields: []*querypb.Field{ { @@ -989,6 +1098,8 @@ func TestDumperMultiple(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test1' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where 
table_schema like 'test2' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test1`\\..* .*", selectResult1) fakedbs.AddQueryPattern("select .* from `test2`\\..* .*", selectResult2) @@ -1134,6 +1245,23 @@ func TestDumperMultipleUseReplica(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + databasesResult := &sqltypes.Result{ Fields: []*querypb.Field{ { @@ -1177,6 +1305,8 @@ func TestDumperMultipleUseReplica(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test1' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test2' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test1@replica`\\..* .*", selectResult1) fakedbs.AddQueryPattern("select .* from `test2@replica`\\..* .*", selectResult2) @@ -1323,6 +1453,23 @@ func TestDumperSimpleRegexp(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } 
+ databasesResult := &sqltypes.Result{ Fields: []*querypb.Field{ { @@ -1375,6 +1522,8 @@ func TestDumperSimpleRegexp(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test1' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test2' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test1`\\..* .*", selectResult1) fakedbs.AddQueryPattern("select .* from `test2`\\..* .*", selectResult2) @@ -1520,6 +1669,23 @@ func TestDumperComplexRegexp(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + databasesResult := &sqltypes.Result{ Fields: []*querypb.Field{ { @@ -1572,6 +1738,8 @@ func TestDumperComplexRegexp(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test1' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test2' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from 
.*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test1`\\..* .*", selectResult1) fakedbs.AddQueryPattern("select .* from `test2`\\..* .*", selectResult2) @@ -1717,6 +1885,23 @@ func TestDumperInvertMatch(t *testing.T) { }, } + viewsResult := &sqltypes.Result{ + Fields: []*querypb.Field{ + { + Name: "Views_in_test", + Type: querypb.Type_VARCHAR, + }, + }, + Rows: [][]sqltypes.Value{ + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v1-2024-10-25")), + }, + { + sqltypes.MakeTrusted(querypb.Type_VARCHAR, []byte("v2-2024-10-25")), + }, + }, + } + databasesResult := &sqltypes.Result{ Fields: []*querypb.Field{ { @@ -1773,6 +1958,8 @@ func TestDumperInvertMatch(t *testing.T) { fakedbs.AddQueryPattern("use .*", &sqltypes.Result{}) fakedbs.AddQueryPattern("show create table .*", schemaResult) fakedbs.AddQueryPattern("show tables from .*", tablesResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test1' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) + fakedbs.AddQueryPattern("select table_name \n\t\t\t from information_schema.tables \n\t\t\t where table_schema like 'test2' \n\t\t\t and table_type = 'view'\n\t\t\t", viewsResult) fakedbs.AddQueryPattern("show fields from .*", fieldsResult) fakedbs.AddQueryPattern("select .* from `test1`\\..* .*", selectResult1) fakedbs.AddQueryPattern("select .* from `test2`\\..* .*", selectResult2) diff --git a/internal/dumper/loader.go b/internal/dumper/loader.go index 66939c8c..8d57108b 100644 --- a/internal/dumper/loader.go +++ b/internal/dumper/loader.go @@ -20,12 +20,14 @@ import ( type Files struct { databases []string schemas []string + views []string tables []string } const ( dbSuffix = "-schema-create.sql" schemaSuffix = "-schema.sql" + viewSuffix = "-schema-view.sql" tableSuffix = ".sql" ) @@ -68,6 +70,13 @@ func (l *Loader) Run(ctx context.Context) error { } pool.Put(conn) + // views. 
+ conn = pool.Get() + if err := l.restoreViews(l.cfg.OverwriteTables, files.views, conn); err != nil { + return err + } + pool.Put(conn) + // Shuffle the tables for i := range files.tables { j := rand.Intn(i + 1) @@ -140,6 +149,8 @@ func (l *Loader) loadFiles(dir string) (*Files, error) { files.databases = append(files.databases, path) case strings.HasSuffix(path, schemaSuffix): files.schemas = append(files.schemas, path) + case strings.HasSuffix(path, viewSuffix): + files.views = append(files.views, path) default: if strings.HasSuffix(path, tableSuffix) { files.tables = append(files.tables, path) @@ -234,6 +245,66 @@ func (l *Loader) restoreTableSchema(overwrite bool, tables []string, conn *Conne return nil } +func (l *Loader) restoreViews(overwrite bool, views []string, conn *Connection) error { + for _, viewFilename := range views { + base := filepath.Base(viewFilename) + name := strings.TrimSuffix(base, viewSuffix) + db := strings.Split(name, ".")[0] + view := strings.Split(name, ".")[1] + name = fmt.Sprintf("`%v`.`%v`", db, view) + + l.log.Info( + "working view", + zap.String("database", db), + zap.String("view ", view), + ) + + err := conn.Execute(fmt.Sprintf("USE `%s`", db)) + if err != nil { + return err + } + + err = conn.Execute("SET FOREIGN_KEY_CHECKS=0") + if err != nil { + return err + } + + data, err := os.ReadFile(viewFilename) + if err != nil { + return err + } + query1 := string(data) + querys := strings.Split(query1, ";\n") + for _, query := range querys { + if !strings.HasPrefix(query, "/*") && query != "" { + if overwrite { + l.log.Info( + "drop(overwrite.is.true)", + zap.String("database", db), + zap.String("view ", view), + ) + + dropQuery := fmt.Sprintf("DROP VIEW IF EXISTS %s", name) + err = conn.Execute(dropQuery) + if err != nil { + return err + } + } + err = conn.Execute(query) + if err != nil { + return err + } + } + } + l.log.Info("restoring views", + zap.String("database", db), + zap.String("view ", view), + ) + } + + return nil +} + 
func (l *Loader) restoreTable(ctx context.Context, table string, conn *Connection) (int, error) { bytes := 0 part := "0"