From 36ff8219cca5c611785a84fdf80c65226aa1c73a Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Fri, 26 Apr 2024 19:25:36 +0300 Subject: [PATCH 01/13] fix: Fixed generated columns dumping and restoration #77 * Added column attgenerated introspection for pg >= 12 version * Excluded generated columns from COPY stmnt --- internal/db/postgres/context/context.go | 4 +-- internal/db/postgres/context/pg_catalog.go | 4 +-- internal/db/postgres/context/queries.go | 7 +++-- internal/db/postgres/context/schema.go | 4 +-- internal/db/postgres/context/table.go | 30 ++++++++++++++-------- internal/db/postgres/entries/table.go | 5 ++-- pkg/toolkit/column.go | 15 ++++++----- 7 files changed, 42 insertions(+), 27 deletions(-) diff --git a/internal/db/postgres/context/context.go b/internal/db/postgres/context/context.go index efb75f84..c750d2b0 100644 --- a/internal/db/postgres/context/context.go +++ b/internal/db/postgres/context/context.go @@ -71,14 +71,14 @@ func NewRuntimeContext( return nil, fmt.Errorf("cannot validate and build table config: %w", err) } - dataSectionObjects, err := getDumpObjects(ctx, tx, opt, tables) + dataSectionObjects, err := getDumpObjects(ctx, version, tx, opt, tables) if err != nil { return nil, fmt.Errorf("cannot build dump object list: %w", err) } scoreTablesEntriesAndSort(dataSectionObjects, cfg) - schema, err := getDatabaseSchema(ctx, tx, opt) + schema, err := getDatabaseSchema(ctx, tx, opt, version) if err != nil { return nil, fmt.Errorf("cannot get database schema: %w", err) } diff --git a/internal/db/postgres/context/pg_catalog.go b/internal/db/postgres/context/pg_catalog.go index 78c2f28e..203495f9 100644 --- a/internal/db/postgres/context/pg_catalog.go +++ b/internal/db/postgres/context/pg_catalog.go @@ -37,7 +37,7 @@ const ( // TODO: Rewrite it using gotemplate func getDumpObjects( - ctx context.Context, tx pgx.Tx, options *pgdump.Options, config map[toolkit.Oid]*entries.Table, + ctx context.Context, version int, tx pgx.Tx, options *pgdump.Options, config map[toolkit.Oid]*entries.Table, ) ([]entries.Entry, error) { // Building relation search query using regexp adaptation rules and pre-defined query templates @@ -137,7 +137,7 @@ func getDumpObjects( for _, obj := range dataObjects { switch v := obj.(type) { case *entries.Table: - columns, err := getColumnsConfig(ctx, tx, v.Oid) + columns, err := getColumnsConfig(ctx, tx, v.Oid, version) if err != nil { return nil, fmt.Errorf("unable to collect table columns: %w", err) } diff --git a/internal/db/postgres/context/queries.go b/internal/db/postgres/context/queries.go index 22deed43..06b79628 100644 --- a/internal/db/postgres/context/queries.go +++ b/internal/db/postgres/context/queries.go @@ -67,7 +67,7 @@ var ( ` // TableColumnsQuery - SQL query for getting all columns of table - TableColumnsQuery = ` + TableColumnsQuery = template.Must(template.New("TableColumnsQuery").Parse(` SELECT a.attname as name, a.atttypid::TEXT::INT as typeoid, @@ -75,10 +75,13 @@ var ( a.attnotnull as notnull, a.atttypmod as mod, a.attnum as num + {{ if ge .Version 120000 }} + ,a.attgenerated != '' as attgenerated + {{ end }} FROM pg_catalog.pg_attribute a WHERE a.attrelid = $1 AND a.attnum > 0 AND NOT a.attisdropped ORDER BY a.attnum - ` + `)) CustomTypesWithTypeChainQuery = ` with RECURSIVE diff --git a/internal/db/postgres/context/schema.go b/internal/db/postgres/context/schema.go index 893d436d..50e76fe9 100644 --- a/internal/db/postgres/context/schema.go +++ b/internal/db/postgres/context/schema.go @@ -10,7 +10,7 @@ import ( ) func getDatabaseSchema( - ctx context.Context, tx pgx.Tx, options *pgdump.Options, + ctx context.Context, tx pgx.Tx, options *pgdump.Options, version int, ) ([]*toolkit.Table, error) { var res []*toolkit.Table query, err := BuildSchemaIntrospectionQuery( @@ -41,7 +41,7 @@ func getDatabaseSchema( // fill columns for _, table := range res { - columns, err := getColumnsConfig(ctx, tx, table.Oid) + columns, err := getColumnsConfig(ctx, tx, table.Oid, version) if err != nil { return nil, err } diff --git a/internal/db/postgres/context/table.go b/internal/db/postgres/context/table.go index 64c59436..c0626906 100644 --- a/internal/db/postgres/context/table.go +++ b/internal/db/postgres/context/table.go @@ -59,7 +59,7 @@ func validateAndBuildTablesConfig( table.Constraints = constraints // Assign columns and transformersMap if were found - columns, err := getColumnsConfig(ctx, tx, table.Oid) + columns, err := getColumnsConfig(ctx, tx, table.Oid, version) if err != nil { return nil, nil, err } @@ -177,9 +177,17 @@ func getTable(ctx context.Context, tx pgx.Tx, t *domains.Table) ([]*entries.Tabl return tables, warnings, nil } -func getColumnsConfig(ctx context.Context, tx pgx.Tx, oid toolkit.Oid) ([]*toolkit.Column, error) { +func getColumnsConfig(ctx context.Context, tx pgx.Tx, oid toolkit.Oid, version int) ([]*toolkit.Column, error) { var res []*toolkit.Column - rows, err := tx.Query(ctx, TableColumnsQuery, oid) + buf := bytes.NewBuffer(nil) + err := TableColumnsQuery.Execute( + buf, + map[string]int{"Version": version}, + ) + if err != nil { + return nil, fmt.Errorf("error templating TableColumnsQuery: %w", err) + } + rows, err := tx.Query(ctx, buf.String(), oid) if err != nil { return nil, fmt.Errorf("unable execute tableColumnQuery: %w", err) } @@ -188,8 +196,14 @@ func getColumnsConfig(ctx context.Context, tx pgx.Tx, oid toolkit.Oid) ([]*toolk idx := 0 for rows.Next() { column := toolkit.Column{Idx: idx} - if err = rows.Scan(&column.Name, &column.TypeOid, &column.TypeName, - &column.NotNull, &column.Length, &column.Num); err != nil { + if version >= 120000 { + err = rows.Scan(&column.Name, &column.TypeOid, &column.TypeName, + &column.NotNull, &column.Length, &column.Num, &column.IsGenerated) + } else { + err = rows.Scan(&column.Name, &column.TypeOid, &column.TypeName, + &column.NotNull, &column.Length, &column.Num) + } + if err != nil { return nil, fmt.Errorf("cannot scan tableColumnQuery: %w", err) } res = append(res, &column) @@ -289,11 +303,7 @@ func getTableConstraints(ctx context.Context, tx pgx.Tx, tableOid toolkit.Oid, v buf := bytes.NewBuffer(nil) err = TablePrimaryKeyReferencesConstraintsQuery.Execute( buf, - struct { - Version int - }{ - Version: version, - }, + map[string]int{"Version": version}, ) if err != nil { return nil, fmt.Errorf("error templating TablePrimaryKeyReferencesConstraintsQuery: %w", err) diff --git a/internal/db/postgres/entries/table.go b/internal/db/postgres/entries/table.go index fe3263f3..e0ba065b 100644 --- a/internal/db/postgres/entries/table.go +++ b/internal/db/postgres/entries/table.go @@ -78,10 +78,11 @@ func (t *Table) Entry() (*toc.Entry, error) { columns := make([]string, 0, len(t.Columns)) for _, column := range t.Columns { - columns = append(columns, fmt.Sprintf(`"%s"`, column.Name)) + if !column.IsGenerated { + columns = append(columns, fmt.Sprintf(`"%s"`, column.Name)) + } } - //var query = `COPY "%s"."%s" (%s) FROM stdin WITH (FORMAT CSV, NULL '\N');` var query = `COPY "%s"."%s" (%s) FROM stdin` var schemaName, tableName string if t.LoadViaPartitionRoot && t.RootPtSchema != "" && t.RootPtName != "" { diff --git a/pkg/toolkit/column.go b/pkg/toolkit/column.go index 15223b10..e5bb9039 100644 --- a/pkg/toolkit/column.go +++ b/pkg/toolkit/column.go @@ -15,11 +15,12 @@ package toolkit type Column struct { - Name string `json:"name"` - TypeName string `json:"type_name"` - TypeOid Oid `json:"type_oid"` - Num AttNum `json:"num"` - NotNull bool `json:"not_null"` - Length int `json:"length"` - Idx int `json:"idx"` + Name string `json:"name"` + TypeName string `json:"type_name"` + TypeOid Oid `json:"type_oid"` + Num AttNum `json:"num"` + NotNull bool `json:"not_null"` + Length int `json:"length"` + Idx int `json:"idx"` + IsGenerated bool `json:"is_generated"` } From 14cec282c3e0ea9811e35b359453c2b46bb1061d Mon Sep 17 00:00:00 2001 From: Vladimir Tarbaev Date: Sat, 27 Apr 2024 01:11:18 +0300 Subject: [PATCH 02/13] fix deploy-docs jobs --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 023a4259..d25047c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ on: env: go-version: '1.21.5' - python-version: '3.12' + python-version: 'pypy3.10' cmd-name: 'greenmask' docker-registry: greenmask/greenmask From b4a617a64d677de70ef552f6dbe159d28a072842 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Sat, 27 Apr 2024 19:30:15 +0300 Subject: [PATCH 03/13] Fix viper and mapstructure limitation workaround This solves problem with map structure described -> https://github.com/spf13/viper/issues/373 that caused issue in Greenmask #76 We need to keep the original keys in the map without lowercasing. To overcome this problem we need use default yaml and json parsers avoiding vaiper or mapstructure usage --- cmd/greenmask/cmd/root.go | 8 ++- internal/domains/config.go | 10 ++- internal/utils/config/viper_workaround.go | 82 +++++++++++++++++++++++ 3 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 internal/utils/config/viper_workaround.go diff --git a/cmd/greenmask/cmd/root.go b/cmd/greenmask/cmd/root.go index 2fc4c1b5..aee62ab3 100644 --- a/cmd/greenmask/cmd/root.go +++ b/cmd/greenmask/cmd/root.go @@ -145,4 +145,10 @@ func initConfig() { if err := viper.Unmarshal(Config, decoderCfg); err != nil { log.Fatal().Err(err).Msg("") } -} \ No newline at end of file + + // This solves problem with map structure described -> https://github.com/spf13/viper/issues/373 + // that caused issue in Greenmask https://github.com/GreenmaskIO/greenmask/issues/76 + if err := configUtils.ParseTransformerParamsManually(cfgFile, Config); err != nil { + log.Fatal().Err(err).Msg("error parsing transformer parameters") + } +} diff --git a/internal/domains/config.go b/internal/domains/config.go index cb688ce9..8400a5d7 100644 --- a/internal/domains/config.go +++ b/internal/domains/config.go @@ -110,7 +110,15 @@ type TransformerSettings struct { type TransformerConfig struct { Name string `mapstructure:"name" yaml:"name" json:"name,omitempty"` Settings *TransformerSettings `mapstructure:"settings,omitempty" yaml:"settings,omitempty" json:"settings,omitempty"` - Params toolkit.Params `mapstructure:"params" yaml:"params" json:"params,omitempty"` + // Params - transformation parameters. It might be any type. If structure should be stored as raw json + // This cannot be parsed with mapstructure due to uncontrollable lowercasing + // https://github.com/spf13/viper/issues/373 + // Instead we have to use workaround and parse it manually + Params toolkit.Params `mapstructure:"-" yaml:"-" json:"-"` // yaml:"params" json:"params,omitempty"` + // TempParams - the https://github.com/spf13/viper/issues/373 workaround + // we decode the values into TempParams and then Unmarshal it manually and set to Params + // The related code is in internal/utils/config/viper_workaround.go + TempParams map[string]any `mapstructure:"-" yaml:"params,omitempty" json:"params,omitempty"` } type Table struct { diff --git a/internal/utils/config/viper_workaround.go b/internal/utils/config/viper_workaround.go new file mode 100644 index 00000000..342125ab --- /dev/null +++ b/internal/utils/config/viper_workaround.go @@ -0,0 +1,82 @@ +// Copyright 2023 Greenmask +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "encoding/json" + "fmt" + "os" + "path" + + "gopkg.in/yaml.v3" + + "github.com/greenmaskio/greenmask/internal/domains" + "github.com/greenmaskio/greenmask/pkg/toolkit" +) + +// ParseTransformerParamsManually - manually parse dump.transformation[a].transformers[b].params +// The problem described https://github.com/GreenmaskIO/greenmask/issues/76 +// We need to keep the original keys in the map without lowercasing +// To overcome this problem we need use default yaml and json parsers avoiding vaiper or mapstructure usage. +func ParseTransformerParamsManually(cfgFilePath string, cfg *domains.Config) error { + ext := path.Ext(cfgFilePath) + //cfgMap := make(map[string]any) + tmpCfg := domains.NewConfig() + f, err := os.Open(cfgFilePath) + if err != nil { + return err + } + defer f.Close() + + switch ext { + case ".json": + if err = json.NewDecoder(f).Decode(&tmpCfg); err != nil { + return err + } + case ".yaml", ".yml": + if err = yaml.NewDecoder(f).Decode(&tmpCfg); err != nil { + return err + } + default: + return fmt.Errorf("unsupported file extension \"%s\"", err) + } + return setTransformerParams(tmpCfg, cfg) +} + +// setTransformerParams - get the value from domains.TransformerConfig.TempParams, marshall this value and store into +// domains.TransformerConfig.Params +func setTransformerParams(tmpCfg, cfg *domains.Config) (err error) { + for tableIdx, tableObj := range tmpCfg.Dump.Transformation { + for transformationIdx, transformationObj := range tableObj.Transformers { + transformer := cfg.Dump.Transformation[tableIdx].Transformers[transformationIdx] + paramsMap := make(map[string]toolkit.ParamsValue, len(transformationObj.Params)) + for paramName, decodedValue := range transformer.TempParams { + var encodedVal toolkit.ParamsValue + switch v := decodedValue.(type) { + case string: + encodedVal = toolkit.ParamsValue(v) + default: + encodedVal, err = json.Marshal(v) + if err != nil { + return fmt.Errorf("cannot convert object to json bytes: %w", err) + } + } + paramsMap[paramName] = encodedVal + } + transformer.Params = paramsMap + } + } + return nil +} From 359cc49f50cc33a4f74b8e14c48396dea2dc4697 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Sun, 28 Apr 2024 10:10:18 +0300 Subject: [PATCH 04/13] fix: table size scoring for transformed table --- internal/db/postgres/context/pg_catalog.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/db/postgres/context/pg_catalog.go b/internal/db/postgres/context/pg_catalog.go index 203495f9..f74240d6 100644 --- a/internal/db/postgres/context/pg_catalog.go +++ b/internal/db/postgres/context/pg_catalog.go @@ -99,6 +99,7 @@ func getDumpObjects( // If table was discovered during Transformer validation - use that object instead of a new table.ExcludeData = excludeData table.LoadViaPartitionRoot = options.LoadViaPartitionRoot + table.Size = relSize } else { // If table is not found - create new table object and collect all the columns From 585241f8e56dbfef656ba300b4f8412467513b44 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Sun, 28 Apr 2024 10:24:22 +0300 Subject: [PATCH 05/13] fix: restore fails due to connection idle timeout Fixed restore command failure due to connection idle timeout #75 --- internal/db/postgres/cmd/restore.go | 41 ++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/internal/db/postgres/cmd/restore.go b/internal/db/postgres/cmd/restore.go index 91231772..d119b191 100644 --- a/internal/db/postgres/cmd/restore.go +++ b/internal/db/postgres/cmd/restore.go @@ -156,7 +156,7 @@ func (r *Restore) prepare() error { return nil } -func (r *Restore) preFlightRestore(ctx context.Context, conn *pgx.Conn) error { +func (r *Restore) preFlightRestore(ctx context.Context) error { tocFile, err := r.st.GetObject(ctx, "toc.dat") if err != nil { @@ -225,7 +225,7 @@ func (r *Restore) sortAndFilterEntriesByRestoreList() error { return nil } -func (r *Restore) preDataRestore(ctx context.Context, conn *pgx.Conn) error { +func (r *Restore) preDataRestore(ctx context.Context) error { // pg_dump has a limitation: // If we want to use --cleanup command then this command must be performed for whole schema (--schema-only) // without --section parameter. For avoiding cascade dropping we need to run pg_restore with --schema-only --clean @@ -242,6 +242,12 @@ func (r *Restore) preDataRestore(ctx context.Context, conn *pgx.Conn) error { return nil } + conn, err := pgx.Connect(ctx, r.dsn) + if err != nil { + return fmt.Errorf("cannot establish connection to db: %w", err) + } + defer conn.Close(ctx) + // Execute PreData Before scripts if err := r.RunScripts(ctx, conn, scriptPreDataSection, scriptExecuteBefore); err != nil { return err @@ -338,7 +344,7 @@ func (r *Restore) prepareCleanupToc() (string, string, error) { return preDatadirName, postDatadirName, nil } -func (r *Restore) dataRestore(ctx context.Context, conn *pgx.Conn) error { +func (r *Restore) dataRestore(ctx context.Context) error { // Execute Data Before scripts // Do not restore this section if implicitly provided @@ -347,6 +353,12 @@ func (r *Restore) dataRestore(ctx context.Context, conn *pgx.Conn) error { return nil } + conn, err := pgx.Connect(ctx, r.dsn) + if err != nil { + return fmt.Errorf("cannot establish connection to db: %w", err) + } + defer conn.Close(ctx) + if err := r.RunScripts(ctx, conn, scriptDataSection, scriptExecuteBefore); err != nil { return err } @@ -439,7 +451,7 @@ func (r *Restore) isNeedRestore(e *toc.Entry) bool { return true } -func (r *Restore) postDataRestore(ctx context.Context, conn *pgx.Conn) error { +func (r *Restore) postDataRestore(ctx context.Context) error { // Execute Post Data Before scripts // Do not restore this section if implicitly provided @@ -448,6 +460,12 @@ func (r *Restore) postDataRestore(ctx context.Context, conn *pgx.Conn) error { return nil } + conn, err := pgx.Connect(ctx, r.dsn) + if err != nil { + return fmt.Errorf("cannot establish connection to db: %w", err) + } + defer conn.Close(ctx) + if err := r.RunScripts(ctx, conn, scriptPostDataSection, scriptExecuteBefore); err != nil { return err } @@ -485,26 +503,19 @@ func (r *Restore) Run(ctx context.Context) error { return fmt.Errorf("preparation error: %w", err) } - // Establish connection for scripts - conn, err := pgx.Connect(ctx, r.dsn) - if err != nil { - return fmt.Errorf("cannot establish connection to db: %w", err) - } - defer conn.Close(ctx) - - if err = r.preFlightRestore(ctx, conn); err != nil { + if err := r.preFlightRestore(ctx); err != nil { return fmt.Errorf("pre-flight stage restoration error: %w", err) } - if err = r.preDataRestore(ctx, conn); err != nil { + if err := r.preDataRestore(ctx); err != nil { return fmt.Errorf("pre-data stage restoration error: %w", err) } - if err = r.dataRestore(ctx, conn); err != nil { + if err := r.dataRestore(ctx); err != nil { return fmt.Errorf("data stage restoration error: %w", err) } - if err = r.postDataRestore(ctx, conn); err != nil { + if err := r.postDataRestore(ctx); err != nil { return fmt.Errorf("post-data stage restoration error: %w", err) } From 1008af000fccdbe83e00e5afd704cd95a3e01c5a Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Sun, 28 Apr 2024 10:30:54 +0300 Subject: [PATCH 06/13] docs: added changelog for v0.1.11 --- docs/mkdocs.yml | 1 + docs/overrides/main.html | 2 +- docs/release_notes/greenmask_0_1_11.md | 15 +++++++++++++++ mkdocs.yml | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 docs/release_notes/greenmask_0_1_11.md diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index f560c4de..af78f3f0 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -117,6 +117,7 @@ nav: - Core custom functions: built_in_transformers/advanced_transformers/custom_functions/core_functions.md - Faker function: built_in_transformers/advanced_transformers/custom_functions/faker_function.md - Release notes: + - Greenmask 0.1.11: release_notes/greenmask_0_1_11.md - Greenmask 0.1.10: release_notes/greenmask_0_1_10.md - Greenmask 0.1.9: release_notes/greenmask_0_1_9.md - Greenmask 0.1.8: release_notes/greenmask_0_1_8.md diff --git a/docs/overrides/main.html b/docs/overrides/main.html index 6629f7bf..ee9fa273 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -1,5 +1,5 @@ {% extends "base.html" %} {% block announce %} - Version 0.1.10 is released + Version 0.1.11 is released {% endblock %} diff --git a/docs/release_notes/greenmask_0_1_11.md b/docs/release_notes/greenmask_0_1_11.md new file mode 100644 index 00000000..22ca5efd --- /dev/null +++ b/docs/release_notes/greenmask_0_1_11.md @@ -0,0 +1,15 @@ +# Greenmask 0.1.11 + +This release introduces improvements and bug fixes + +## Changes + +* Added support for generated columns in the table +* Fixed transformer parameters encoding issue caused by spf13/viper +* Fixed table scoring for transformed table +* Refactored connection management logic in restore command - fixes connection idle timeout + +## Assets + +To download the Greenmask binary compatible with your system, see +the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.11). diff --git a/mkdocs.yml b/mkdocs.yml index 7ff6b15a..28c5072d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -117,6 +117,7 @@ nav: - Core custom functions: built_in_transformers/advanced_transformers/custom_functions/core_functions.md - Faker function: built_in_transformers/advanced_transformers/custom_functions/faker_function.md - Release notes: + - Greenmask 0.1.11: release_notes/greenmask_0_1_11.md - Greenmask 0.1.10: release_notes/greenmask_0_1_10.md - Greenmask 0.1.9: release_notes/greenmask_0_1_9.md - Greenmask 0.1.8: release_notes/greenmask_0_1_8.md From 786454127dd1e324dacafcf907661d6b4931eec5 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Sun, 28 Apr 2024 10:56:01 +0300 Subject: [PATCH 07/13] fix: Fixed when commands call without config file fails --- cmd/greenmask/cmd/root.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cmd/greenmask/cmd/root.go b/cmd/greenmask/cmd/root.go index aee62ab3..b7f21f44 100644 --- a/cmd/greenmask/cmd/root.go +++ b/cmd/greenmask/cmd/root.go @@ -146,9 +146,11 @@ func initConfig() { log.Fatal().Err(err).Msg("") } - // This solves problem with map structure described -> https://github.com/spf13/viper/issues/373 - // that caused issue in Greenmask https://github.com/GreenmaskIO/greenmask/issues/76 - if err := configUtils.ParseTransformerParamsManually(cfgFile, Config); err != nil { - log.Fatal().Err(err).Msg("error parsing transformer parameters") + if cfgFile != "" { + // This solves problem with map structure described -> https://github.com/spf13/viper/issues/373 + // that caused issue in Greenmask https://github.com/GreenmaskIO/greenmask/issues/76 + if err := configUtils.ParseTransformerParamsManually(cfgFile, Config); err != nil { + log.Fatal().Err(err).Msg("error parsing transformer parameters") + } } } From 768d3a3bdb45deedd6d0c5536c78cce376d77594 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Mon, 29 Apr 2024 19:09:38 +0300 Subject: [PATCH 08/13] fix: Refactored logic for viper and mapstructure workaround Now yaml or json parser that dedicated to transformer parameters parsing parses only transformer parameter Fixes #85 --- internal/domains/config.go | 13 +++++++++++++ internal/utils/config/viper_workaround.go | 8 ++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/internal/domains/config.go b/internal/domains/config.go index 8400a5d7..802a4b58 100644 --- a/internal/domains/config.go +++ b/internal/domains/config.go @@ -129,3 +129,16 @@ type Table struct { Transformers []*TransformerConfig `mapstructure:"transformers" yaml:"transformers" json:"transformers,omitempty"` ColumnsTypeOverride map[string]string `mapstructure:"columns_type_override" yaml:"columns_type_override" json:"columns_type_override,omitempty"` } + +// DummyConfig - This is a dummy config to the viper workaround +// It is used to parse the transformation parameters manually only avoiding parsing other pars of the config +// The reason why is there https://github.com/GreenmaskIO/greenmask/discussions/85 +type DummyConfig struct { + Dump struct { + Transformation []struct { + Transformers []struct { + Params map[string]interface{} `yaml:"params" json:"params"` + } `yaml:"transformers" json:"transformers"` + } `yaml:"transformation" json:"transformation"` + } `yaml:"dump" json:"dump"` +} diff --git a/internal/utils/config/viper_workaround.go b/internal/utils/config/viper_workaround.go index 342125ab..7b1e332a 100644 --- a/internal/utils/config/viper_workaround.go +++ b/internal/utils/config/viper_workaround.go @@ -32,8 +32,7 @@ import ( // To overcome this problem we need use default yaml and json parsers avoiding vaiper or mapstructure usage. func ParseTransformerParamsManually(cfgFilePath string, cfg *domains.Config) error { ext := path.Ext(cfgFilePath) - //cfgMap := make(map[string]any) - tmpCfg := domains.NewConfig() + tmpCfg := &domains.DummyConfig{} f, err := os.Open(cfgFilePath) if err != nil { return err @@ -57,12 +56,13 @@ func ParseTransformerParamsManually(cfgFilePath string, cfg *domains.Config) err // setTransformerParams - get the value from domains.TransformerConfig.TempParams, marshall this value and store into // domains.TransformerConfig.Params -func setTransformerParams(tmpCfg, cfg *domains.Config) (err error) { +func setTransformerParams(tmpCfg *domains.DummyConfig, cfg *domains.Config) (err error) { for tableIdx, tableObj := range tmpCfg.Dump.Transformation { for transformationIdx, transformationObj := range tableObj.Transformers { transformer := cfg.Dump.Transformation[tableIdx].Transformers[transformationIdx] + tmpTransformer := tmpCfg.Dump.Transformation[tableIdx].Transformers[transformationIdx] paramsMap := make(map[string]toolkit.ParamsValue, len(transformationObj.Params)) - for paramName, decodedValue := range transformer.TempParams { + for paramName, decodedValue := range tmpTransformer.Params { var encodedVal toolkit.ParamsValue switch v := decodedValue.(type) { case string: From 5464017abc9fac463aee5729c4bf3e0f21fe3dfe Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Mon, 29 Apr 2024 22:11:30 +0300 Subject: [PATCH 09/13] fix: Refactored logic for viper and mapstructure workaround * Fixed integration tests - adapted for text/template usage * Renamed TempParams to MetadataParams. Now they uses only for metadata in storage Fixes #85 --- internal/domains/config.go | 9 +- internal/utils/config/viper_workaround.go | 3 +- .../greenmask/backward_compatibility_test.go | 120 +++++++++--------- 3 files changed, 66 insertions(+), 66 deletions(-) diff --git a/internal/domains/config.go b/internal/domains/config.go index 802a4b58..e0cd622c 100644 --- a/internal/domains/config.go +++ b/internal/domains/config.go @@ -115,10 +115,11 @@ type TransformerConfig struct { // https://github.com/spf13/viper/issues/373 // Instead we have to use workaround and parse it manually Params toolkit.Params `mapstructure:"-" yaml:"-" json:"-"` // yaml:"params" json:"params,omitempty"` - // TempParams - the https://github.com/spf13/viper/issues/373 workaround - // we decode the values into TempParams and then Unmarshal it manually and set to Params - // The related code is in internal/utils/config/viper_workaround.go - TempParams map[string]any `mapstructure:"-" yaml:"params,omitempty" json:"params,omitempty"` + // MetadataParams - encoded transformer parameters - uses only for storing into storage + // TODO: You need to get rid of it by creating a separate structure for storing metadata in + // internal/db/postgres/storage/metadata_json.go + // this is used only due to https://github.com/spf13/viper/issues/373 + MetadataParams map[string]any `mapstructure:"-" yaml:"params,omitempty" json:"params,omitempty"` } type Table struct { diff --git a/internal/utils/config/viper_workaround.go b/internal/utils/config/viper_workaround.go index 7b1e332a..8b76bfc3 100644 --- a/internal/utils/config/viper_workaround.go +++ b/internal/utils/config/viper_workaround.go @@ -54,7 +54,7 @@ func ParseTransformerParamsManually(cfgFilePath string, cfg *domains.Config) err return setTransformerParams(tmpCfg, cfg) } -// setTransformerParams - get the value from domains.TransformerConfig.TempParams, marshall this value and store into +// setTransformerParams - get the value from domains.TransformerConfig.MetadataParams, marshall this value and store into // domains.TransformerConfig.Params func setTransformerParams(tmpCfg *domains.DummyConfig, cfg *domains.Config) (err error) { for tableIdx, tableObj := range tmpCfg.Dump.Transformation { @@ -76,6 +76,7 @@ func setTransformerParams(tmpCfg *domains.DummyConfig, cfg *domains.Config) (err paramsMap[paramName] = encodedVal } transformer.Params = paramsMap + transformer.MetadataParams = tmpTransformer.Params } } return nil diff --git a/tests/integration/greenmask/backward_compatibility_test.go b/tests/integration/greenmask/backward_compatibility_test.go index 8e32deb6..f37c5164 100644 --- a/tests/integration/greenmask/backward_compatibility_test.go +++ b/tests/integration/greenmask/backward_compatibility_test.go @@ -16,71 +16,57 @@ package greenmask import ( "context" - "encoding/json" "errors" "fmt" "os" "os/exec" "path" + "text/template" "time" "github.com/jackc/pgx/v5" "github.com/rs/zerolog/log" "github.com/stretchr/testify/suite" - - "github.com/greenmaskio/greenmask/internal/db/postgres/pgdump" - "github.com/greenmaskio/greenmask/internal/domains" - "github.com/greenmaskio/greenmask/internal/storages/builder" - "github.com/greenmaskio/greenmask/internal/storages/directory" - "github.com/greenmaskio/greenmask/pkg/toolkit" ) -var config = &domains.Config{ - Common: domains.Common{ - PgBinPath: "/usr/local/opt/postgresql@16/bin", - TempDirectory: "/tmp", - }, - Log: domains.LogConfig{ - Level: "debug", - Format: "text", - }, - Storage: domains.StorageConfig{ - Type: builder.DirectoryStorageType, - Directory: &directory.Config{ - Path: "/tmp", - }, - }, - Dump: domains.Dump{ - PgDumpOptions: pgdump.Options{ - DbName: "host=localhost user=postgres password=example dbname=demo port=54316", - Jobs: 10, - }, - Transformation: []*domains.Table{ - { - Schema: "bookings", - Name: "flights", - Transformers: []*domains.TransformerConfig{ - { - Name: "RandomDate", - Params: map[string]toolkit.ParamsValue{ - "min": toolkit.ParamsValue("2023-01-01 00:00:00.0+03"), - "max": toolkit.ParamsValue("2023-01-02 00:00:00.0+03"), - "column": toolkit.ParamsValue("scheduled_departure"), - }, - }, - { - Name: "RandomDate", - Params: map[string]toolkit.ParamsValue{ - "min": toolkit.ParamsValue("2023-02-02 01:00:00.0+03"), - "max": toolkit.ParamsValue("2023-03-03 00:00:00.0+03"), - "column": toolkit.ParamsValue("scheduled_arrival"), - }, - }, - }, - }, - }, - }, -} +var configStr = template.Must(template.New("config").Parse(` +common: + pg_bin_path: "{{ .pgBinPath }}" + tmp_dir: "{{ .tmpDir }}" + +log: + level: debug + format: json + +storage: + type: "directory" + directory: + path: "{{ .storageDir }}" + +dump: + pg_dump_options: + dbname: "{{ .uri }}" + jobs: 10 + load-via-partition-root: true + schema: public + + transformation: + - schema: "bookings" + name: "flights" + transformers: + + - name: "RandomDate" + params: + "min": "2023-01-01 00:00:00.0+03" + "max": "2023-01-02 00:00:00.0+03" + "column": "scheduled_departure" + + - name: "RandomDate" + params: + "min": "2023-02-02 01:00:00.0+03" + "max": "2023-03-03 00:00:00.0+03" + "column": "scheduled_arrival" +`)) type BackwardCompatibilitySuite struct { suite.Suite @@ -114,16 +100,18 @@ func (suite *BackwardCompatibilitySuite) SetupSuite() { err = os.Mkdir(suite.runtimeTmpDir, 0700) suite.Require().NoError(err, "error creating tmp dir") - config.Common.TempDirectory = suite.tmpDir - config.Storage.Directory.Path = suite.storageDir - config.Dump.PgDumpOptions.DbName = uri - config.Common.PgBinPath = pgBinPath - - suite.configFilePath = path.Join(suite.tmpDir, "config.json") + suite.configFilePath = path.Join(suite.tmpDir, "config.yaml") confFile, err := os.Create(suite.configFilePath) - suite.Require().NoError(err, "error creating config.yml file") + suite.Require().NoError(err, "error creating config.yaml file") defer confFile.Close() - err = json.NewEncoder(confFile).Encode(config) + err = configStr.Execute( + confFile, + map[string]string{ + "pgBinPath": pgBinPath, + "tmpDir": suite.tmpDir, + "uri": uri, + "storageDir": suite.storageDir, + }) suite.Require().NoError(err, "error encoding config into yaml") suite.conn, err = pgx.Connect(context.Background(), uri) @@ -134,6 +122,12 @@ func (suite *BackwardCompatibilitySuite) SetupSuite() { log.Info().Str("dbname", suite.restorationDbName).Msg("creating database") _, err = suite.conn.Exec(context.Background(), fmt.Sprintf("create database %s", suite.restorationDbName)) suite.Require().NoError(err, "error creating database") + + restoreDbConn, err := pgx.Connect(context.Background(), fmt.Sprintf("%s dbname=%s", uri, suite.restorationDbName)) + suite.Require().NoError(err, "error connecting to restore db") + defer restoreDbConn.Close(context.Background()) + _, err = restoreDbConn.Exec(context.Background(), "drop schema public;") + suite.Require().NoError(err, "error creating database") } func (suite *BackwardCompatibilitySuite) TestGreenmaskCompatibility() { @@ -141,8 +135,10 @@ func (suite *BackwardCompatibilitySuite) TestGreenmaskCompatibility() { cmd := exec.Command(path.Join(greenmaskBinPath, "greenmask"), "--config", suite.configFilePath, "dump", ) + log.Debug().Str("cmd", cmd.String()).Msg("running greenmask") cmd.Stderr = os.Stderr cmd.Stdout = os.Stdout + log.Info().Str("cmd", cmd.String()).Msg("greenmask stdout and stderr forwarding") err := cmd.Run() suite.Require().NoError(err, "error running greenmask") @@ -158,6 +154,7 @@ func (suite *BackwardCompatibilitySuite) TestGreenmaskCompatibility() { cmd := exec.Command(path.Join(pgBinPath, "pg_restore"), "-l", path.Join(suite.storageDir, lastDump.Name()), ) + log.Info().Str("cmd", cmd.String()).Msg("running pg_restore list") out, err := cmd.Output() if len(out) > 0 { log.Info().Msg("pg_restore stout forwarding") @@ -187,6 +184,7 @@ func (suite *BackwardCompatibilitySuite) TestGreenmaskCompatibility() { "-v", path.Join(suite.storageDir, lastDump.Name()), ) + log.Info().Str("cmd", cmd.String()).Msg("running pg_restore") cmd.Stderr = os.Stderr cmd.Stdout = os.Stdout log.Info().Str("cmd", cmd.String()).Msg("pg_restore stdout and stderr forwarding") From a6bd578aaae690f9192f7c5fc5e8fc7b3fd2ceee Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Mon, 29 Apr 2024 22:13:16 +0300 Subject: [PATCH 10/13] fix: Fixed wrong behavior of schema and data toc entries merge --- internal/db/postgres/cmd/dump.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/db/postgres/cmd/dump.go b/internal/db/postgres/cmd/dump.go index 55bb7c1e..7834b0f7 100644 --- a/internal/db/postgres/cmd/dump.go +++ b/internal/db/postgres/cmd/dump.go @@ -446,6 +446,11 @@ func (d *Dump) Run(ctx context.Context) (err error) { func (d *Dump) MergeTocEntries(schemaEntries []*toc.Entry, dataEntries []*toc.Entry) ( []*toc.Entry, error, ) { + if len(dataEntries) == 0 { + // No data entries, just return schema entries + return schemaEntries, nil + } + // TODO: Assign dependencies and sort entries in the same order res := make([]*toc.Entry, 0, len(schemaEntries)+len(dataEntries)) From 86a45ec7d373f9cd669d7d477f202e02852c277e Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Mon, 29 Apr 2024 22:37:12 +0300 Subject: [PATCH 11/13] fix: Fixed integration test for s3 storage --- tests/integration/storages/s3_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/storages/s3_test.go b/tests/integration/storages/s3_test.go index 987a8c14..0bc51c56 100644 --- a/tests/integration/storages/s3_test.go +++ b/tests/integration/storages/s3_test.go @@ -21,10 +21,10 @@ import ( "path" "slices" - "github.com/greenmaskio/greenmask/internal/storages" "github.com/rs/zerolog" "github.com/stretchr/testify/suite" + "github.com/greenmaskio/greenmask/internal/storages" "github.com/greenmaskio/greenmask/internal/storages/s3" ) @@ -90,7 +90,7 @@ func (suite *S3StorageSuite) TestS3Ops() { suite.Require().Len(dirs, 1) suite.Require().Equal("test.txt", files[0]) s3Dir := dirs[0].(*s3.Storage) - suite.Require().Equal(path.Join(suite.cfg.Bucket, suite.cfg.Prefix, "testdb")+"/", s3Dir.GetCwd()) + suite.Require().Equal(path.Join(suite.cfg.Prefix, "testdb")+"/", s3Dir.GetCwd()) nextDir := dirs[0] files, dirs, err = nextDir.ListDir(context.Background()) From 096d96f627a0a1384853a2758fee44c819961193 Mon Sep 17 00:00:00 2001 From: Vadim Voitenko Date: Mon, 29 Apr 2024 22:55:42 +0300 Subject: [PATCH 12/13] docs: Added changelog for v0.1.12 --- docs/mkdocs.yml | 1 + docs/overrides/main.html | 2 +- docs/release_notes/greenmask_0_1_12.md | 14 ++++++++++++++ mkdocs.yml | 1 + 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 docs/release_notes/greenmask_0_1_12.md diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index af78f3f0..6bdd85f3 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -117,6 +117,7 @@ nav: - Core custom functions: built_in_transformers/advanced_transformers/custom_functions/core_functions.md - Faker function: built_in_transformers/advanced_transformers/custom_functions/faker_function.md - Release notes: + - Greenmask 0.1.12: release_notes/greenmask_0_1_12.md - Greenmask 0.1.11: release_notes/greenmask_0_1_11.md - Greenmask 0.1.10: release_notes/greenmask_0_1_10.md - Greenmask 0.1.9: release_notes/greenmask_0_1_9.md diff --git a/docs/overrides/main.html b/docs/overrides/main.html index ee9fa273..8b6265e6 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -1,5 +1,5 @@ {% extends "base.html" %} {% block announce %} - Version 0.1.11 is released + Version 0.1.12 is released {% endblock %} diff --git a/docs/release_notes/greenmask_0_1_12.md b/docs/release_notes/greenmask_0_1_12.md new file mode 100644 index 00000000..cd9620a3 --- /dev/null +++ b/docs/release_notes/greenmask_0_1_12.md @@ -0,0 +1,14 @@ +# Greenmask 0.1.12 + +This release introduces improvements and bug fixes + +## Changes + +* Fixed config decoding issue caused +* Fixed TOC entries merge behavior when data section is empty +* Fixed integration tests for S3 storage + +## Assets + +To download the Greenmask binary compatible with your system, see +the [release's assets list](https://github.com/GreenmaskIO/greenmask/releases/tag/v0.1.12). diff --git a/mkdocs.yml b/mkdocs.yml index 28c5072d..8f4b225e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -117,6 +117,7 @@ nav: - Core custom functions: built_in_transformers/advanced_transformers/custom_functions/core_functions.md - Faker function: built_in_transformers/advanced_transformers/custom_functions/faker_function.md - Release notes: + - Greenmask 0.1.12: release_notes/greenmask_0_1_12.md - Greenmask 0.1.11: release_notes/greenmask_0_1_11.md - Greenmask 0.1.10: release_notes/greenmask_0_1_10.md - Greenmask 0.1.9: release_notes/greenmask_0_1_9.md From 374fd505eea16e0209846101fcf31e24c0146ff4 Mon Sep 17 00:00:00 2001 From: Vladimir Tarbaev Date: Tue, 30 Apr 2024 20:50:25 +0300 Subject: [PATCH 13/13] added a condition for exiting the integration test launch script --- docker/integration/tests/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/integration/tests/Dockerfile b/docker/integration/tests/Dockerfile index cdfb5f4a..36ef8717 100644 --- a/docker/integration/tests/Dockerfile +++ b/docker/integration/tests/Dockerfile @@ -31,6 +31,8 @@ for pg_version in ${PG_VERSIONS_CHECK[@]}; do \n\ echo "### SUCCESSFUL CHECK COMPATIBILITY WITH POSTGRESQL ${pg_version} ###" \n\ else \n\ echo "### FAIL CHECK COMPATIBILITY WITH POSTGRESQL ${pg_version} ###" \n\ + echo "### EXIT SCRIPT ###" \n\ + exit 2 \n\ fi \n\ done \n' > /docker-entrypoint.sh \ && chmod +x /docker-entrypoint.sh