diff --git a/internal/db/postgres/transformers/random_faker.go b/internal/db/postgres/transformers/random_faker.go
new file mode 100644
index 00000000..c7efde8f
--- /dev/null
+++ b/internal/db/postgres/transformers/random_faker.go
@@ -0,0 +1,357 @@
+// Copyright 2023 Greenmask
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package transformers
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/go-faker/faker/v4"
+	"github.com/go-faker/faker/v4/pkg/options"
+	"github.com/greenmaskio/greenmask/internal/db/postgres/transformers/utils"
+	"github.com/greenmaskio/greenmask/pkg/toolkit"
+)
+
+// FakerFunc is the signature shared by the go-faker string generators wrapped
+// by the transformers in this file.
+type FakerFunc func(opts ...options.OptionFunc) string
+
+// FakerTransformerDef describes a single faker-backed transformer: the
+// generator it wraps, the column types it accepts and its description.
+type FakerTransformerDef struct {
+	SupportedTypes []string
+	Description    string
+	Generator      FakerFunc
+}
+
+// FakerTransformersDes maps a transformer name to its definition. Every entry
+// is turned into a registered transformer by generateFakerTransformers.
+var FakerTransformersDes = map[string]*FakerTransformerDef{
+	// Faker geo
+	"RandomLatitude": {
+		Generator: func(opts ...options.OptionFunc) string {
+			return fmt.Sprintf("%f", faker.Latitude())
+		},
+		SupportedTypes: []string{"float4", "float8", "numeric"},
+		Description:    "Generates a random latitude value.",
+	},
+	"RandomLongitude": {
+		Generator: func(opts ...options.OptionFunc) string {
+			return fmt.Sprintf("%f", faker.Longitude())
+		},
+		SupportedTypes: []string{"float4", "float8", "numeric"},
+		Description:    "Generates a random longitude value.",
+	},
+
+	// Faker Datetime
+	"RandomUnixTime": {
+		Generator: func(opts ...options.OptionFunc) string {
+			return fmt.Sprintf("%d", faker.UnixTime())
+		},
+		SupportedTypes: []string{"int4", "int8", "numeric"},
+		Description:    "Generates a random Unix timestamp.",
+	},
+	"RandomMonthName": {
+		Generator:      faker.MonthName,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates the name of a random month.",
+	},
+	"RandomYearString": {
+		Generator:      faker.YearString,
+		SupportedTypes: []string{"text", "varchar", "int2", "int4", "int8", "numeric"},
+		Description:    "Generates a random year as a string.",
+	},
+	"RandomDayOfWeek": {
+		Generator:      faker.DayOfWeek,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random day of the week.",
+	},
+	"RandomDayOfMonth": {
+		Generator:      faker.DayOfMonth,
+		SupportedTypes: []string{"text", "varchar", "int2", "int4", "int8", "numeric"},
+		Description:    "Generates a random day of the month.",
+	},
+	"RandomCentury": {
+		Generator:      faker.Century,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random century.",
+	},
+	"RandomTimezone": {
+		Generator:      faker.Timezone,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random timezone.",
+	},
+
+	// Faker Internet
+	"RandomEmail": {
+		Generator:      faker.Email,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random email address.",
+	},
+	"RandomMacAddress": {
+		Generator:      faker.MacAddress,
+		SupportedTypes: []string{"text", "varchar", "macaddr", "macaddr8"},
+		Description:    "Generates a random MAC address.",
+	},
+	"RandomDomainName": {
+		Generator:      faker.DomainName,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random domain name.",
+	},
+	"RandomURL": {
+		Generator:      faker.URL,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random URL.",
+	},
+	"RandomUsername": {
+		Generator:      faker.Username,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random username.",
+	},
+	"RandomIPv4": {
+		Generator:      faker.IPv4,
+		SupportedTypes: []string{"text", "varchar", "inet"},
+		Description:    "Generates a random IPv4 address.",
+	},
+	"RandomIPv6": {
+		Generator:      faker.IPv6,
+		SupportedTypes: []string{"text", "varchar", "inet"},
+		Description:    "Generates a random IPv6 address.",
+	},
+	"RandomPassword": {
+		Generator:      faker.Password,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random password.",
+	},
+
+	// Faker words and Sentences
+	"RandomWord": {
+		Generator:      faker.Word,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random word.",
+	},
+	"RandomSentence": {
+		Generator:      faker.Sentence,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random sentence.",
+	},
+	"RandomParagraph": {
+		Generator:      faker.Paragraph,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random paragraph.",
+	},
+
+	// Faker Payment
+	"RandomCCType": {
+		Generator:      faker.CCType,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random credit card type.",
+	},
+	"RandomCCNumber": {
+		Generator: faker.CCNumber,
+		// int4 is intentionally not offered: card numbers are far longer than
+		// the ten digits a 32-bit integer column can hold.
+		SupportedTypes: []string{"text", "varchar", "int8", "numeric"},
+		Description:    "Generates a random credit card number.",
+	},
+	"RandomCurrency": {
+		Generator:      faker.Currency,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random currency code.",
+	},
+	"RandomAmountWithCurrency": {
+		Generator:      faker.AmountWithCurrency,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random monetary amount with currency.",
+	},
+
+	// Faker Person
+	"RandomTitleMale": {
+		Generator:      faker.TitleMale,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random title for males.",
+	},
+	"RandomTitleFemale": {
+		Generator:      faker.TitleFemale,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random title for females.",
+	},
+	"RandomFirstName": {
+		Generator:      faker.FirstName,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random first name.",
+	},
+	"RandomFirstNameMale": {
+		Generator:      faker.FirstNameMale,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random male first name.",
+	},
+	"RandomFirstNameFemale": {
+		Generator:      faker.FirstNameFemale,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random female first name.",
+	},
+	"RandomLastName": {
+		Generator:      faker.LastName,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random last name.",
+	},
+	"RandomName": {
+		Generator:      faker.Name,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a full random name.",
+	},
+
+	// Faker Phone
+	"RandomPhoneNumber": {
+		Generator:      faker.Phonenumber,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random phone number.",
+	},
+	"RandomTollFreePhoneNumber": {
+		Generator:      faker.TollFreePhoneNumber,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random toll-free phone number.",
+	},
+	"RandomE164PhoneNumber": {
+		Generator:      faker.E164PhoneNumber,
+		SupportedTypes: []string{"text", "varchar"},
+		Description:    "Generates a random phone number in E.164 format.",
+	},
+}
+
+// generateFakerTransformers builds a transformer definition for every entry of
+// FakerTransformersDes and registers it in the provided registry.
+func generateFakerTransformers(registry *utils.TransformerRegistry) {
+	for name, def := range FakerTransformersDes {
+		td := utils.NewDefinition(
+			utils.NewTransformerProperties(
+				name,
+				def.Description,
+			),
+			MakeNewFakeTransformerFunction(def.Generator),
+			toolkit.MustNewParameter(
+				"column",
+				"column name",
+			).SetIsColumn(toolkit.NewColumnProperties().
+				SetAffected(true).
+				SetAllowedColumnTypes(def.SupportedTypes...),
+			).SetRequired(true),
+			toolkit.MustNewParameter(
+				"keep_null",
+				"indicates that NULL values must not be replaced with transformed values",
+			).SetDefaultValue(
+				toolkit.ParamsValue("true"),
+			),
+		)
+
+		// Register in the registry passed by the caller rather than reaching
+		// for the package-level default, so the parameter is actually honored.
+		registry.MustRegister(td)
+	}
+}
+
+// FakeTransformer replaces the value of a single column with the output of a
+// faker generator.
+type FakeTransformer struct {
+	columnName      string
+	keepNull        bool
+	columnIdx       int
+	affectedColumns map[int]string
+	generate        FakerFunc
+}
+
+// MakeNewFakeTransformerFunction binds generator into a constructor with the
+// utils.NewTransformerFunc signature.
+func MakeNewFakeTransformerFunction(generator FakerFunc) utils.NewTransformerFunc {
+	return func(ctx context.Context, driver *toolkit.Driver, parameters map[string]*toolkit.Parameter) (utils.Transformer, toolkit.ValidationWarnings, error) {
+		return NewFakeTransformer(ctx, driver, parameters, generator)
+	}
+}
+
+// NewFakeTransformer reads the "column" and "keep_null" parameters, resolves
+// the column index through the driver and returns a FakeTransformer that uses
+// generator to produce new values.
+func NewFakeTransformer(
+	ctx context.Context, driver *toolkit.Driver, parameters map[string]*toolkit.Parameter, generator FakerFunc,
+) (utils.Transformer, toolkit.ValidationWarnings, error) {
+	p := parameters["column"]
+	var columnName string
+	var keepNull bool
+	if _, err := p.Scan(&columnName); err != nil {
+		return nil, nil, fmt.Errorf("unable to parse column param: %w", err)
+	}
+
+	idx, _, ok := driver.GetColumnByName(columnName)
+	if !ok {
+		return nil, nil, fmt.Errorf("column with name %s is not found", columnName)
+	}
+
+	p = parameters["keep_null"]
+	if _, err := p.Scan(&keepNull); err != nil {
+		return nil, nil, fmt.Errorf(`unable to scan "keep_null" param: %w`, err)
+	}
+
+	affectedColumns := make(map[int]string)
+	affectedColumns[idx] = columnName
+
+	return &FakeTransformer{
+		columnName:      columnName,
+		keepNull:        keepNull,
+		columnIdx:       idx,
+		affectedColumns: affectedColumns,
+		generate:        generator,
+	}, nil, nil
+}
+
+// GetAffectedColumns returns the index-to-name map of the transformed column.
+func (fts *FakeTransformer) GetAffectedColumns() map[int]string {
+	return fts.affectedColumns
+}
+
+// Init is a no-op; the transformer needs no setup.
+func (fts *FakeTransformer) Init(ctx context.Context) error {
+	return nil
+}
+
+// Done is a no-op; the transformer holds no resources.
+func (fts *FakeTransformer) Done(ctx context.Context) error {
+	return nil
+}
+
+// Transform overwrites the configured column with a freshly generated value.
+// A NULL value is left untouched when keep_null is enabled.
+func (fts *FakeTransformer) Transform(ctx context.Context, r *toolkit.Record) (*toolkit.Record, error) {
+	valAny, err := r.GetRawColumnValueByIdx(fts.columnIdx)
+	if err != nil {
+		return nil, fmt.Errorf("unable to scan value: %w", err)
+	}
+	if valAny.IsNull && fts.keepNull {
+		return r, nil
+	}
+
+	newValue := toolkit.NewRawValue([]byte(fts.generate()), false)
+
+	if err := r.SetRawColumnValueByIdx(fts.columnIdx, newValue); err != nil {
+		return nil, fmt.Errorf("unable to set new value: %w", err)
+	}
+
+	return r, nil
+}
+
+// init registers all faker transformers in the default registry.
+func init() {
+	generateFakerTransformers(utils.DefaultTransformerRegistry)
+}
diff --git a/internal/db/postgres/transformers/real_address.go b/internal/db/postgres/transformers/real_address.go
new file mode 100644
index 00000000..d8108730
--- /dev/null
+++ b/internal/db/postgres/transformers/real_address.go
@@ -0,0 +1,199 @@
+// Copyright 2023 Greenmask
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package transformers
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"slices"
+	"text/template"
+
+	"github.com/go-faker/faker/v4"
+	"github.com/greenmaskio/greenmask/internal/db/postgres/transformers/utils"
+	"github.com/greenmaskio/greenmask/pkg/toolkit"
+)
+
+// RealAddressTransformerDefinition declares the RealAddress transformer and its "columns" parameter.
+var RealAddressTransformerDefinition = utils.NewDefinition(
+	utils.NewTransformerProperties("RealAddress", "Generate a real address"),
+	NewRealAddressTransformer,
+	toolkit.MustNewParameter(
+		"columns",
+		"affected column names."+
+			"The structure:"+
+			`{`+
+			`"name": "type:string, required:true, description: column Name",`+
+			`"template": "type:string, required:true, description: gotemplate with real address attributes injections",`+
+			`"keep_null": "type:bool, required:false, description: keep null values",`+
+			`}`,
+	).SetRequired(true),
+)
+
+// RealAddressTransformer fills the configured columns from one generated address per record.
+type RealAddressTransformer struct {
+	columns         []*RealAddressColumn
+	affectedColumns map[int]string
+	buf             *bytes.Buffer
+}
+
+// RealAddressColumn is the per-column configuration decoded from the "columns" parameter.
+type RealAddressColumn struct {
+	Name      string `json:"name"`
+	KeepNull  bool   `json:"keep_null"`
+	Template  string `json:"template"`
+	columnIdx int
+	tmpl      *template.Template
+}
+
+// RealAddressValue is the data made available to the column templates.
+type RealAddressValue struct {
+	Address    string  `json:"address1"`
+	City       string  `json:"city"`
+	State      string  `json:"state"`
+	PostalCode string  `json:"postalCode"`
+	Latitude   float64 `json:"lat"`
+	Longitude  float64 `json:"lng"`
+}
+
+// NewRealAddressTransformer decodes the "columns" parameter, resolves column indexes and parses every
+// template, executing each once against a sample address; problems are reported as validation warnings.
+func NewRealAddressTransformer(ctx context.Context, driver *toolkit.Driver, parameters map[string]*toolkit.Parameter) (utils.Transformer, toolkit.ValidationWarnings, error) {
+	var warnings toolkit.ValidationWarnings
+	var columns []*RealAddressColumn
+
+	p := parameters["columns"]
+	if _, err := p.Scan(&columns); err != nil {
+		return nil, nil, err
+	}
+
+	affectedColumns := make(map[int]string)
+	testBuf := bytes.NewBuffer(nil)
+	for _, col := range columns {
+		idx, _, ok := driver.GetColumnByName(col.Name)
+		if !ok {
+			return nil, nil, fmt.Errorf("column with name %s is not found", col.Name)
+		}
+		col.columnIdx = idx // Set the correct column index
+		affectedColumns[idx] = col.Name
+
+		if col.Template == "" {
+			warnings = append(warnings,
+				toolkit.NewValidationWarning().
+					SetMsg("template value must not be empty").
+					SetSeverity(toolkit.ErrorValidationSeverity).
+					AddMeta("ColumnName", col.Name).
+					AddMeta("ParameterName", "columns"),
+			)
+			continue
+		}
+
+		tmpl, err := template.New("").Parse(col.Template)
+		if err != nil {
+			warnings = append(warnings,
+				toolkit.NewValidationWarning().
+					SetMsg("error parsing template").
+					SetSeverity(toolkit.ErrorValidationSeverity).
+					AddMeta("TemplateString", col.Template).
+					AddMeta("ColumnName", col.Name).
+					AddMeta("ParameterName", "columns").
+					AddMeta("Error", err.Error()),
+			)
+			continue
+		}
+		testBuf.Reset() // trial output is discarded; do not let it accumulate across columns
+		if err = tmpl.Execute(testBuf, getRealAddress()); err != nil {
+			warnings = append(warnings,
+				toolkit.NewValidationWarning().
+					SetMsg("error validating template").
+					SetSeverity(toolkit.ErrorValidationSeverity).
+					AddMeta("TemplateString", col.Template).
+					AddMeta("ColumnName", col.Name).
+					AddMeta("ParameterName", "columns").
+					AddMeta("Error", err.Error()),
+			)
+		}
+		col.tmpl = tmpl
+	}
+
+	if warnings.IsFatal() {
+		return nil, warnings, nil
+	}
+
+	return &RealAddressTransformer{
+		columns:         columns,
+		affectedColumns: affectedColumns,
+		buf:             bytes.NewBuffer(nil),
+	}, warnings, nil
+}
+
+// GetAffectedColumns returns the index-to-name map of the configured columns.
+func (rat *RealAddressTransformer) GetAffectedColumns() map[int]string {
+	return rat.affectedColumns
+}
+
+// Init is a no-op; the transformer needs no setup.
+func (rat *RealAddressTransformer) Init(ctx context.Context) error {
+	return nil
+}
+
+// Done is a no-op; the transformer holds no resources.
+func (rat *RealAddressTransformer) Done(ctx context.Context) error {
+	return nil
+}
+
+// Transform generates one address and renders every configured column's template with it.
+// A NULL value in a keep_null column is preserved without affecting the remaining columns.
+func (rat *RealAddressTransformer) Transform(ctx context.Context, r *toolkit.Record) (*toolkit.Record, error) {
+	address := getRealAddress()
+	for _, col := range rat.columns {
+		rawValue, err := r.GetRawColumnValueByIdx(col.columnIdx)
+		if err != nil {
+			return nil, err
+		}
+		if rawValue.IsNull && col.KeepNull {
+			continue // skip only this column; returning here would leave later columns untransformed
+		}
+
+		rat.buf.Reset()
+		if err = col.tmpl.Execute(rat.buf, address); err != nil {
+			return nil, fmt.Errorf("error executing template for column \"%s\": %w", col.Name, err)
+		}
+		newRawValue := toolkit.NewRawValue(slices.Clone(rat.buf.Bytes()), false)
+		if err := r.SetRawColumnValueByIdx(col.columnIdx, newRawValue); err != nil {
+			return nil, fmt.Errorf("unable to set new value: %w", err)
+		}
+	}
+
+	return r, nil
+}
+
+// getRealAddress copies a faker-provided real address into a RealAddressValue.
+func getRealAddress() *RealAddressValue {
+	addr := faker.GetRealAddress()
+	return &RealAddressValue{
+		Address:    addr.Address,
+		City:       addr.City,
+		State:      addr.State,
+		PostalCode: addr.PostalCode,
+		Latitude:   addr.Coordinates.Latitude,
+		Longitude:  addr.Coordinates.Longitude,
+	}
+}
+
+// init registers the RealAddress transformer in the default registry.
+func init() {
+	utils.DefaultTransformerRegistry.MustRegister(RealAddressTransformerDefinition)
+}
diff --git a/internal/db/postgres/transformers/real_address_test.go b/internal/db/postgres/transformers/real_address_test.go
new file mode
100644
index 00000000..e4fe2313
--- /dev/null
+++ b/internal/db/postgres/transformers/real_address_test.go
@@ -0,0 +1,117 @@
+// Copyright 2023 Greenmask
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package transformers
+
+import (
+	"context"
+	"encoding/json"
+	"testing"
+
+	"github.com/greenmaskio/greenmask/pkg/toolkit"
+	"github.com/stretchr/testify/require"
+)
+
+// TestRealAddressTransformer_Transform renders every RealAddressValue field
+// into one column and checks the rough shape of the produced value.
+func TestRealAddressTransformer_Transform(t *testing.T) {
+	driver, record := getDriverAndRecord("data", "somaval")
+
+	columns := []*RealAddressColumn{
+		{
+			Name:     "data",
+			Template: "{{ .Address }} {{ .City }} {{ .State }} {{ .PostalCode }} {{ .Latitude }} {{ .Longitude }}",
+		},
+	}
+
+	rawData, err := json.Marshal(columns)
+	require.NoError(t, err)
+
+	transformer, warnings, err := RealAddressTransformerDefinition.Instance(
+		context.Background(),
+		driver,
+		map[string]toolkit.ParamsValue{
+			"columns": rawData,
+		},
+		nil,
+	)
+
+	require.NoError(t, err)
+	require.Empty(t, warnings)
+
+	_, err = transformer.Transform(context.Background(), record)
+	require.NoError(t, err)
+	rawValue, err := record.GetRawColumnValueByName("data")
+	require.NoError(t, err)
+	require.False(t, rawValue.IsNull)
+	require.Regexp(t, `.* \d+ \-?\d+.\d+ \-?\d+.\d+`, string(rawValue.Data))
+}
+
+// TestMakeNewFakeTransformerFunction_parsing_error checks that a syntactically
+// broken template is reported as a validation warning. Despite its name it
+// exercises the RealAddress transformer.
+func TestMakeNewFakeTransformerFunction_parsing_error(t *testing.T) {
+	driver, _ := getDriverAndRecord("data", "somaval")
+
+	columns := []*RealAddressColumn{
+		{
+			Name:     "data",
+			Template: "{{ .Address }",
+		},
+	}
+
+	rawData, err := json.Marshal(columns)
+	require.NoError(t, err)
+
+	_, warnings, err := RealAddressTransformerDefinition.Instance(
+		context.Background(),
+		driver,
+		map[string]toolkit.ParamsValue{
+			"columns": rawData,
+		},
+		nil,
+	)
+	require.NoError(t, err)
+	require.Len(t, warnings, 1)
+	require.Equal(t, "error parsing template", warnings[0].Msg)
+}
+
+// TestMakeNewFakeTransformerFunction_validation_error checks that a template
+// referencing an unknown field is reported as a validation warning. Despite
+// its name it exercises the RealAddress transformer.
+func TestMakeNewFakeTransformerFunction_validation_error(t *testing.T) {
+	driver, _ := getDriverAndRecord("data", "somaval")
+
+	columns := []*RealAddressColumn{
+		{
+			Name:     "data",
+			Template: "{{ .Address1 }}",
+		},
+	}
+
+	rawData, err := json.Marshal(columns)
+	require.NoError(t, err)
+
+	_, warnings, err := RealAddressTransformerDefinition.Instance(
+		context.Background(),
+		driver,
+		map[string]toolkit.ParamsValue{
+			"columns": rawData,
+		},
+		nil,
+	)
+	require.NoError(t, err)
+	require.Len(t, warnings, 1)
+	require.Equal(t, "error validating template", warnings[0].Msg)
+}