Skip to content

Commit

Permalink
Merge pull request #24 from GreenmaskIO/hash_fixes_v0.1.7
Browse files Browse the repository at this point in the history
Improved Hash transformer
  • Loading branch information
wwoytenko authored Mar 15, 2024
2 parents 75168c5 + b847d3a commit 7206a9a
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 14 deletions.
15 changes: 11 additions & 4 deletions docs/built_in_transformers/standard_transformers/hash.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@ Generate a hash of the text value using the `Scrypt` hash function under the hoo
| Name | Description | Default | Required | Supported DB types |
|------------|---------------------------------------------------------------------------------------------------------------------------------------|---------|----------|--------------------|
| column | The name of the column to be affected | | Yes | text, varchar |
| function | Hash algorithm to obfuscate data. Can be any of `md5`, `sha1`, `sha256`, `sha512`. | `sha1` | No | - |
| salt | Hex encoded salt string. This value may be provided via environment variable `GREENMASK_GLOBAL_SALT` | | Yes | text, varchar |
| function | Hash algorithm to obfuscate data. Can be any of `md5`, `sha1`, `sha256`, `sha512`, `sha3-224`, `sha3-254`, `sha3-384`, `sha3-512`. | `sha3-254` | No | - |
| max_length | Indicates whether to truncate the hash tail and specifies at what length. Can be any integer number, where `0` means "no truncation". | `0` | No | - |

## Example: Generate hash from job title

The following example hashes the value of the `jobtitle` column with `sha1` and truncates the result to its first 10 characters.

We can set the salt via the environment variable `GREENMASK_GLOBAL_SALT`:

```shell
export GREENMASK_GLOBAL_SALT="12343567baaa"
```

```yaml title="Hash transformer example"
- schema: "humanresources"
name: "employee"
Expand All @@ -25,8 +32,8 @@ The following example generates a hash from the `jobtitle` into sha1 and truncat
```bash title="Expected result"

| column name | original value | transformed |
|-------------|--------------------------|-------------|
| jobtitle | Research and Development | Zpmfe8F+LV |
| column name | original value | transformed |
|-------------|----------------------------------|-------------|
| jobtitle | Research and Development Manager | 3a456da5c5 |

```
66 changes: 57 additions & 9 deletions internal/db/postgres/transformers/hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (
"fmt"
"hash"
"strconv"
"strings"

"golang.org/x/crypto/sha3"

"github.com/greenmaskio/greenmask/internal/db/postgres/transformers/utils"
"github.com/greenmaskio/greenmask/pkg/toolkit"
Expand All @@ -45,10 +48,20 @@ var HashTransformerDefinition = utils.NewDefinition(
SetAllowedColumnTypes("text", "varchar"),
).SetRequired(true),

toolkit.MustNewParameter(
"salt",
"hex encoded salt string. This value may be provided via environment variable GREENMASK_GLOBAL_SALT",
).SetGetFromGlobalEnvVariable("GREENMASK_GLOBAL_SALT"),

toolkit.MustNewParameter(
"function",
"hash function name. Possible values sha1, sha256, sha512, md5",
).SetDefaultValue([]byte("sha1")).
fmt.Sprintf("hash function name. Possible values: %s",
strings.Join(
[]string{sha1Name, sha256Name, sha512Name, sha3224Name, sha3256Name, sha384Name, sha5124Name, md5Name},
", ",
),
),
).SetDefaultValue([]byte(sha3256Name)).
SetRawValueValidator(validateHashFunctionsParameter),

toolkit.MustNewParameter(
Expand All @@ -58,6 +71,17 @@ var HashTransformerDefinition = utils.NewDefinition(
SetRawValueValidator(validateMaxLengthParameter),
)

// Names of the hash functions accepted by the "function" parameter of the
// hash transformer. The values are the strings users write in their config.
//
// NOTE(review): the value "sha3-254" looks like a typo for "sha3-256" (the
// constant selects sha3.New256 in NewHashTransformer), and the identifiers
// sha384Name / sha5124Name actually denote SHA3-384 / SHA3-512. Both are kept
// as-is here because the literal is a user-visible config value and the
// identifiers are referenced elsewhere in this file - confirm before changing.
const (
	// Legacy digests.
	md5Name  = "md5"
	sha1Name = "sha1"

	// SHA-2 family.
	sha256Name = "sha256"
	sha512Name = "sha512"

	// SHA-3 family.
	sha3224Name = "sha3-224"
	sha3256Name = "sha3-254"
	sha384Name  = "sha3-384"
	sha5124Name = "sha3-512"
)

type HashTransformer struct {
columnName string
affectedColumns map[int]string
Expand All @@ -67,6 +91,7 @@ type HashTransformer struct {
encodedOutputLength int
hashBuf []byte
resultBuf []byte
salt []byte
}

func NewHashTransformer(
Expand Down Expand Up @@ -100,29 +125,48 @@ func NewHashTransformer(
var h hash.Hash
var hashFunctionLength int
switch hashFunctionName {
case "md5":
case md5Name:
h = md5.New()
hashFunctionLength = 16
case "sha1":
case sha1Name:
h = sha1.New()
hashFunctionLength = 20
case "sha256":
case sha256Name:
h = sha256.New()
hashFunctionLength = 32
case "sha512":
case sha512Name:
h = sha512.New()
hashFunctionLength = 64
case sha3224Name:
h = sha3.New224()
hashFunctionLength = 28
case sha3256Name:
h = sha3.New256()
hashFunctionLength = 32
case sha384Name:
h = sha3.New384()
hashFunctionLength = 48
case sha5124Name:
h = sha3.New512()
hashFunctionLength = 64
default:
return nil, nil, fmt.Errorf("unknown hash function \"%s\"", hashFunctionName)
}

p = parameters["salt"]
var salt string
if _, err := p.Scan(&salt); err != nil {
return nil, nil, fmt.Errorf("unable to scan \"salt\" parameter: %w", err)
}

return &HashTransformer{
columnName: columnName,
affectedColumns: affectedColumns,
columnIdx: idx,
maxLength: maxLength,
hashBuf: make([]byte, 0, hashFunctionLength),
resultBuf: make([]byte, hex.EncodedLen(hashFunctionLength)),
salt: []byte(salt),
encodedOutputLength: hex.EncodedLen(hashFunctionLength),
h: h,
}, nil, nil
Expand Down Expand Up @@ -150,6 +194,10 @@ func (ht *HashTransformer) Transform(ctx context.Context, r *toolkit.Record) (*t
}

defer ht.h.Reset()
_, err = ht.h.Write(ht.salt)
if err != nil {
return nil, fmt.Errorf("unable to write salt into writer: %w", err)
}
_, err = ht.h.Write(val.Data)
if err != nil {
return nil, fmt.Errorf("unable to write raw data into writer: %w", err)
Expand All @@ -174,7 +222,7 @@ func (ht *HashTransformer) Transform(ctx context.Context, r *toolkit.Record) (*t
func validateHashFunctionsParameter(p *toolkit.Parameter, v toolkit.ParamsValue) (toolkit.ValidationWarnings, error) {
functionName := string(v)
switch functionName {
case "md5", "sha1", "sha256", "sha512":
case sha1Name, sha256Name, sha512Name, sha3224Name, sha3256Name, sha384Name, sha5124Name, md5Name:
return nil, nil
}
return toolkit.ValidationWarnings{
Expand All @@ -185,11 +233,11 @@ func validateHashFunctionsParameter(p *toolkit.Parameter, v toolkit.ParamsValue)
}

func validateMaxLengthParameter(p *toolkit.Parameter, v toolkit.ParamsValue) (toolkit.ValidationWarnings, error) {
max_length, err := strconv.ParseInt(string(v), 10, 32)
maxLength, err := strconv.ParseInt(string(v), 10, 32)
if err != nil {
return nil, fmt.Errorf("error parsing \"max_length\" as integer: %w", err)
}
if max_length >= 0 {
if maxLength >= 0 {
return nil, nil
}
return toolkit.ValidationWarnings{
Expand Down
15 changes: 14 additions & 1 deletion pkg/toolkit/parameter.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"encoding/json"
"errors"
"fmt"
"os"
"slices"
"time"

Expand Down Expand Up @@ -127,6 +128,8 @@ type Parameter struct {
// ColumnProperties - detail info about expected column properties that may help to diagnose the table schema
// and perform validation procedure Plays only with IsColumn
ColumnProperties *ColumnProperties `mapstructure:"column_properties" json:"column_properties,omitempty"`
// GetFromGlobalEnvVariable - the name of the global environment variable whose value is used on empty input
GetFromGlobalEnvVariable string `mapstructure:"get_from_global_env_variable" json:"get_from_global_env_variable,omitempty"`
// Unmarshaller - unmarshal function for the parameter raw data []byte. Using by default json.Unmarshal function
Unmarshaller Unmarshaller `json:"-"`
// RawValueValidator - raw value validator function that performs assertion and cause ValidationWarnings if it
Expand Down Expand Up @@ -330,6 +333,11 @@ func (p *Parameter) SetDefaultValue(v ParamsValue) *Parameter {
return p
}

// SetGetFromGlobalEnvVariable records v as the name of the environment
// variable this parameter may take its value from, and returns the same
// parameter so that setter calls can be chained.
//
// When the name is non-empty, Init seeds the raw value from os.Getenv(v); an
// explicitly supplied (non-nil) raw value still takes precedence over it.
func (p *Parameter) SetGetFromGlobalEnvVariable(v string) *Parameter {
p.GetFromGlobalEnvVariable = v
return p
}

func (p *Parameter) Copy() *Parameter {
cp := *p
cp.value = nil
Expand All @@ -341,7 +349,12 @@ func (p *Parameter) Init(driver *Driver, types []*Type, params []*Parameter, raw
var warnings ValidationWarnings
p.Driver = driver
p.rawValue = nil
p.rawValue = slices.Clone(rawValue)
if p.GetFromGlobalEnvVariable != "" {
p.rawValue = []byte(os.Getenv(p.GetFromGlobalEnvVariable))
}
if rawValue != nil {
p.rawValue = slices.Clone(rawValue)
}

if rawValue == nil {
if p.Required {
Expand Down

0 comments on commit 7206a9a

Please sign in to comment.