-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add integration test for the combinations of input/output datatypes (#…
…352) * Add integration test for the combinations of input/output datatypes * Run pre-commit
- Loading branch information
Showing
11 changed files
with
192 additions
and
181 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 0 additions & 90 deletions
90
tests/data/datadoc/expected_metadata_test_pseudonymize_default_encryption_all_fields.json
This file was deleted.
Oops, something went wrong.
29 changes: 29 additions & 0 deletions
29
tests/data/datadoc/expected_metadata_test_pseudonymize_papis_compatible_encryption.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
{ | ||
"document_version": "0.0.1", | ||
"datadoc": null, | ||
"pseudonymization": { | ||
"document_version": "0.1.0", | ||
"pseudo_dataset": null, | ||
"pseudo_variables": [ | ||
{ | ||
"short_name": "fnr", | ||
"data_element_path": "fnr", | ||
"data_element_pattern": "**", | ||
"stable_identifier_type": null, | ||
"stable_identifier_version": null, | ||
"encryption_algorithm": "TINK-FPE", | ||
"encryption_key_reference": "papis-common-key-1", | ||
"encryption_algorithm_parameters": [ | ||
{ | ||
"keyId": "papis-common-key-1" | ||
}, | ||
{ | ||
"strategy": "skip" | ||
} | ||
], | ||
"source_variable": null, | ||
"source_variable_datatype": null | ||
} | ||
] | ||
} | ||
} |
File renamed without changes.
77 changes: 23 additions & 54 deletions
77
tests/data/personer_pseudonymized_default_encryption.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,23 @@ | ||
{ | ||
"columns": [ | ||
{ | ||
"name": "fnr", | ||
"datatype": "String", | ||
"bit_settings": "", | ||
"values": [ | ||
"AWIRfKLSNfR0ID+wBzogEcUT7JQPayk7Gosij6SXr8s=", | ||
"AWIRfKKLagk0LqYCKpiC4xfPkHqIWGVfc3wg5gUwRNE=", | ||
"AWIRfKIzL1T9iZqt+pLjNbHMsLa0aKSszsRrLiLSAAg=" | ||
] | ||
}, | ||
{ | ||
"name": "fornavn", | ||
"datatype": "String", | ||
"bit_settings": "", | ||
"values": [ | ||
"AWIRfKKWWRC1hURqsYw4S/h/NitvuP6bO/R7", | ||
"AWIRfKJuYBaBQIXIprRO9UFDXNLd4YXcHtY=", | ||
"AWIRfKKsIDQgWLnpsSln38z1RSfHjjL8FS4=" | ||
] | ||
}, | ||
{ | ||
"name": "etternavn", | ||
"datatype": "String", | ||
"bit_settings": "", | ||
"values": [ | ||
"AWIRfKIKAGiRoGTd/Cid5gxsIDx4H1ya6w==", | ||
"AWIRfKLzOfzOw+Bdo9zIa4savOOeAiEr", | ||
"AWIRfKIKAGiRoGTd/Cid5gxsIDx4H1ya6w==" | ||
] | ||
}, | ||
{ | ||
"name": "kjonn", | ||
"datatype": "String", | ||
"bit_settings": "", | ||
"values": [ | ||
"AWIRfKJuDy4LnWA7y/9fGHhJg3hZ0Q==", | ||
"AWIRfKJuDy4LnWA7y/9fGHhJg3hZ0Q==", | ||
"AWIRfKJuDy4LnWA7y/9fGHhJg3hZ0Q==" | ||
] | ||
}, | ||
{ | ||
"name": "fodselsdato", | ||
"datatype": "String", | ||
"bit_settings": "", | ||
"values": [ | ||
"AWIRfKJ0wod0IfL/dSrzF7pPLIgDSyNCofor", | ||
"AWIRfKJNjb7vLdKwCLHuJPIQUUqdP8/nDYU0", | ||
"AWIRfKKqsp5+sjjRDpFwUfMdo16j6URwkFuE" | ||
] | ||
} | ||
] | ||
} | ||
[ | ||
{ | ||
"fnr": "AWIRfKLSNfR0ID+wBzogEcUT7JQPayk7Gosij6SXr8s=", | ||
"fornavn": "Donald", | ||
"etternavn": "Duck", | ||
"kjonn": "M", | ||
"fodselsdato": "020995" | ||
}, | ||
{ | ||
"fnr": "AWIRfKKLagk0LqYCKpiC4xfPkHqIWGVfc3wg5gUwRNE=", | ||
"fornavn": "Mikke", | ||
"etternavn": "Mus", | ||
"kjonn": "M", | ||
"fodselsdato": "060970" | ||
}, | ||
{ | ||
"fnr": "AWIRfKIzL1T9iZqt+pLjNbHMsLa0aKSszsRrLiLSAAg=", | ||
"fornavn": "Anton", | ||
"etternavn": "Duck", | ||
"kjonn": "M", | ||
"fodselsdato": "180999" | ||
} | ||
] |
6 changes: 3 additions & 3 deletions
6
tests/data/personer_pseudonymized.json → ...onymized_papis_compatible_encryption.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import json | ||
import typing as t | ||
from collections.abc import Generator | ||
from pathlib import Path | ||
|
||
import pandas as pd | ||
import polars as pl | ||
import pytest | ||
|
||
from dapla_pseudo import Pseudonymize | ||
from tests.integration.utils import df_pandas_personer_fnr_daead_encrypted | ||
from tests.integration.utils import df_personer | ||
from tests.integration.utils import df_personer_fnr_daead_encrypted | ||
from tests.integration.utils import df_personer_pandas | ||
from tests.integration.utils import integration_test | ||
from tests.integration.utils import personer_file_path | ||
from tests.integration.utils import setup | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"output_func", | ||
[("file"), ("pandas"), ("polars")], | ||
) | ||
@pytest.mark.parametrize( | ||
"input_func", | ||
[("file"), ("pandas"), ("polars")], | ||
) | ||
@integration_test() | ||
def test_pseudonymize_input_output_funcs( | ||
setup: Generator[None, None, None], | ||
input_func: t.Literal["file", "pandas", "polars"], | ||
output_func: t.Literal["file", "pandas", "polars"], | ||
tmp_path: Path, | ||
personer_file_path: str, | ||
df_personer_pandas: pd.DataFrame, | ||
df_personer: pl.DataFrame, | ||
df_personer_fnr_daead_encrypted: pl.DataFrame, | ||
df_pandas_personer_fnr_daead_encrypted: pd.DataFrame, | ||
) -> None: | ||
"""This test runs several times, once for every combination of the possible input and output datatypes. | ||
It is intended to end-to-end-test for the conversion between data types, e.g. Polars DataFrame -> File. | ||
""" | ||
match input_func: | ||
case "file": | ||
pseudonymizer = Pseudonymize.from_file(personer_file_path) | ||
case "pandas": | ||
pseudonymizer = Pseudonymize.from_pandas(df_personer_pandas) | ||
case "polars": | ||
pseudonymizer = Pseudonymize.from_polars(df_personer) | ||
|
||
result = pseudonymizer.on_fields("fnr").with_default_encryption().run() | ||
|
||
match output_func: | ||
case "file": | ||
file_path = tmp_path / "personer_pseudo.json" | ||
result.to_file(str(file_path)) | ||
|
||
expected = json.loads( | ||
open("tests/data/personer_pseudonymized_default_encryption.json").read() | ||
) | ||
actual = json.loads(file_path.open().read()) | ||
|
||
assert expected == actual | ||
case "pandas": | ||
df_pandas = result.to_pandas() | ||
assert df_pandas_personer_fnr_daead_encrypted.equals(df_pandas) | ||
case "polars": | ||
df_polars = result.to_polars() | ||
assert df_personer_fnr_daead_encrypted.equals(df_polars) |
Oops, something went wrong.