Skip to content

Commit

Permalink
Implement support for depseudo functionality (#359)
Browse files Browse the repository at this point in the history
* Implemented support for map-sid for depseudo
* Added tests for depseudo
  • Loading branch information
RupinderKaurSSB authored Mar 22, 2024
1 parent 5b9b998 commit f75249c
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 0 deletions.
35 changes: 35 additions & 0 deletions src/dapla_pseudo/v1/depseudo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import typing as t
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from datetime import date
from typing import Optional

import pandas as pd
Expand All @@ -14,10 +15,12 @@
from dapla_pseudo.constants import PredefinedKeys
from dapla_pseudo.constants import PseudoFunctionTypes
from dapla_pseudo.types import FileLikeDatasetDecl
from dapla_pseudo.utils import convert_to_date
from dapla_pseudo.v1.api_models import DaeadKeywordArgs
from dapla_pseudo.v1.api_models import DepseudonymizeFileRequest
from dapla_pseudo.v1.api_models import FF31KeywordArgs
from dapla_pseudo.v1.api_models import KeyWrapper
from dapla_pseudo.v1.api_models import MapSidKeywordArgs
from dapla_pseudo.v1.api_models import Mimetypes
from dapla_pseudo.v1.api_models import PseudoConfig
from dapla_pseudo.v1.api_models import PseudoFunction
Expand Down Expand Up @@ -231,6 +234,38 @@ def __init__(
self._fields = fields
self._existing_rules = [] if rules is None else rules

def with_stable_id(
    self,
    sid_snapshot_date: Optional[str | date] = None,
    custom_key: Optional[PredefinedKeys | str] = None,
) -> "Depseudonymize._Depseudonymizer":
    """Depseudonymize the selected fields by mapping stable IDs back to fnr.

    Builds a ``MAP_SID`` pseudo function and hands it to the rule constructor.
    The operation is described as:

    1) Decrypt stable-id
    2) Then map decrypted stable-id to fnr and return original fnr.

    Args:
        sid_snapshot_date (Optional[str | date], optional): Date representing SID-catalogue version to use.
            Latest if unspecified. Format: YYYY-MM-DD
        custom_key (Optional[PredefinedKeys | str], optional): Override the key to use for depseudonymization.
            Must be one of the keys defined in PredefinedKeys. If not defined (or empty), no key is passed
            and the default key for this function is used (papis-common-key-1).

    Returns:
        Self: The object configured to be mapped to fnr
    """
    # Convert once; both variants of the keyword arguments need the same date.
    snapshot_date = convert_to_date(sid_snapshot_date)
    # Only pass key_id when a key was actually supplied, so that the model's
    # own default applies otherwise (passing None explicitly would override it).
    kwargs = (
        MapSidKeywordArgs(key_id=custom_key, snapshot_date=snapshot_date)
        if custom_key
        else MapSidKeywordArgs(snapshot_date=snapshot_date)
    )
    function = PseudoFunction(
        function_type=PseudoFunctionTypes.MAP_SID, kwargs=kwargs
    )
    return self._rule_constructor(function)

def with_default_encryption(
self, custom_key: Optional[PredefinedKeys | str] = None
) -> "Depseudonymize._Depseudonymizer":
Expand Down
30 changes: 30 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,36 @@ def df_personer_fnr_daead_encrypted() -> pl.DataFrame:
)


@pytest.fixture
def df_personer_pseudo_stable_id() -> pl.DataFrame:
    """Load the stable-ID pseudonymized person sample with every column read as a string."""
    string_columns = ("fnr", "fornavn", "etternavn", "kjonn", "fodselsdato")
    return pl.read_json(
        "tests/data/person_3_sid_deid.json",
        schema={column: pl.String for column in string_columns},
    )


@pytest.fixture
def df_personer_depseudo_stable_id() -> pl.DataFrame:
    """Load the person sample with original fnr values, every column read as a string."""
    string_columns = ("fnr", "fornavn", "etternavn", "kjonn", "fodselsdato")
    return pl.read_json(
        "tests/data/person_3_sid.json",
        schema={column: pl.String for column in string_columns},
    )


@pytest.fixture
def df_pandas_personer_fnr_daead_encrypted() -> pd.DataFrame:
JSON_FILE = "tests/data/personer_pseudonymized_default_encryption.json"
Expand Down
23 changes: 23 additions & 0 deletions tests/data/person_3_sid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[
{
"fnr": "11854898347",
"fornavn": "Mathias",
"etternavn": "Holm",
"kjonn": "M",
"fodselsdato": "020995"
},
{
"fnr": "01839899544",
"fornavn": "Gunnar",
"etternavn": "Jørgensen",
"kjonn": "M",
"fodselsdato": "060970"
},
{
"fnr": "02812289295",
"fornavn": "Kristoffer",
"etternavn": "Pedersen",
"kjonn": "M",
"fodselsdato": "180999"
}
]
23 changes: 23 additions & 0 deletions tests/data/person_3_sid_deid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[
{
"fnr": "jJuuj0i",
"fornavn": "Mathias",
"etternavn": "Holm",
"kjonn": "M",
"fodselsdato": "020995"
},
{
"fnr": "ylc9488",
"fornavn": "Gunnar",
"etternavn": "Jørgensen",
"kjonn": "M",
"fodselsdato": "060970"
},
{
"fnr": "mprMeNQ",
"fornavn": "Kristoffer",
"etternavn": "Pedersen",
"kjonn": "M",
"fodselsdato": "180999"
}
]
17 changes: 17 additions & 0 deletions tests/integration/test_integration_deseudonymize.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,20 @@ def test_depseudonymize_default_encryption(
.to_polars()
)
assert result.equals(df_personer)


@integration_test()
def test_depseudonymize_sid(
    setup: Generator[None, None, None],
    df_personer: pl.DataFrame,
    df_personer_pseudo_stable_id: pl.DataFrame,
    df_personer_depseudo_stable_id: pl.DataFrame,
) -> None:
    """Depseudonymizing the "fnr" column via stable ID must reproduce the expected frame."""
    # NOTE(review): the df_personer fixture is requested but not used in this test body.
    builder = Depseudonymize.from_polars(df_personer_pseudo_stable_id)
    configured = builder.on_fields("fnr").with_stable_id()
    depseudonymized = configured.run().to_polars()
    assert depseudonymized.equals(df_personer_depseudo_stable_id)
19 changes: 19 additions & 0 deletions tests/v1/test_depseudo.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,25 @@ def test_builder_fields_selector_multiple_fields(
]


@patch(f"{PKG}.pseudonymize_operation_field")
def test_builder_depseudo_function_selector_with_sid(
    patch_depseudonymize_operation_field: MagicMock, df_personer: pl.DataFrame
) -> None:
    """with_stable_id() without arguments must issue one MAP_SID call with default kwargs."""
    mock_return_pseudonymize_operation_field(patch_depseudonymize_operation_field)

    field_selection = Depseudonymize.from_polars(df_personer).on_fields("fnr")
    field_selection.with_stable_id().run()

    # The function the builder is expected to forward to the field operation.
    expected_function = PseudoFunction(
        function_type=PseudoFunctionTypes.MAP_SID, kwargs=MapSidKeywordArgs()
    )
    patch_depseudonymize_operation_field.assert_called_once_with(
        path="depseudonymize/field",
        values=df_personer["fnr"].to_list(),
        field_name="fnr",
        pseudo_func=expected_function,
        timeout=TIMEOUT_DEFAULT,
        pseudo_client=ANY,
        keyset=None,
    )


@patch(f"{PKG}.pseudo_operation_file")
def test_builder_file_default(
patched_pseudo_operation_file: MagicMock, personer_pseudonymized_file_path: str
Expand Down

0 comments on commit f75249c

Please sign in to comment.