Skip to content

Commit

Permalink
Tighten polars version (#421)
Browse files Browse the repository at this point in the history
* Use Polars >= v1

* Update poetry.lock. Bump version

* Remove NumPy constraint

* Loosen PyArrow version

* Downgrade Polars version. Add tests for nulled structs

* make pre-commit happy

* Make mypy happy
  • Loading branch information
mallport authored Nov 26, 2024
1 parent a2c0709 commit 5348268
Show file tree
Hide file tree
Showing 10 changed files with 803 additions and 672 deletions.
1,332 changes: 671 additions & 661 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dapla-toolbelt-pseudo"
version = "2.2.3"
version = "2.2.4"
description = "Pseudonymization extensions for Dapla"
authors = ["Dapla Developers <[email protected]>"]
license = "MIT"
Expand All @@ -26,7 +26,7 @@ python-ulid = ">=2.2.0"
cryptography = ">41.0.0"
dapla-toolbelt = ">=1.7.0"
fsspec = ">=2023.5.0"
polars = ">=1.0.0"
polars = ">=1.0.0, <1.2"
pygments = ">2.15.0"
click = ">=8.0.1"
ssb-datadoc-model = ">=5.0.0"
Expand Down
8 changes: 4 additions & 4 deletions src/dapla_pseudo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@
from dapla_pseudo.v1.validation import Validator

__all__ = [
"PseudoClient",
"Pseudonymize",
"Depseudonymize",
"Repseudonymize",
"Validator",
"PseudoClient",
"PseudoKeyset",
"PseudoRule",
"Pseudonymize",
"Repseudonymize",
"SchemaTraverser",
"Validator",
]
6 changes: 3 additions & 3 deletions src/dapla_pseudo/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
from dapla_pseudo.v1.validation import Validator

__all__ = [
"Depseudonymize",
"PseudoClient",
"PseudoKeyset",
"PseudoRule",
"Pseudonymize",
"Depseudonymize",
"Repseudonymize",
"Validator",
"PseudoKeyset",
"PseudoRule",
]
4 changes: 2 additions & 2 deletions src/dapla_pseudo/v1/supported_file_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def write_from_dicts(
case SupportedOutputFileFormat.PARQUET:
df = pl.DataFrame(data)
# Polars' type hints are inaccurate here: write_parquet accepts this file-like object at runtime
df.write_parquet(file_like)
df.write_parquet(file_like) # type: ignore[arg-type]
case SupportedOutputFileFormat.CSV:
df = pl.DataFrame(data)
df.write_csv(file_like)
Expand Down Expand Up @@ -123,4 +123,4 @@ def write_from_df(
case SupportedOutputFileFormat.XML:
df.to_pandas().to_xml(file_like, **kwargs)
case SupportedOutputFileFormat.PARQUET:
df.write_parquet(file_like, **kwargs)
df.write_parquet(file_like, **kwargs) # type: ignore[arg-type]
16 changes: 16 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,22 @@ def df_personer_hierarchical_redacted() -> pl.DataFrame:
)


@pytest_cases.fixture()
def df_personer_hierarchical_null() -> pl.DataFrame:
    """Return the hierarchical person test data read from its JSON file.

    The backing file contains one record whose ``person_info`` struct is
    null, alongside records where the struct is populated.
    """
    return pl.read_json("tests/data/personer_hierarchical_null.json")


@pytest_cases.fixture()
def df_personer_hierarchical_null_pseudonymized() -> pl.DataFrame:
    """Return the pseudonymized counterpart of the null-struct person data.

    Read from the JSON file holding the frame that tests compare against
    after pseudonymizing the null-struct input.
    """
    return pl.read_json("tests/data/personer_hierarchical_null_pseudonymized.json")


@pytest_cases.fixture()
def df_personer_hierarchical_inner_list() -> pl.DataFrame:
JSON_FILE = "tests/data/personer_hierarchical_inner_list.json"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"document_version": "0.0.1",
"pseudonymization": {
"document_version": "0.1.0",
"pseudo_variables": [
{
"short_name": "fnr",
"data_element_path": "person_info.fnr",
"data_element_pattern": "**/person_info/fnr",
"encryption_algorithm": "TINK-DAEAD",
"encryption_key_reference": "ssb-common-key-1",
"encryption_algorithm_parameters": [
{
"keyId": "ssb-common-key-1"
}
]
}
]
}
}
25 changes: 25 additions & 0 deletions tests/data/personer_hierarchical_null.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[
{
"person_info": null,
"kjonn": "M",
"fodselsdato": "020995"
},
{
"person_info": {
"fnr": "01839899544",
"fornavn": "Mikke",
"etternavn": "Mus"
},
"kjonn": "M",
"fodselsdato": "060970"
},
{
"person_info": {
"fnr": "16910599481",
"fornavn": "Anton",
"etternavn": "Duck"
},
"kjonn": "M",
"fodselsdato": "180999"
}
]
29 changes: 29 additions & 0 deletions tests/data/personer_hierarchical_null_pseudonymized.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
[
{
"person_info": {
"fnr": null,
"fornavn": null,
"etternavn": null
},
"kjonn": "M",
"fodselsdato": "020995"
},
{
"person_info": {
"fnr": "AWIRfKKLagk0LqYCKpiC4xfPkHqIWGVfc3wg5gUwRNE=",
"fornavn": "Mikke",
"etternavn": "Mus"
},
"kjonn": "M",
"fodselsdato": "060970"
},
{
"person_info": {
"fnr": "AWIRfKIzL1T9iZqt+pLjNbHMsLa0aKSszsRrLiLSAAg=",
"fornavn": "Anton",
"etternavn": "Duck"
},
"kjonn": "M",
"fodselsdato": "180999"
}
]
31 changes: 31 additions & 0 deletions tests/v1/integration/test_pseudonymize.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,37 @@ def test_pseudonymize_hierarchical(
assert_frame_equal(result.to_polars(), df_personer_hierarchical_pseudonymized)


@pytest.mark.usefixtures("setup")
@integration_test()
def test_pseudonymize_hierarchical_null(
    df_personer_hierarchical_null: pl.DataFrame,
    df_personer_hierarchical_null_pseudonymized: pl.DataFrame,
) -> None:
    """Pseudonymize hierarchical data that contains a null struct.

    Applies a DAEAD rule to ``person_info/fnr`` in hierarchical mode, then
    checks both the produced datadoc metadata and the resulting frame
    against the expected fixtures.
    """
    daead_function = PseudoFunction(
        function_type=PseudoFunctionTypes.DAEAD, kwargs=DaeadKeywordArgs()
    )
    fnr_rule = PseudoRule(
        name="my-rule",
        func=daead_function,
        pattern="**/person_info/fnr",
        path="person_info/fnr",
    )

    builder = Pseudonymize.from_polars(df_personer_hierarchical_null)
    result = builder.add_rules(fnr_rule).run(hierarchical=True)

    # Called directly from this test body; the returned name (presumably this
    # test's own name) selects which expected metadata container is loaded.
    test_name = get_calling_function_name()
    expected_container = get_expected_datadoc_metadata_container(test_name)
    expected_datadoc = expected_container.model_dump_json(exclude_none=True)

    assert result.datadoc == expected_datadoc
    assert_frame_equal(result.to_polars(), df_personer_hierarchical_null_pseudonymized)


@pytest.mark.usefixtures("setup")
@integration_test()
def test_pseudonymize_hierarchical_redact(
Expand Down

0 comments on commit 5348268

Please sign in to comment.