Skip to content

Commit

Permalink
Merge pull request #21 from HenningTimm/conversion_and_unit_tests
Browse files Browse the repository at this point in the history
Conversion and unit tests
  • Loading branch information
HenningTimm authored Dec 10, 2024
2 parents 6ad0265 + 33d0fa8 commit 397a498
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 31 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ yml2block.egg-info
poetry.lock
yml2block/__pycache__/
tests/__pycache__/
tests/valid/*_converted.tsv
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Changelog

## Version 0.7.0 (2024-11-??)
## Version 0.7.0 (2024-12-10)

- Fixed bug where converting files without lint violations would result in an error (#16). Thanks @Athemis
- Fixed bug where non-string watermarks caused an error (#19). Thanks @Athemis

- Added minimal test for conversion function.
- Added tests to PRs.
- Added lint `b003` that checks if all titles within the DatasetField block are unique.

## Version 0.6.0 (2024-03-18)

Expand Down
30 changes: 28 additions & 2 deletions tests/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,42 @@ def test_basic_execution_works():
result = runner.invoke(yml2block.__main__.main, ["--help"])
assert result.exit_code == 0, result

result = runner.invoke(yml2block.__main__.main, ["check", "--help"])
assert result.exit_code == 0, result

def test_minimal_valid_example():
"""This test ensures that a valid file is translated without throwing an error."""
result = runner.invoke(yml2block.__main__.main, ["convert", "--help"])
assert result.exit_code == 0, result


def test_minimal_valid_example_check():
    """Ensure that a valid file does not cause errors during ``check``."""
    cli_args = ["check", "tests/valid/minimal_working_example.yml"]
    result = CliRunner().invoke(yml2block.__main__.main, cli_args)
    # A lint-clean input must make the CLI exit with status 0.
    assert result.exit_code == 0, result


def test_minimal_valid_example_convert():
    """Ensure that a valid file is converted without errors.

    Runs ``convert`` on the minimal working example, writing the result to
    a fresh temporary directory, and compares the produced TSV with the
    expected TSV stored next to the fixture.
    """
    # Local imports keep this test self-contained within the module.
    import tempfile
    from pathlib import Path

    runner = CliRunner()
    path_expected = Path("tests/valid/minimal_working_example_expected.tsv")

    # A per-run temporary directory avoids a hard-coded, platform specific
    # path and guarantees that no stale output of an earlier run is read.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path_output = Path(tmp_dir) / "y2b_mwe.tsv"
        result = runner.invoke(
            yml2block.__main__.main,
            [
                "convert",
                "tests/valid/minimal_working_example.yml",
                "-o",
                str(path_output),
            ],
        )
        assert result.exit_code == 0, result.stdout
        converted_tsv = path_output.read_text()

    expected_tsv = path_expected.read_text()

    # Guard against two empty files trivially comparing equal.
    assert len(converted_tsv) > 0
    assert len(expected_tsv) > 0
    assert converted_tsv == expected_tsv


def test_duplicate_names_detected():
"""This test ensures that duplicate names are detected."""
runner = CliRunner()
Expand Down
4 changes: 4 additions & 0 deletions tests/snippets/four_item_snippet.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#metadataBlock
#datasetField
#controlledVocabulary
#controlledVocabulary
2 changes: 2 additions & 0 deletions tests/snippets/minimal_snippet.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#metadataBlock
#datasetField
1 change: 1 addition & 0 deletions tests/snippets/one_item_snippet.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#metadataBlock
3 changes: 3 additions & 0 deletions tests/snippets/three_item_snippet.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#metadataBlock
#datasetField
#controlledVocabulary
70 changes: 69 additions & 1 deletion tests/unit_tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from yml2block.__main__ import guess_input_type
from yml2block.rules import Level
from yml2block.tsv_input import _identify_break_points


def test_input_guessing_valid_tsv():
Expand Down Expand Up @@ -116,4 +117,71 @@ def test_input_guessing_invalid_extension():
guessed_type, violations = guess_input_type(path)
assert len(violations) == 1
assert violations[0].level == Level.ERROR
assert guessed_type == False
assert guessed_type is False


def test_breakpoint_identification():
    """Check the blocks and violations returned by _identify_break_points.

    Each test case names an input file, the value expected as first return
    value of ``_identify_break_points`` and the level and rule of every
    violation expected as second return value.
    """
    test_cases = [
        {
            "file": "tests/valid/minimal_working_example_expected.tsv",
            "expected_blocks": (
                "#metadataBlock\tname\tdataverseAlias\tdisplayName\t\t\t\t\t\t\t\t\t\t\t\t\n\tValidExample\t\tValid\t\t\t\t\t\t\t\t\t\t\t\t\n",
                "#datasetField\tname\ttitle\tdescription\twatermark\tfieldType\tdisplayOrder\tdisplayFormat\tadvancedSearchField\tallowControlledVocabulary\tallowmultiples\tfacetable\tdisplayoncreate\trequired\tparent\tmetadatablock_id\n\tDescription\tDescription\tThis field describes.\t\ttextbox\t\t\tTRUE\tFALSE\tFALSE\tFALSE\tTRUE\tTRUE\t\tValidExample\n\tAnswer\tAnswer\t\t\ttext\t\t\tTRUE\tTRUE\tTRUE\tTRUE\tTRUE\tTRUE\t\tValidExample\n",
                "#controlledVocabulary\tDatasetField\tValue\tidentifier\tdisplayOrder\t\t\t\t\t\t\t\t\t\t\t\n\tAnswerYes\tYes\tanswer_positive\t\t\t\t\t\t\t\t\t\t\t\t\n\tAnswerNo\tNo\tanswer_negative\t\t\t\t\t\t\t\t\t\t\t\t\n\tAnswerMaybeSo\tMaybe\tanswer_unclear\t\t\t\t\t\t\t\t\t\t\t\t\n",
            ),
            "expected_violations": [],
        },
        {
            "file": "tests/snippets/minimal_snippet.tsv",
            "expected_blocks": (
                "#metadataBlock\n",
                "#datasetField\n",
                None,
            ),
            "expected_violations": [],
        },
        {
            "file": "tests/snippets/three_item_snippet.tsv",
            "expected_blocks": (
                "#metadataBlock\n",
                "#datasetField\n",
                "#controlledVocabulary\n",
            ),
            "expected_violations": [],
        },
        {
            "file": "tests/snippets/one_item_snippet.tsv",
            "expected_blocks": "#metadataBlock\n",
            "expected_violations": [
                {
                    "level": Level.WARNING,
                    "rule": "identify_break_points",
                }
            ],
        },
        {
            "file": "tests/snippets/four_item_snippet.tsv",
            "expected_blocks": "#metadataBlock\n#datasetField\n#controlledVocabulary\n#controlledVocabulary\n",
            "expected_violations": [
                {
                    "level": Level.WARNING,
                    "rule": "identify_break_points",
                }
            ],
        },
    ]

    for test_case in test_cases:
        # The file name is attached to every assertion so that a failure in
        # this table-driven test can be traced back to its input.
        case_name = test_case["file"]
        with open(case_name, "r") as case_file:
            split_blocks, violations = _identify_break_points(case_file.read())

        # Ensure the expected blocks are returned
        # and that the correct number of violations is returned
        assert split_blocks == test_case["expected_blocks"], case_name
        assert len(violations) == len(test_case["expected_violations"]), case_name

        # Ensure the expected errors are detected
        for vio, exp_vio in zip(violations, test_case["expected_violations"]):
            assert vio.level == exp_vio["level"], case_name
            assert vio.rule == exp_vio["rule"], case_name
9 changes: 9 additions & 0 deletions tests/valid/minimal_working_example_expected.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#metadataBlock name dataverseAlias displayName
ValidExample Valid
#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id
Description Description This field describes. textbox TRUE FALSE FALSE FALSE TRUE TRUE ValidExample
Answer Answer text TRUE TRUE TRUE TRUE TRUE TRUE ValidExample
#controlledVocabulary DatasetField Value identifier displayOrder
AnswerYes Yes answer_positive
AnswerNo No answer_negative
AnswerMaybeSo Maybe answer_unclear
70 changes: 44 additions & 26 deletions yml2block/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,29 +55,37 @@ def total_violations(self):
def __iter__(self):
    """Iterate over violations and max severity level per file.

    Yields one ``(filename, violations, severity)`` tuple per entry of
    ``self.violations``.
    """
    for filename, violations in self.violations.items():
        # Delegate to max_severity instead of calling min() on the list
        # directly: min() raises ValueError for a file without violations.
        yield (filename, violations, self.max_severity(filename))

def max_severity(self, file_path):
    """Return the most severe level recorded for ``file_path``.

    Returns Level.NONE when the file is known but has no violations.
    If the file is not present in ``self.violations`` a note is printed
    and the KeyError is re-raised.
    """
    try:
        file_violations = self.violations[file_path]
    except KeyError:
        print(f"The file {file_path} is not present in this list of files.")
        raise

    if not file_violations:
        # Well-behaved files have an empty violation list.
        return Level.NONE

    # The most severe level is the numerically smallest one.
    return min(entry.level for entry in file_violations)

def safe_conversion_possible(self, file_path, strict=False):
"""Check if the file can be safely converted to tsv."""
if self.violations:
try:
max_severity = min(
self.violations[file_path], key=lambda x: x.level
).level
if max_severity == Level.ERROR:
return False
elif max_severity == Level.WARNING:
if strict:
return False
else:
return True
else:
return True
except KeyError:
print(f"The file {file_path} is not present in this list of files.")
raise
else:
return True

max_severity = self.max_severity(file_path)

match max_severity:
case Level.NONE:
return True
case Level.WARNING:
return False if strict else True
case Level.ERROR:
return False
case _:
raise ValueError(f"Unexpected severity level {max_severity}.")


def guess_input_type(input_path):
Expand Down Expand Up @@ -117,7 +125,7 @@ def return_violations(lint_violations, warn_ec, verbose):

if len(lint_violations) == 0:
if verbose:
print("\nAll Checks passed!\n\n")
print("\nAll Checks passed! 🎉\n\n")
sys.exit(0)
else:
max_severity = None
Expand All @@ -127,15 +135,25 @@ def return_violations(lint_violations, warn_ec, verbose):
print()
print(file_path)
print(100 * "-")
print(f"A total of {len(violations)} lint(s) failed.")
print(f"Highest error level was '{max_severity.name}'")
for violation in violations:
print(violation)
print("Errors detected. File(s) cannot safely be converted to TSV.")
if violations:
print(f"A total of {len(violations)} lint(s) failed.")
print(f"Highest error level was '{max_severity.name}'")
for violation in violations:
print(violation)
else:
print(f"All checks passed for {file_path}! 🎉")

if max_severity == Level.ERROR:
print("Errors detected. File(s) cannot safely be converted to TSV.")
sys.exit(1)
elif max_severity == Level.WARNING:
print(
"Warnings detected. File(s) can probably not be safely converted to TSV."
)
sys.exit(warn_ec)
elif max_severity == Level.NONE:
print("\nAll Checks passed! 🎉 Safe conversion is possible.\n\n")
sys.exit(0)
else:
sys.exit(1)

Expand Down
1 change: 1 addition & 0 deletions yml2block/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def skip(self, lint):
class Level(IntEnum):
"""Provide numeric error levels."""

NONE = 3
WARNING = 2
ERROR = 1

Expand Down
21 changes: 21 additions & 0 deletions yml2block/suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,24 @@ def fix_required_keys_present(missing_keys, list_item, tsv_keyword):
"""Return list of missing keys."""
name = identify_entry(list_item, tsv_keyword)
return f"Missing keys '{missing_keys}' for '{name}' in block '{tsv_keyword}'."


def fix_identify_breaking_points(full_file, break_points):
    """Suggest fixes for too few or too many detected blocks.

    Placeholder: the branches below only distinguish the number of
    detected break points; no suggestion is built yet and None is
    returned for every input.
    """
    n_break_points = len(break_points)

    if n_break_points == 0:
        # No line starts with #
        # are you passing the right file?
        return None

    if n_break_points == 1:
        # Only one line starts with #
        # At least two blocks are required for a reasonable metadata schema
        return None

    if n_break_points in (2, 3):
        # This case should never be reached, those files are fine
        # Raise an exception if I get here in error handling
        return None

    # More than the required number of blocks is present.
    # Identify what is going on.
    return None

0 comments on commit 397a498

Please sign in to comment.