From 979e25bd6cf3a89ddfcbd3988eaf2abfaf17740b Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Thu, 7 Nov 2024 15:07:07 -0600
Subject: [PATCH] Add tests for extra params

---
 src/koza/io/writer/writer.py                  |   4 +-
 ...st_tsvwriter_node_and_edge_extra_params.py | 188 ++++++++++++++++++
 .../test_tsvwriter_node_only_extra_params.py  |  44 ++++
 3 files changed, 233 insertions(+), 3 deletions(-)
 create mode 100644 tests/unit/test_tsvwriter_node_and_edge_extra_params.py
 create mode 100644 tests/unit/test_tsvwriter_node_only_extra_params.py

diff --git a/src/koza/io/writer/writer.py b/src/koza/io/writer/writer.py
index abd638f..7ef5063 100644
--- a/src/koza/io/writer/writer.py
+++ b/src/koza/io/writer/writer.py
@@ -52,9 +52,7 @@ def check_extra_fields(row_keys: Tuple, columns: Tuple) -> None:
         extra_fields = not set(row_keys).issubset(set(columns))
 
         if extra_fields:
-            raise ValueError(f"Extra fields found in row: {set(row_keys) - set(columns)}")
-
-        pass
+            raise ValueError(f"Extra fields found in row: {sorted(set(row_keys) - set(columns))}")
 
     @abstractmethod
     def write_edge(self, edge: dict):
diff --git a/tests/unit/test_tsvwriter_node_and_edge_extra_params.py b/tests/unit/test_tsvwriter_node_and_edge_extra_params.py
new file mode 100644
index 0000000..81eecd9
--- /dev/null
+++ b/tests/unit/test_tsvwriter_node_and_edge_extra_params.py
@@ -0,0 +1,188 @@
+import re
+
+import pytest
+from biolink_model.datamodel.pydanticmodel_v2 import Disease, Gene, GeneToDiseaseAssociation
+
+from koza.io.writer.tsv_writer import TSVWriter
+
+
+def test_tsv_writer_extra_node_params():
+    """
+    Writing a node with fields missing from node_properties raises a ValueError
+    """
+    g = Gene(id="HGNC:11603", in_taxon=["NCBITaxon:9606"], symbol="TBX4")
+    d = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")
+    a = GeneToDiseaseAssociation(
+        id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e",
+        subject=g.id,
+        object=d.id,
+        predicate="biolink:contributes_to",
+        knowledge_level="not_provided",
+        agent_type="not_provided",
+        has_count=0,
+        has_total=20,
+    )
+    ent = [g, d, a]
+
+    node_properties = [
+        "id",
+        "category",
+        "symbol",
+        "in_taxon",
+        "provided_by",
+        "source",
+        'has_biological_sequence',
+        'type',
+        'xref',
+        'description',
+        'in_taxon_label',
+        'synonym',
+        'iri',
+        'full_name',
+    ]
+    edge_properties = [
+        "id",
+        "subject",
+        "predicate",
+        "object",
+        "has_count",
+        "has_total",
+        "publications",
+        "provided_by",
+        'subject_category',
+        'object_direction_qualifier',
+        'sex_qualifier',
+        'negated',
+        'has_percentage',
+        'aggregator_knowledge_source',
+        'has_evidence',
+        'qualified_predicate',
+        'qualifiers',
+        'object_category',
+        'timepoint',
+        'subject_label_closure',
+        'agent_type',
+        'has_attribute',
+        'category',
+        'original_predicate',
+        'iri',
+        'frequency_qualifier',
+        'type',
+        'subject_namespace',
+        'subject_closure',
+        'object_label_closure',
+        'object_namespace',
+        'original_object',
+        'subject_category_closure',
+        'name',
+        'has_quotient',
+        'knowledge_level',
+        'knowledge_source',
+        'description',
+        'subject_direction_qualifier',
+        'deprecated',
+        'original_subject',
+        'object_category_closure',
+    ]
+
+    outdir = "output/tests"
+    outfile = "tsvwriter-node-and-edge"
+
+    t = TSVWriter(outdir, outfile, node_properties, edge_properties)
+    expected_message = "Extra fields found in row: ['deprecated', 'has_attribute', 'name']"
+    with pytest.raises(ValueError, match=re.escape(expected_message)):
+        t.write(ent)
+
+
+def test_tsv_writer_extra_edge_params():
+    """
+    Writing an edge with fields missing from edge_properties raises a ValueError
+    """
+    g = Gene(id="HGNC:11603", in_taxon=["NCBITaxon:9606"], symbol="TBX4")
+    d = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")
+    a = GeneToDiseaseAssociation(
+        id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e",
+        subject=g.id,
+        object=d.id,
+        predicate="biolink:contributes_to",
+        knowledge_level="not_provided",
+        agent_type="not_provided",
+        has_count=0,
+        has_total=20,
+    )
+    ent = [g, d, a]
+
+    node_properties = [
+        "id",
+        "category",
+        "symbol",
+        "in_taxon",
+        "provided_by",
+        "source",
+        'has_biological_sequence',
+        'type',
+        'xref',
+        'description',
+        'in_taxon_label',
+        'synonym',
+        'iri',
+        'full_name',
+        'deprecated',
+        'has_attribute',
+        'name',
+    ]
+    edge_properties = [
+        "id",
+        "subject",
+        "predicate",
+        "object",
+        "has_count",
+        "has_total",
+        "publications",
+        "provided_by",
+        'subject_category',
+        'object_direction_qualifier',
+        'sex_qualifier',
+        'negated',
+        'has_percentage',
+        'aggregator_knowledge_source',
+        'has_evidence',
+        'qualified_predicate',
+        'qualifiers',
+        'object_category',
+        'timepoint',
+        'subject_label_closure',
+        'agent_type',
+        'has_attribute',
+        'category',
+        'original_predicate',
+        'iri',
+        'frequency_qualifier',
+        'type',
+        'subject_namespace',
+        'subject_closure',
+        'object_label_closure',
+        'object_namespace',
+        'original_object',
+        'subject_category_closure',
+        'name',
+        'has_quotient',
+        'knowledge_level',
+        'knowledge_source',
+        'description',
+        'subject_direction_qualifier',
+        'deprecated',
+        'original_subject',
+        'object_category_closure',
+        'object_aspect_qualifier',
+        'object_closure',
+        'primary_knowledge_source',
+    ]
+
+    outdir = "output/tests"
+    outfile = "tsvwriter-node-and-edge"
+
+    t = TSVWriter(outdir, outfile, node_properties, edge_properties)
+    expected_message = "Extra fields found in row: ['qualifier', 'retrieval_source_ids', 'subject_aspect_qualifier']"
+    with pytest.raises(ValueError, match=re.escape(expected_message)):
+        t.write(ent)
diff --git a/tests/unit/test_tsvwriter_node_only_extra_params.py b/tests/unit/test_tsvwriter_node_only_extra_params.py
new file mode 100644
index 0000000..77a703c
--- /dev/null
+++ b/tests/unit/test_tsvwriter_node_only_extra_params.py
@@ -0,0 +1,44 @@
+import re
+
+import pytest
+from biolink_model.datamodel.pydanticmodel_v2 import Disease, Gene
+
+from koza.io.writer.tsv_writer import TSVWriter
+
+
+def test_tsv_writer():
+    """
+    Writing nodes with fields missing from node_properties raises a ValueError
+    """
+    g = Gene(id="HGNC:11603", name="TBX4")
+    d = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")
+
+    ent = [g, d]
+
+    node_properties = [
+        'id',
+        'category',
+        'symbol',
+        'in_taxon',
+        'provided_by',
+        'source',
+        'has_biological_sequence',
+        'iri',
+        'type',
+        'xref',
+        'description',
+        'synonym',
+        'in_taxon_label',
+        'deprecated',
+        'full_name',
+    ]
+
+    outdir = "output/tests"
+    outfile = "tsvwriter-node-only"
+
+    # Build the writer exactly once, outside the raises block: only the write
+    # call below is expected to fail, not construction.
+    t = TSVWriter(outdir, outfile, node_properties)
+    expected_message = "Extra fields found in row: ['has_attribute', 'name']"
+    with pytest.raises(ValueError, match=re.escape(expected_message)):
+        t.write(ent)