Skip to content

Commit

Permalink
Add tests for extra params
Browse files Browse the repository at this point in the history
  • Loading branch information
amc-corey-cox committed Nov 7, 2024
1 parent 6c87e0f commit 979e25b
Show file tree
Hide file tree
Showing 3 changed files with 235 additions and 3 deletions.
4 changes: 1 addition & 3 deletions src/koza/io/writer/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,7 @@ def check_extra_fields(row_keys: Tuple, columns: Tuple) -> None:

extra_fields = not set(row_keys).issubset(set(columns))
if extra_fields:
raise ValueError(f"Extra fields found in row: {set(row_keys) - set(columns)}")

pass
raise ValueError(f"Extra fields found in row: {sorted(set(row_keys) - set(columns))}")

@abstractmethod
def write_edge(self, edge: dict):
Expand Down
190 changes: 190 additions & 0 deletions tests/unit/test_tsvwriter_node_and_edge_extra_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import re

import pytest
from biolink_model.datamodel.pydanticmodel_v2 import Disease, Gene, GeneToDiseaseAssociation

from koza.io.writer.tsv_writer import TSVWriter


def test_tsv_writer_extra_node_params():
    """
    Check that writing fails when entities carry fields missing from node_properties.

    ``node_properties`` deliberately omits 'deprecated', 'has_attribute' and
    'name' (all three are listed in ``edge_properties``), so the expected
    ValueError must name exactly those three fields.
    """
    g = Gene(id="HGNC:11603", in_taxon=["NCBITaxon:9606"], symbol="TBX4")
    d = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")
    a = GeneToDiseaseAssociation(
        id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e",
        subject=g.id,
        object=d.id,
        predicate="biolink:contributes_to",
        knowledge_level="not_provided",
        agent_type="not_provided",
        has_count=0,
        has_total=20,
    )
    ent = [g, d, a]

    # Intentionally missing: 'deprecated', 'has_attribute', 'name'.
    node_properties = [
        "id",
        "category",
        "symbol",
        "in_taxon",
        "provided_by",
        "source",
        'has_biological_sequence',
        'type',
        'xref',
        'description',
        'in_taxon_label',
        'synonym',
        'iri',
        'full_name',
    ]
    # "category" and "qualifiers" are two separate columns; a missing comma
    # previously fused them into the bogus column "categoryqualifiers".
    edge_properties = [
        "id",
        "subject",
        "predicate",
        "object",
        "category",
        "qualifiers",
        "has_count",
        "has_total",
        "publications",
        "provided_by",
        'subject_category',
        'object_direction_qualifier',
        'sex_qualifier',
        'negated',
        'has_percentage',
        'aggregator_knowledge_source',
        'has_evidence',
        'qualified_predicate',
        'object_category',
        'timepoint',
        'subject_label_closure',
        'agent_type',
        'has_attribute',
        'original_predicate',
        'iri',
        'frequency_qualifier',
        'type',
        'subject_namespace',
        'subject_closure',
        'object_label_closure',
        'object_namespace',
        'original_object',
        'subject_category_closure',
        'name',
        'has_quotient',
        'knowledge_level',
        'knowledge_source',
        'description',
        'subject_direction_qualifier',
        'deprecated',
        'original_subject',
        'object_category_closure',
    ]

    outdir = "output/tests"
    outfile = "tsvwriter-node-and-edge"

    t = TSVWriter(outdir, outfile, node_properties, edge_properties)
    # The message contains regex metacharacters ("[", "]"), hence re.escape.
    expected_message = "Extra fields found in row: ['deprecated', 'has_attribute', 'name']"
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        t.write(ent)


def test_tsv_writer_extra_edge_params():
    """
    Check that writing fails when entities carry fields missing from edge_properties.

    ``node_properties`` here includes 'deprecated', 'has_attribute' and 'name',
    while ``edge_properties`` deliberately omits 'qualifier',
    'retrieval_source_ids' and 'subject_aspect_qualifier', so the expected
    ValueError must name exactly those three fields.
    """
    g = Gene(id="HGNC:11603", in_taxon=["NCBITaxon:9606"], symbol="TBX4")
    d = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")
    a = GeneToDiseaseAssociation(
        id="uuid:5b06e86f-d768-4cd9-ac27-abe31e95ab1e",
        subject=g.id,
        object=d.id,
        predicate="biolink:contributes_to",
        knowledge_level="not_provided",
        agent_type="not_provided",
        has_count=0,
        has_total=20,
    )
    ent = [g, d, a]

    node_properties = [
        "id",
        "category",
        "symbol",
        "in_taxon",
        "provided_by",
        "source",
        'has_biological_sequence',
        'type',
        'xref',
        'description',
        'in_taxon_label',
        'synonym',
        'iri',
        'full_name',
        'deprecated',
        'has_attribute',
        'name',
    ]
    # Intentionally missing: 'qualifier', 'retrieval_source_ids',
    # 'subject_aspect_qualifier'.
    # "category" and "qualifiers" are two separate columns; a missing comma
    # previously fused them into the bogus column "categoryqualifiers".
    edge_properties = [
        "id",
        "subject",
        "predicate",
        "object",
        "category",
        "qualifiers",
        "has_count",
        "has_total",
        "publications",
        "provided_by",
        'subject_category',
        'object_direction_qualifier',
        'sex_qualifier',
        'negated',
        'has_percentage',
        'aggregator_knowledge_source',
        'has_evidence',
        'qualified_predicate',
        'object_category',
        'timepoint',
        'subject_label_closure',
        'agent_type',
        'has_attribute',
        'original_predicate',
        'iri',
        'frequency_qualifier',
        'type',
        'subject_namespace',
        'subject_closure',
        'object_label_closure',
        'object_namespace',
        'original_object',
        'subject_category_closure',
        'name',
        'has_quotient',
        'knowledge_level',
        'knowledge_source',
        'description',
        'subject_direction_qualifier',
        'deprecated',
        'original_subject',
        'object_category_closure',
        'object_aspect_qualifier',
        'object_closure',
        'primary_knowledge_source',
    ]

    outdir = "output/tests"
    outfile = "tsvwriter-node-and-edge"

    t = TSVWriter(outdir, outfile, node_properties, edge_properties)
    # The message contains regex metacharacters ("[", "]"), hence re.escape.
    expected_message = "Extra fields found in row: ['qualifier', 'retrieval_source_ids', 'subject_aspect_qualifier']"
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        t.write(ent)
44 changes: 44 additions & 0 deletions tests/unit/test_tsvwriter_node_only_extra_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import re

import pytest
from biolink_model.datamodel.pydanticmodel_v2 import Disease, Gene

from koza.io.writer.tsv_writer import TSVWriter


def test_tsv_writer():
    """
    Check that a node-only writer rejects fields missing from node_properties.

    No edge properties are passed to the writer. ``node_properties``
    deliberately omits 'has_attribute' and 'name', so the expected ValueError
    must name exactly those two fields.
    """
    g = Gene(id="HGNC:11603", name="TBX4")
    d = Disease(id="MONDO:0005002", name="chronic obstructive pulmonary disease")

    ent = [g, d]

    # Intentionally missing: 'has_attribute', 'name'.
    node_properties = [
        'id',
        'category',
        'symbol',
        'in_taxon',
        'provided_by',
        'source',
        'has_biological_sequence',
        'iri',
        'type',
        'xref',
        'description',
        'synonym',
        'in_taxon_label',
        'deprecated',
        'full_name',
    ]

    outdir = "output/tests"
    outfile = "tsvwriter-node-only"

    # Construct the writer once; the original built it twice and discarded
    # the first instance without using it.
    t = TSVWriter(outdir, outfile, node_properties)
    # The message contains regex metacharacters ("[", "]"), hence re.escape.
    expected_message = "Extra fields found in row: ['has_attribute', 'name']"
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        t.write(ent)

0 comments on commit 979e25b

Please sign in to comment.