diff --git a/easyliftover/lifters/abstract.py b/easyliftover/lifters/abstract.py index ce5f134..5b240c5 100644 --- a/easyliftover/lifters/abstract.py +++ b/easyliftover/lifters/abstract.py @@ -11,15 +11,17 @@ def __init__(self, from_ga: str, to_ga: str): """Initializes the lifter.""" self.lo = LiftOver(from_ga, to_ga) - def convert_coordinate(self, chromosome: str, position: int) -> "Tuple[str, int] | None": + def convert_coordinate( + self, chromosome: str, position: int + ) -> "Tuple[str, int] | None": lifted = self.lo.convert_coordinate(chromosome, position) - + if lifted is None: return None - + if len(lifted) == 0: return None - + return lifted[0][0], lifted[0][1] def convert_region( @@ -35,13 +37,15 @@ def convert_region( if lifted_start is None or lifted_end is None: print(f"Could not lift {chromosome}:{start}-{end}") return None - + if lifted_start[0] != lifted_end[0]: print(f"Chromosome changed from {chromosome} to {lifted_start[0]}") return None - + if lifted_start[1] >= lifted_end[1]: - print(f"Start position {lifted_start[1]} is larger than end position {lifted_end[1]}") + print( + f"Start position {lifted_start[1]} is larger than end position {lifted_end[1]}" + ) return None return lifted_start[0], lifted_start[1], lifted_end[1] @@ -55,26 +59,33 @@ def lift_path(self, path: str) -> str: def lift_url(self, url: str) -> str: """Lifts a URL.""" raise NotImplementedError - + + @staticmethod + @abstractmethod + def supported_formats() -> list: + """Returns the supported formats.""" + raise NotImplementedError + + class AbstractTextLifter(AbstractLifter): """Abstract class for lifters of text-based files.""" - + @abstractmethod def lift_content(self, content: str) -> str: """Lifts the content of a file.""" raise NotImplementedError - + def lift_path(self, path: str) -> str: """Lifts a path.""" with open(path) as f: content = f.read() - + return self.lift_content(content) - + def lift_url(self, url: str) -> str: """Lifts a URL.""" response = requests.get(url) - + return self.lift_content(response.text) diff --git a/easyliftover/lifters/bed.py b/easyliftover/lifters/bed.py index 861f53a..8e4aad7 100644 --- a/easyliftover/lifters/bed.py +++ b/easyliftover/lifters/bed.py @@ -24,3 +24,8 @@ def __lift_row__(self, row: str) -> "str | None": ) else: return None + + @staticmethod + def supported_formats() -> list: + """Returns the supported formats.""" + return ["bed"] diff --git a/easyliftover/lifters/bedgraph.py b/easyliftover/lifters/bedgraph.py index 78f8e07..fa4baf1 100644 --- a/easyliftover/lifters/bedgraph.py +++ b/easyliftover/lifters/bedgraph.py @@ -24,3 +24,8 @@ def __lift_row__(self, row: str) -> "str | None": ) else: return None + + @staticmethod + def supported_formats() -> list: + """Returns the supported formats.""" + return ["bedgraph", "bg"] diff --git a/easyliftover/lifters/bigwig.py b/easyliftover/lifters/bigwig.py index 686d6d4..632977d 100644 --- a/easyliftover/lifters/bigwig.py +++ b/easyliftover/lifters/bigwig.py @@ -46,3 +46,8 @@ def lift_bw(self, bw) -> str: wig += f"{lifted_start}\t{rounded_value}\n" return wig + + @staticmethod + def supported_formats() -> list: + """Returns the supported formats.""" + return ["bigwig", "bw"] diff --git a/easyliftover/lifters/gff.py b/easyliftover/lifters/gff.py index 6ba8abe..58469f2 100644 --- a/easyliftover/lifters/gff.py +++ b/easyliftover/lifters/gff.py @@ -24,3 +24,8 @@ def __lift_row__(self, row: str) -> "str | None": else: return None + + @staticmethod + def supported_formats() -> list: + """Returns the supported formats.""" + return ["gff", "gff3", "gtf"] diff --git a/easyliftover/lifters/vcf.py b/easyliftover/lifters/vcf.py index 878b4b5..786007a 100644 --- a/easyliftover/lifters/vcf.py +++ b/easyliftover/lifters/vcf.py @@ -23,3 +23,8 @@ def __lift_row__(self, row: str) -> "str | None": ) else: return None + + @staticmethod + def supported_formats() -> list: + """Returns the supported formats.""" + return ["vcf"] diff --git a/easyliftover/lifters/wig.py b/easyliftover/lifters/wig.py index 2b9713e..dab06b4 100644 --- a/easyliftover/lifters/wig.py +++ b/easyliftover/lifters/wig.py @@ -60,3 +60,8 @@ def lift_content(self, content: str) -> str: result.append(line) return "\n".join(result) + + @staticmethod + def supported_formats() -> list: + """Returns the supported formats.""" + return ["wig"] diff --git a/easyliftover/liftover.py b/easyliftover/liftover.py index fb8a2ec..5a5cb71 100644 --- a/easyliftover/liftover.py +++ b/easyliftover/liftover.py @@ -1,37 +1,46 @@ -from .lifters import BedLifter, GffLifter, WigLifter, AbstractLifter, BigWigLifter, BedGraphLifter, VcfLifter +from .lifters import ( + BedLifter, + GffLifter, + WigLifter, + AbstractLifter, + BigWigLifter, + BedGraphLifter, + VcfLifter, +) -def get_lifter(fromGenome: str, toGenome: str, source: str, file_type: "str | None" = None) -> AbstractLifter: +lifters = [BedLifter, GffLifter, WigLifter, BigWigLifter, BedGraphLifter, VcfLifter] + +def get_lifter( + fromGenome: str, toGenome: str, source: str, file_type: "str | None" = None +) -> AbstractLifter: def get_class(c_type): - if c_type == "bed": - return BedLifter - elif c_type in ["gff", "gff3", "gtf"]: - return GffLifter - elif c_type == "wig": - return WigLifter - elif c_type == "bw" or c_type == "bigwig": - return BigWigLifter - elif c_type == "bedgraph" or c_type == "bg": - return BedGraphLifter - elif c_type == "vcf": - return VcfLifter - else: - raise Exception("Unsupported file type") - + for lifter in lifters: + if c_type in lifter.supported_formats(): + return lifter + raise Exception("Unsupported file type: " + c_type) + def __get_type(path: str) -> str: return path.split(".")[-1] - + chosen_type = file_type if file_type is not None else __get_type(source) - - return get_class(chosen_type.lower())(fromGenome, toGenome) + + clazz = get_class(chosen_type.lower()) + + if clazz is None: + raise Exception("Unsupported file type") + + return clazz(fromGenome, toGenome) + def liftover_url( fromGenome: str, toGenome: str, url: str, file_type: "str | None" = None ) -> str: - + lifter = get_lifter(fromGenome, toGenome, url, file_type) - + return lifter.lift_url(url) + def liftover_path( fromGenome: str, toGenome: str, path: str, file_type: "str | None" = None ) -> str: diff --git a/easyliftover/types.json b/easyliftover/types.json deleted file mode 100644 index b8abaaf..0000000 --- a/easyliftover/types.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "name": "Annotation", - "Description": "Non-quantiative annotation file", - "id": "annotation", - "formats": [ - {"name": "bed", "can_be_lifted": true}, - {"name": "gff", "can_be_lifted": true}, - {"name": "gff3", "can_be_lifted": true}, - {"name": "gtf", "can_be_lifted": true}, - {"name": "bedpe", "can_be_lifted": false}, - {"name": "Other", "can_be_lifted": false} - ] - }, - { - "name": "Wiggle", - "Description": "Quantitative genomic data", - "id": "wig", - "formats": [ - {"name": "wig", "can_be_lifted": true}, - {"name": "bigWig", "can_be_lifted": true}, - {"name": "bedGraph", "can_be_lifted": true}, - {"name": "Other", "can_be_lifted": false} - ] - }, - { - "name": "Alignment", - "Description": "Alignment data", - "id": "alignment", - "formats": [ - {"name": "bam", "can_be_lifted": false}, - {"name": "cram", "can_be_lifted": false}, - {"name": "Other", "can_be_lifted": false} - ] - }, - { - "name": "Variant", - "Description": "Variant data", - "id": "variant", - "formats": [ - {"name": "vcf", "can_be_lifted": true} - ] - } -] \ No newline at end of file diff --git a/easyliftover/types.py b/easyliftover/types.py index f164fe3..1ac4083 100644 --- a/easyliftover/types.py +++ b/easyliftover/types.py @@ -1,11 +1,4 @@ -import json -import os +from .liftover import lifters def get_file_types(): - file_name = "types.json" - - script_dir = os.path.dirname(__file__) - - json_object = json.load(open(os.path.join(script_dir, file_name), "r")) - - return json_object \ No newline at end of file + return [format for lifter in lifters for format in lifter.supported_formats()] diff --git a/test/test_lifters.py b/test/test_lifters.py index 12c9dbf..1a7f34b 100644 --- a/test/test_lifters.py +++ b/test/test_lifters.py @@ -1,32 +1,47 @@ from easyliftover import * -def __test__(clazz, assembly1: str, assembly2: str, path_assembly1: str, path_assembly2: str): + +def __test__( + clazz, assembly1: str, assembly2: str, path_assembly1: str, path_assembly2: str +): content_assembly1 = open(path_assembly1).read() content_assembly2 = open(path_assembly2).read() - + lifter: AbstractLifter = clazz(assembly1, assembly2) reversed_lifter: AbstractLifter = clazz(assembly2, assembly1) assert lifter.lift_path(path_assembly1) == content_assembly2 assert reversed_lifter.lift_path(path_assembly2) == content_assembly1 + def test_bed(): - __test__(BedLifter, 'hg19', 'hg38', 'test/data/hg19.bed', 'test/data/hg38.bed') - + __test__(BedLifter, "hg19", "hg38", "test/data/hg19.bed", "test/data/hg38.bed") + + def test_vcf(): - __test__(VcfLifter, 'hg19', 'hg38', 'test/data/hg19.vcf', 'test/data/hg38.vcf') - + __test__(VcfLifter, "hg19", "hg38", "test/data/hg19.vcf", "test/data/hg38.vcf") + + def test_gff(): - __test__(GffLifter, 'hg19', 'hg38', 'test/data/hg19.gff', 'test/data/hg38.gff') - + __test__(GffLifter, "hg19", "hg38", "test/data/hg19.gff", "test/data/hg38.gff") + + def test_wig(): - __test__(WigLifter, 'hg19', 'hg38', 'test/data/hg19.wig', 'test/data/hg38.wig') - + __test__(WigLifter, "hg19", "hg38", "test/data/hg19.wig", "test/data/hg38.wig") + + def test_bedgraph(): - __test__(BedGraphLifter, 'hg19', 'hg38', 'test/data/hg19.bedgraph', 'test/data/hg38.bedgraph') + __test__( + BedGraphLifter, + "hg19", + "hg38", + "test/data/hg19.bedgraph", + "test/data/hg38.bedgraph", + ) + -#def test_bigwig(): +# def test_bigwig(): # lifter = BigWigLifter('hg38', 'hg19') -# +# # assert lifter.lift_url('https://github.com/deeptools/pyBigWig/raw/master/pyBigWigTest/test.bw') == "" -# assert lifter.lift_path('test/data/hg38.bw') == "" \ No newline at end of file +# assert lifter.lift_path('test/data/hg38.bw') == "" diff --git a/test/test_types.py b/test/test_types.py index d7fa6a0..06d74c0 100644 --- a/test/test_types.py +++ b/test/test_types.py @@ -1,6 +1,9 @@ from easyliftover.types import get_file_types + def test_get_file_types(): types = get_file_types() - - assert len(types) > 0 \ No newline at end of file + + expected = ["bed", "gff"] + + assert all([t in types for t in expected])