Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EVA-3604 - contig alias get methods #58

Merged
merged 2 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 46 additions & 2 deletions ebi_eva_common_pyutils/contig_alias/contig_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,23 @@ class InternalServerError(Exception):
pass


# Default base URL of the contig alias web service. ContigAliasClient falls back
# to it when no base_url is passed and the CONTING_ALIAS_URL environment
# variable is unset or empty.
CONTING_ALIAS_URL = 'https://www.ebi.ac.uk/eva/webservices/contig-alias'


# TODO add the get methods
class ContigAliasClient(AppLogger):
"""
Python client for interfacing with the contig alias service.
Authentication is required if using admin endpoints.
"""

def __init__(self, base_url, username=None, password=None):
self.base_url = base_url
def __init__(self, base_url=None, username=None, password=None, default_page_size=1000):
if base_url:
self.base_url = base_url
else:
self.base_url = os.environ.get('CONTING_ALIAS_URL') or CONTING_ALIAS_URL
# Used for get method
self.default_page_size=default_page_size
# Only required for admin endpoints
self.username = username
self.password = password
Expand Down Expand Up @@ -69,3 +77,39 @@ def delete_assembly(self, assembly):
raise InternalServerError
else:
self.error(f'Assembly accession {assembly} could not be deleted. Response: {response.text}')

@retry(tries=3, delay=2, backoff=1.2, jitter=(1, 3))
def _get_page_for_contig_alias_url(self, sub_url, page=0):
    """Fetch a single page of a contig alias endpoint and return its decoded JSON.

    The page size is always ``self.default_page_size``. The call is wrapped
    in ``@retry``, so a failing request is attempted again according to the
    decorator arguments.

    :param sub_url: endpoint path appended to ``self.base_url``
    :param page: number of the page to request (defaults to 0)
    :return: the JSON body of the response, as parsed by ``response.json()``
    """
    paging = f'page={page}&size={self.default_page_size}'
    reply = requests.get(
        f'{self.base_url}/{sub_url}?{paging}',
        headers={'accept': 'application/json'},
    )
    # Turn HTTP error statuses into exceptions before trying to decode the body
    reply.raise_for_status()
    return reply.json()

def _depaginate_iter(self, sub_url, entity_to_retrieve):
    """Generator that yields every entity of a paginated contig alias endpoint.

    Pages are requested one at a time, starting at page 0, and fetching
    carries on for as long as the latest response advertises a ``next``
    link under ``_links``.

    :param sub_url: endpoint path, handed to ``_get_page_for_contig_alias_url``
    :param entity_to_retrieve: key under ``_embedded`` that holds the list of
                               entities to yield
    :return: generator over the entities of all pages, in page order
    """
    page = 0
    while True:
        response_json = self._get_page_for_contig_alias_url(sub_url, page=page)
        # Pages lacking '_embedded' or the requested key contribute nothing
        yield from response_json.get('_embedded', {}).get(entity_to_retrieve, [])
        # A response without '_links' is treated as the last page instead of
        # failing with a KeyError
        if 'next' not in response_json.get('_links', {}):
            return
        page += 1

def assembly_contig_iter(self, assembly_accession):
    """Return a generator over the chromosome entities of the requested assembly.

    :param assembly_accession: accession placed in the
                               ``v1/assemblies/<accession>/chromosomes`` endpoint
    :return: whatever ``_depaginate_iter`` returns for the
             ``chromosomeEntities`` of that endpoint
    """
    endpoint = 'v1/assemblies/{}/chromosomes'.format(assembly_accession)
    return self._depaginate_iter(endpoint, 'chromosomeEntities')

def assembly(self, assembly_accession):
    """Provide the description of the requested assembly.

    :param assembly_accession: accession placed in the
                               ``v1/assemblies/<accession>`` endpoint
    :return: the first entry of ``_embedded.assemblyEntities`` in the service
             response, or None when the response holds no assembly entity
    """
    sub_url = f'v1/assemblies/{assembly_accession}'
    response_json = self._get_page_for_contig_alias_url(sub_url)
    assembly_entities = response_json.get('_embedded', {}).get('assemblyEntities', [])
    # Guard the empty case explicitly: indexing [0] on an empty list would
    # surface as an uninformative IndexError
    return assembly_entities[0] if assembly_entities else None

def contig_iter(self, insdc_accession):
    """Return a generator over the chromosome entities found for an accession.

    :param insdc_accession: accession placed in the
                            ``v1/chromosomes/genbank/<accession>`` endpoint
    :return: whatever ``_depaginate_iter`` returns for the
             ``chromosomeEntities`` of that endpoint
    """
    endpoint = 'v1/chromosomes/genbank/{}'.format(insdc_accession)
    return self._depaginate_iter(endpoint, 'chromosomeEntities')
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
name='ebi_eva_common_pyutils',
scripts=[os.path.join(os.path.dirname(__file__), 'ebi_eva_internal_pyutils', 'archive_directory.py')],
packages=find_packages(),
version='0.6.8.dev0',
version='0.6.8.dev1',
license='Apache',
description='EBI EVA - Common Python Utilities',
url='https://github.com/EBIVariation/eva-common-pyutils',
Expand Down
59 changes: 59 additions & 0 deletions tests/common/test_contig_alias.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os
from collections.abc import Iterable
from unittest import TestCase

from ebi_eva_common_pyutils.contig_alias.contig_alias import ContigAliasClient


class TestContigAliasClient(TestCase):
    """Tests for the get methods of ContigAliasClient.

    NOTE(review): the client is created without a base_url, so these tests
    presumably query the real contig alias service - confirm before running
    them offline.
    """
    resources = os.path.join(os.path.dirname(__file__), 'resources')

    def setUp(self) -> None:
        self.assembly_accession = 'GCA_000002945.2'
        self.client = ContigAliasClient()

    @staticmethod
    def _expected_assembly():
        """Build the assembly description expected for GCA_000002945.2."""
        return {
            'insdcAccession': 'GCA_000002945.2',
            'name': 'ASM294v2',
            'organism': 'Schizosaccharomyces pombe (fission yeast)',
            'taxid': 4896,
            'refseq': 'GCF_000002945.1',
            'md5checksum': None,
            'trunc512checksum': None,
            'genbankRefseqIdentical': True
        }

    def test_assembly_contig_iter(self):
        contigs = self.client.assembly_contig_iter(self.assembly_accession)
        assert isinstance(contigs, Iterable)
        sequence_names = [contig.get('genbankSequenceName') for contig in contigs]
        assert sequence_names == ['I', 'II', 'III', 'MT']

    def test_assembly(self):
        described = self.client.assembly(self.assembly_accession)
        assert described == self._expected_assembly()

    def test_contig_iter(self):
        contigs = self.client.contig_iter('CU329670.1')
        assert isinstance(contigs, Iterable)
        # Only the first entity returned for the accession is checked
        first_contig = next(contigs)
        expected_contig = {
            'genbankSequenceName': 'I',
            'enaSequenceName': 'I',
            'insdcAccession': 'CU329670.1',
            'refseq': 'NC_003424.3',
            'seqLength': 5579133,
            'ucscName': None,
            'md5checksum': 'a5bc80a74aae8fd7622290b11dbc8ab3',
            'trunc512checksum': None,
            'contigType': 'CHROMOSOME',
            'assembly': self._expected_assembly()
        }
        assert first_contig == expected_contig

Loading