From 7cc7f374ad359ba6654e501c5ed92e8e18ef8f17 Mon Sep 17 00:00:00 2001 From: Reece Hart Date: Sat, 16 Sep 2023 21:41:03 -0700 Subject: [PATCH] reformatted with black and isort --- src/biocommons/seqrepo/cli.py | 113 +++++------------- src/biocommons/seqrepo/dataproxy.py | 4 +- src/biocommons/seqrepo/fastadir/fabgz.py | 20 +--- .../seqrepo/seqaliasdb/seqaliasdb.py | 12 +- src/biocommons/seqrepo/seqrepo.py | 32 ++--- tests/conftest.py | 4 +- tests/test_cli.py | 4 +- tests/test_seqaliasdb.py | 8 +- tests/test_seqrepo.py | 5 +- tests/test_utils.py | 12 +- 10 files changed, 56 insertions(+), 158 deletions(-) diff --git a/src/biocommons/seqrepo/cli.py b/src/biocommons/seqrepo/cli.py index d82a07b..b505aa8 100644 --- a/src/biocommons/seqrepo/cli.py +++ b/src/biocommons/seqrepo/cli.py @@ -44,9 +44,7 @@ instance_name_new_re = re.compile( r"^20[12]\d-\d\d-\d\d$" ) # smells like a new datestamp, 2017-01-17 -instance_name_old_re = re.compile( - r"^20[12]1\d\d\d\d\d$" -) # smells like an old datestamp, 20170117 +instance_name_old_re = re.compile(r"^20[12]1\d\d\d\d\d$") # smells like an old datestamp, 20170117 instance_name_re = re.compile( r"^20[12]\d-?\d\d-?\d\d$" ) # smells like a datestamp, 20170117 or 2017-01-17 @@ -55,9 +53,7 @@ def _get_remote_instances(opts): - line_re = re.compile( - r"d[-rwx]{9}\s+[\d,]+ \d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} (.+)" - ) + line_re = re.compile(r"d[-rwx]{9}\s+[\d,]+ \d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2} (.+)") rsync_cmd = [ opts.rsync_exe, "--no-motd", @@ -93,18 +89,14 @@ def parse_arguments(): + ". See https://github.com/biocommons/biocommons.seqrepo for more information", ) top_p.add_argument("--dry-run", "-n", default=False, action="store_true") - top_p.add_argument( - "--remote-host", default="dl.biocommons.org", help="rsync server host" - ) + top_p.add_argument("--remote-host", default="dl.biocommons.org", help="rsync server host") top_p.add_argument( "--root-directory", "-r", default=SEQREPO_ROOT_DIR, help="seqrepo root directory (SEQREPO_ROOT_DIR)", ) - top_p.add_argument( - "--rsync-exe", default="/usr/bin/rsync", help="path to rsync executable" - ) + top_p.add_argument("--rsync-exe", default="/usr/bin/rsync", help="path to rsync executable") top_p.add_argument( "--verbose", "-v", @@ -152,9 +144,7 @@ def parse_arguments(): ap = subparsers.add_parser("export", help="export sequences") ap.set_defaults(func=export) ap.add_argument("ALIASES", nargs="*", help="specific aliases to export") - ap.add_argument( - "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name" - ) + ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name") ap.add_argument( "--namespace", "-n", @@ -164,9 +154,7 @@ def parse_arguments(): # export aliases ap = subparsers.add_parser("export-aliases", help="export aliases") ap.set_defaults(func=export_aliases) - ap.add_argument( - "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name" - ) + ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name") ap.add_argument( "--namespace", "-n", @@ -208,15 +196,11 @@ def parse_arguments(): ) # list-local-instances - ap = subparsers.add_parser( - "list-local-instances", help="list local seqrepo instances" - ) + ap = subparsers.add_parser("list-local-instances", help="list local seqrepo instances") ap.set_defaults(func=list_local_instances) # list-remote-instances - ap = subparsers.add_parser( - "list-remote-instances", help="list remote seqrepo instances" - ) + ap = subparsers.add_parser("list-remote-instances", help="list remote seqrepo instances") ap.set_defaults(func=list_remote_instances) # load @@ -241,9 +225,7 @@ def parse_arguments(): ) # pull - ap = subparsers.add_parser( - "pull", help="pull incremental update from seqrepo mirror" - ) + ap = subparsers.add_parser("pull", help="pull incremental update from seqrepo mirror") ap.set_defaults(func=pull) ap.add_argument("--instance-name", "-i", default=None, help="instance name") ap.add_argument( @@ -257,14 +239,10 @@ def parse_arguments(): # show-status ap = subparsers.add_parser("show-status", help="show seqrepo status") ap.set_defaults(func=show_status) - ap.add_argument( - "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name" - ) + ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name") # snapshot - ap = subparsers.add_parser( - "snapshot", help="create a new read-only seqrepo snapshot" - ) + ap = subparsers.add_parser("snapshot", help="create a new read-only seqrepo snapshot") ap.set_defaults(func=snapshot) ap.add_argument( "--instance-name", @@ -284,9 +262,7 @@ def parse_arguments(): "start-shell", help="start interactive shell with initialized seqrepo" ) ap.set_defaults(func=start_shell) - ap.add_argument( - "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name" - ) + ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name") # upgrade ap = subparsers.add_parser("upgrade", help="upgrade seqrepo database and directory") @@ -299,9 +275,7 @@ def parse_arguments(): ) # update digests - ap = subparsers.add_parser( - "update-digests", help="update computed digests in place" - ) + ap = subparsers.add_parser("update-digests", help="update computed digests in place") ap.set_defaults(func=update_digests) ap.add_argument( "--instance-name", @@ -369,9 +343,7 @@ def add_assembly_names(opts): sequences = assemblies[assy_name]["sequences"] eq_sequences = [s for s in sequences if s["relationship"] in ("=", "<>")] if not eq_sequences: - _logger.info( - "No '=' sequences to load for {an}; skipping".format(an=assy_name) - ) + _logger.info("No '=' sequences to load for {an}; skipping".format(an=assy_name)) continue # all assembled-molecules (1..22, X, Y, MT) have ncbi aliases in seqrepo @@ -389,23 +361,17 @@ def add_assembly_names(opts): ) ) if not opts.partial_load: - _logger.warning( - "Skipping {an} (-p to enable partial loading)".format(an=assy_name) - ) + _logger.warning("Skipping {an} (-p to enable partial loading)".format(an=assy_name)) continue eq_sequences = [es for es in eq_sequences if es["refseq_ac"] in ncbi_alias_map] _logger.info( - "Loading {n} new accessions for assembly {an}".format( - an=assy_name, n=len(eq_sequences) - ) + "Loading {n} new accessions for assembly {an}".format(an=assy_name, n=len(eq_sequences)) ) for s in eq_sequences: seq_id = ncbi_alias_map[s["refseq_ac"]] - aliases = [ - {"namespace": assy_name, "alias": a} for a in [s["name"]] + s["aliases"] - ] + aliases = [{"namespace": assy_name, "alias": a} for a in [s["name"]] + s["aliases"]] for alias in aliases: sr.aliases.store_alias(seq_id=seq_id, **alias) _logger.debug( @@ -463,9 +429,7 @@ def _rec_iterator(): for srec, arecs in _rec_iterator(): nsad = _convert_alias_records_to_ns_dict(arecs) aliases = [ - "{ns}:{a}".format(ns=ns, a=a) - for ns, aliases in sorted(nsad.items()) - for a in aliases + "{ns}:{a}".format(ns=ns, a=a) for ns, aliases in sorted(nsad.items()) for a in aliases ] print(">" + " ".join(aliases)) for l in _wrap_lines(srec["seq"], 100): @@ -476,9 +440,7 @@ def export_aliases(opts): seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name) sr = SeqRepo(seqrepo_dir) alias_iterator = sr.aliases.find_aliases(translate_ncbi_namespace=True) - grouped_alias_iterator = itertools.groupby( - alias_iterator, key=lambda arec: (arec["seq_id"]) - ) + grouped_alias_iterator = itertools.groupby(alias_iterator, key=lambda arec: (arec["seq_id"])) for _, arecs in grouped_alias_iterator: if opts.namespace: if not any(arec for arec in arecs if arec["namespace"] == opts.namespace): @@ -514,9 +476,7 @@ def fetch_load(opts): def init(opts): seqrepo_dir = os.path.join(opts.root_directory, opts.instance_name) if os.path.exists(seqrepo_dir) and len(os.listdir(seqrepo_dir)) > 0: - raise IOError( - "{seqrepo_dir} exists and is not empty".format(seqrepo_dir=seqrepo_dir) - ) + raise IOError("{seqrepo_dir} exists and is not empty".format(seqrepo_dir=seqrepo_dir)) sr = SeqRepo(seqrepo_dir, writeable=True) # flake8: noqa @@ -559,9 +519,7 @@ def load(opts): else: fh = io.open(fn, mode="rt", encoding="ascii") _logger.info("Opened " + fn) - seq_bar = tqdm.tqdm( - FastaIter(fh), unit=" seqs", disable=disable_bar, leave=False - ) + seq_bar = tqdm.tqdm(FastaIter(fh), unit=" seqs", disable=disable_bar, leave=False) for defline, seq in seq_bar: n_seqs_seen += 1 seq_bar.set_description( @@ -582,9 +540,7 @@ def pull(opts): if opts.instance_name: instance_name = opts.instance_name if instance_name not in remote_instances: - raise KeyError( - "{}: not in list of remote instance names".format(instance_name) - ) + raise KeyError("{}: not in list of remote instance names".format(instance_name)) else: instance_name = remote_instances[-1] _logger.info("most recent seqrepo instance is " + instance_name) @@ -600,11 +556,7 @@ def pull(opts): cmd = [opts.rsync_exe, "-aHP", "--no-motd"] if local_instances: latest_local_instance = local_instances[-1] - cmd += [ - "--link-dest=" - + os.path.join(opts.root_directory, latest_local_instance) - + "/" - ] + cmd += ["--link-dest=" + os.path.join(opts.root_directory, latest_local_instance) + "/"] cmd += ["{h}::seqrepo/{i}/".format(h=opts.remote_host, i=instance_name), tmp_dir] _logger.debug("Executing: " + " ".join(cmd)) @@ -627,11 +579,7 @@ def show_status(opts): sr = SeqRepo(seqrepo_dir) print("seqrepo {version}".format(version=__version__)) - print( - "instance directory: {sr._root_dir}, {ts:.1f} GB".format( - sr=sr, ts=tot_size / 1e9 - ) - ) + print("instance directory: {sr._root_dir}, {ts:.1f} GB".format(sr=sr, ts=tot_size / 1e9)) print( "backends: fastadir (schema {fd_v}), seqaliasdb (schema {sa_v}) ".format( fd_v=sr.sequences.schema_version(), sa_v=sr.aliases.schema_version() @@ -667,8 +615,7 @@ def snapshot(opts): if os.path.commonpath([src_dir, dst_dir]).startswith(src_dir): raise RuntimeError( - "Cannot nest seqrepo directories " - "({} is within {})".format(dst_dir, src_dir) + "Cannot nest seqrepo directories " "({} is within {})".format(dst_dir, src_dir) ) if os.path.exists(dst_dir): @@ -763,9 +710,7 @@ def update_latest(opts, mri=None): if not mri: instances = _get_local_instances(opts) if not instances: - _logger.error( - "No seqrepo instances in {opts.root_directory}".format(opts=opts) - ) + _logger.error("No seqrepo instances in {opts.root_directory}".format(opts=opts)) return mri = instances[-1] dst = os.path.join(opts.root_directory, "latest") @@ -781,11 +726,7 @@ def main(): opts = parse_arguments() verbose_log_level = ( - logging.WARN - if opts.verbose == 0 - else logging.INFO - if opts.verbose == 1 - else logging.DEBUG + logging.WARN if opts.verbose == 0 else logging.INFO if opts.verbose == 1 else logging.DEBUG ) logging.basicConfig(level=verbose_log_level) opts.func(opts) diff --git a/src/biocommons/seqrepo/dataproxy.py b/src/biocommons/seqrepo/dataproxy.py index bb5e8cd..5222716 100644 --- a/src/biocommons/seqrepo/dataproxy.py +++ b/src/biocommons/seqrepo/dataproxy.py @@ -214,9 +214,7 @@ def create_dataproxy(uri: str = None) -> _DataProxy: scheme = parsed_uri.scheme if "+" not in scheme: - raise ValueError( - "create_dataproxy scheme must include provider (e.g., `seqrepo+http:...`)" - ) + raise ValueError("create_dataproxy scheme must include provider (e.g., `seqrepo+http:...`)") provider, proto = scheme.split("+") diff --git a/src/biocommons/seqrepo/fastadir/fabgz.py b/src/biocommons/seqrepo/fastadir/fabgz.py index 44b3145..7f87c2e 100644 --- a/src/biocommons/seqrepo/fastadir/fabgz.py +++ b/src/biocommons/seqrepo/fastadir/fabgz.py @@ -36,9 +36,7 @@ def _get_bgzip_version(exe): ) output = p.communicate() version_line = output[0].splitlines()[1] - version = re.match( - r"(?:Version:|bgzip \(htslib\))\s+(\d+\.\d+(\.\d+)?)", version_line - ).group(1) + version = re.match(r"(?:Version:|bgzip \(htslib\))\s+(\d+\.\d+(\.\d+)?)", version_line).group(1) return version @@ -46,16 +44,12 @@ def _find_bgzip(): """return path to bgzip if found and meets version requirements, else exception""" missing_file_exception = OSError if six.PY2 else FileNotFoundError min_bgzip_version = ".".join(map(str, min_bgzip_version_info)) - exe = os.environ.get( - "SEQREPO_BGZIP_PATH", shutil.which("bgzip") or "/usr/bin/bgzip" - ) + exe = os.environ.get("SEQREPO_BGZIP_PATH", shutil.which("bgzip") or "/usr/bin/bgzip") try: bgzip_version = _get_bgzip_version(exe) except AttributeError: - raise RuntimeError( - "Didn't find version string in bgzip executable ({exe})".format(exe=exe) - ) + raise RuntimeError("Didn't find version string in bgzip executable ({exe})".format(exe=exe)) except missing_file_exception: raise RuntimeError( "{exe} doesn't exist; you need to install htslib and tabix (See https://github.com/biocommons/biocommons.seqrepo#requirements)".format( @@ -151,15 +145,11 @@ def close(self): os.chmod(self.filename + ".fai", stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) os.chmod(self.filename + ".gzi", stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) - _logger.info( - "{} written; added {} sequences".format(self.filename, len(self._added)) - ) + _logger.info("{} written; added {} sequences".format(self.filename, len(self._added))) def __del__(self): if self._fh is not None: _logger.error( - "FabgzWriter({}) was not explicitly closed; data may be lost".format( - self.filename - ) + "FabgzWriter({}) was not explicitly closed; data may be lost".format(self.filename) ) self.close() diff --git a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py index 8a5123e..ae1ee8e 100644 --- a/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py +++ b/src/biocommons/seqrepo/seqaliasdb/seqaliasdb.py @@ -55,9 +55,7 @@ def __init__( if schema_version != expected_schema_version: # pragma: no cover raise RuntimeError( "Upgrade required: Database schema" - "version is {} and code expects {}".format( - schema_version, expected_schema_version - ) + "version is {} and code expects {}".format(schema_version, expected_schema_version) ) # ############################################################################ @@ -88,9 +86,7 @@ def fetch_aliases(self, seq_id, current_only=True, translate_ncbi_namespace=None _logger.warning( "translate_ncbi_namespace is obsolete; translation is now automatic; this flag will be removed" ) - return [ - dict(r) for r in self.find_aliases(seq_id=seq_id, current_only=current_only) - ] + return [dict(r) for r in self.find_aliases(seq_id=seq_id, current_only=current_only)] def find_aliases( self, @@ -207,9 +203,7 @@ def store_alias(self, seq_id, namespace, alias): return current_rec["seqalias_id"] # otherwise, we're reassigning; deprecate old record, then retry - _logger.debug( - log_pfx + ": collision; deprecating {s1}".format(s1=current_rec["seq_id"]) - ) + _logger.debug(log_pfx + ": collision; deprecating {s1}".format(s1=current_rec["seq_id"])) cursor.execute( "update seqalias set is_current = 0 where seqalias_id = ?", [current_rec["seqalias_id"]], diff --git a/src/biocommons/seqrepo/seqrepo.py b/src/biocommons/seqrepo/seqrepo.py index 62fbef6..a80ed9f 100644 --- a/src/biocommons/seqrepo/seqrepo.py +++ b/src/biocommons/seqrepo/seqrepo.py @@ -162,9 +162,7 @@ def __iter__(self): yield (srec, arecs) def __str__(self): - return "SeqRepo(root_dir={self._root_dir}, writeable={self._writeable})".format( - self=self - ) + return "SeqRepo(root_dir={self._root_dir}, writeable={self._writeable})".format(self=self) def commit(self): self.sequences.commit() @@ -219,16 +217,12 @@ def store(self, seq, nsaliases): # add sequence if not present n_seqs_added = n_aliases_added = 0 - msg = ( - "sh{nsa_sep}{seq_id:.10s}... ({l} residues; {na} aliases {aliases})".format( - seq_id=seq_id, - l=len(seq), - na=len(nsaliases), - nsa_sep=nsa_sep, - aliases=", ".join( - "{nsa[namespace]}:{nsa[alias]}".format(nsa=nsa) for nsa in nsaliases - ), - ) + msg = "sh{nsa_sep}{seq_id:.10s}... ({l} residues; {na} aliases {aliases})".format( + seq_id=seq_id, + l=len(seq), + na=len(nsaliases), + nsa_sep=nsa_sep, + aliases=", ".join("{nsa[namespace]}:{nsa[alias]}".format(nsa=nsa) for nsa in nsaliases), ) if seq_id not in self.sequences: _logger.info("Storing " + msg) @@ -246,17 +240,13 @@ def store(self, seq, nsaliases): # add/update external aliases for new and existing sequences # updating is optimized to load only new tuples existing_aliases = self.aliases.find_aliases(seq_id=seq_id) - ea_tuples = [ - (r["seq_id"], r["namespace"], r["alias"]) for r in existing_aliases - ] + ea_tuples = [(r["seq_id"], r["namespace"], r["alias"]) for r in existing_aliases] new_tuples = [(seq_id, r["namespace"], r["alias"]) for r in nsaliases] upd_tuples = set(new_tuples) - set(ea_tuples) if upd_tuples: _logger.info("{} new aliases for {}".format(len(upd_tuples), msg)) for _, namespace, alias in upd_tuples: - self.aliases.store_alias( - seq_id=seq_id, namespace=namespace, alias=alias - ) + self.aliases.store_alias(seq_id=seq_id, namespace=namespace, alias=alias) self._pending_aliases += len(upd_tuples) n_aliases_added += len(upd_tuples) if ( @@ -332,9 +322,7 @@ def _get_unique_seqid(self, alias, namespace): raise KeyError("Alias {} (namespace: {})".format(alias, namespace)) if len(seq_ids) > 1: # This should only happen when namespace is None - raise KeyError( - "Alias {} (namespace: {}): not unique".format(alias, namespace) - ) + raise KeyError("Alias {} (namespace: {}): not unique".format(alias, namespace)) return seq_ids.pop() def _update_digest_aliases(self, seq_id, seq): diff --git a/tests/conftest.py b/tests/conftest.py index ca4a867..293f0bb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,9 +8,7 @@ @pytest.fixture(scope="session") def dataproxy(): - sr = SeqRepo( - root_dir=os.environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest") - ) + sr = SeqRepo(root_dir=os.environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest")) return SeqRepoDataProxy(sr) diff --git a/tests/test_cli.py b/tests/test_cli.py index 07898b9..6250f1a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -21,9 +21,7 @@ class MockOpts(object): test_data_dir = os.path.join(test_dir, "data") opts = MockOpts() - opts.root_directory = os.path.join( - tempfile.mkdtemp(prefix="seqrepo_pytest_"), "seqrepo" - ) + opts.root_directory = os.path.join(tempfile.mkdtemp(prefix="seqrepo_pytest_"), "seqrepo") opts.fasta_files = [os.path.join(test_data_dir, "sequences.fa.gz")] opts.namespace = "test" opts.instance_name = "test" diff --git a/tests/test_seqaliasdb.py b/tests/test_seqaliasdb.py index dbe49c0..c9e5f19 100644 --- a/tests/test_seqaliasdb.py +++ b/tests/test_seqaliasdb.py @@ -33,14 +33,10 @@ def test_seqinfo(): # A:1 -> q2 (reassign) aid = db.store_alias("q2", "A", "1") - assert ( - aid == 4 - ), "should have created a new alias_id on reassignment of new sequence" + assert aid == 4, "should have created a new alias_id on reassignment of new sequence" alias_keys = "seqalias_id seq_id namespace alias is_current".split() - aliases = [ - {k: r[k] for k in alias_keys} for r in db.find_aliases(current_only=False) - ] + aliases = [{k: r[k] for k in alias_keys} for r in db.find_aliases(current_only=False)] aliases.sort( key=lambda r: ( r["seqalias_id"], diff --git a/tests/test_seqrepo.py b/tests/test_seqrepo.py index 3f4b744..8523862 100644 --- a/tests/test_seqrepo.py +++ b/tests/test_seqrepo.py @@ -64,10 +64,7 @@ def test_digests(seqrepo): assert seqrepo.fetch_uri("fr:coin") == "ASINACORNER" assert seqrepo.fetch_uri("MD5:ea81b52627e387fc6edd8b9412cd3a99") == "ASINACORNER" assert seqrepo.fetch_uri("SEGUID:aMQF/cdHkAayLkVYs8XV2u+Hy34") == "ASINACORNER" - assert ( - seqrepo.fetch_uri("SHA1:68c405fdc7479006b22e4558b3c5d5daef87cb7e") - == "ASINACORNER" - ) + assert seqrepo.fetch_uri("SHA1:68c405fdc7479006b22e4558b3c5d5daef87cb7e") == "ASINACORNER" assert seqrepo.fetch_uri("VMC:GS_LDz34B6fA_fLxFoc2agLrXQRYuupOGGM") == "ASINACORNER" diff --git a/tests/test_utils.py b/tests/test_utils.py index b43884c..48221ca 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -19,15 +19,13 @@ def test_parse_defline(): """ - defline = ">NG_007107.2 Homo sapiens methyl-CpG binding protein 2 (MECP2), RefSeqGene on chromosome X" - assert parse_defline(defline, "refseq") == [ - {"namespace": "refseq", "alias": "NG_007107.2"} - ] + defline = ( + ">NG_007107.2 Homo sapiens methyl-CpG binding protein 2 (MECP2), RefSeqGene on chromosome X" + ) + assert parse_defline(defline, "refseq") == [{"namespace": "refseq", "alias": "NG_007107.2"}] defline = ">gi|568815364|ref|NT_077402.3| Homo sapiens chromosome 1 genomic scaffold, GRCh38.p7 Primary Assembly HSCHR1_CTG1" - assert parse_defline(defline, "refseq") == [ - {"namespace": "refseq", "alias": "NT_077402.3"} - ] + assert parse_defline(defline, "refseq") == [{"namespace": "refseq", "alias": "NT_077402.3"}] def test_validate_aliases():