diff --git a/rescript/bv_brc.py b/rescript/bv_brc.py
index 2bad090..c8d5ecb 100644
--- a/rescript/bv_brc.py
+++ b/rescript/bv_brc.py
@@ -8,137 +8,257 @@
 from collections import OrderedDict
 from io import StringIO
 import os
+from typing import Union
+from urllib.parse import quote
+
+import numpy as np
 import qiime2
 import pandas as pd
 import requests
-from q2_types.feature_data import TSVTaxonomyDirectoryFormat, TSVTaxonomyFormat
+from q2_types.feature_data import TSVTaxonomyFormat
 from q2_types.genome_data import (GenomeSequencesDirectoryFormat,
                                   GenesDirectoryFormat,
-                                  ProteinsDirectoryFormat)
+                                  ProteinsDirectoryFormat, LociDirectoryFormat)
 
 from rescript.ncbi import _allowed_ranks, _default_ranks
 import json
 
 
-def fetch_metadata_bv_brc(data_type: str, rql_query: str) -> qiime2.Metadata:
-    # Download data
-    response = download_data(
-        url=f"https://www.bv-brc.org/api/{data_type}/"
-            f"?{rql_query}&http_accept=text/tsv",
-    )
-
-    # Convert data to data frame
-    tsv_data = StringIO(response.text)
-    metadata = pd.read_csv(tsv_data, sep='\t')
+def get_bv_brc_metadata(
+        ids_metadata: Union[qiime2.NumericMetadataColumn,
+        qiime2.CategoricalMetadataColumn] = None,
+        data_type: str = None,
+        rql_query: str = None,
+        data_field: str = None,
+        ids: list = None,
+) -> qiime2.Metadata:
+    # Parameter validation and creation of RQL query
+    rql_query = parameter_validation(rql_query=rql_query,
+                                     ids=ids,
+                                     data_type=data_type,
+                                     data_field=data_field,
+                                     metadata=ids_metadata)
+
+    # Download metadata as df
+    metadata = download_data(data_type=data_type,
+                             query=rql_query,
+                             accept="text/tsv",
+                             )
+
+    # Set index of metadata df
     metadata.index.name = "id"
     metadata.index = metadata.index.astype(str)
 
-    # Return as qiime2 metadata
+    # Replace empty values and values consisting of spaces only with np.nan
+    metadata.replace(to_replace=r'^\s*$', value=np.nan, regex=True,
+                     inplace=True)
+    metadata.replace([None], np.nan, inplace=True)
+
     return qiime2.Metadata(metadata)
 
 
-def fetch_genomes_bv_brc(
+def get_bv_brc_genomes(
+        ids_metadata: Union[qiime2.NumericMetadataColumn,
+        qiime2.CategoricalMetadataColumn] = None,
         rql_query: str = None,
-        genome_ids: list = None,
+        data_field: str = None,
+        ids: list = None,
         ranks: list = None,
+        rank_propagation: bool = True,
 ) -> (GenomeSequencesDirectoryFormat, TSVTaxonomyFormat):
     # Parameter validation
-    rql_query = id_list_handling(rql_query=rql_query,
-                                 ids=genome_ids,
-                                 parameter_name="genome_ids",
-                                 data_field="genome_id"
-                                 )
-
-    # Define output formats
-    genomes = GenomeSequencesDirectoryFormat()
+    rql_query = parameter_validation(rql_query=rql_query,
+                                     ids=ids,
+                                     data_type="genome_sequence",
+                                     data_field=data_field,
+                                     metadata=ids_metadata
+                                     )
 
     # Get requests response for genome sequences
-    response_sequences = download_data(
-        url=f"https://www.bv-brc.org/api/genome_sequence/?{rql_query}",
-    )
-    # Convert sequences in response_sequences JSON to FASTA
-    json_to_fasta(json=response_sequences.json(), output_dir=str(genomes))
+    sequences = download_data(data_type="genome_sequence",
+                              query=rql_query,
+                              accept="application/json",
+                              )
 
-    taxonomy = get_taxonomy(bv_brc_response=response_sequences, ranks=ranks)
+    # Convert sequences in JSON to FASTA file
+    genomes = create_genome_fasta(genome_sequences=sequences)
+
+    # Get taxonomy for sequences
+    taxonomy = get_taxonomy(response_sequences=sequences,
+                            ranks=ranks,
+                            rank_propagation=rank_propagation,
+                            accession_name="accession")
 
     return genomes, taxonomy
 
 
-def fetch_genome_features_bv_brc(
+def get_bv_brc_genome_features(
+        ids_metadata: Union[qiime2.NumericMetadataColumn,
+        qiime2.CategoricalMetadataColumn] = None,
         rql_query: str = None,
-        taxon_ids: list = None,
+        data_field: str = None,
+        ids: list = None,
         ranks: list = None,
+        rank_propagation: bool = True,
 ) -> (
         GenesDirectoryFormat, ProteinsDirectoryFormat,
-        TSVTaxonomyFormat):
+        TSVTaxonomyFormat, LociDirectoryFormat):
     # Parameter validation
-    rql_query = id_list_handling(rql_query=rql_query,
-                                 ids=taxon_ids,
-                                 parameter_name="feature_ids",
-                                 data_field="feature_id")
+    rql_query = parameter_validation(rql_query=rql_query,
+                                     ids=ids,
+                                     data_type="genome_feature",
+                                     data_field=data_field,
+                                     metadata=ids_metadata)
+
+    # Download genome_features data object as JSON
+    genome_features = download_data(data_type="genome_feature",
+                                    query=rql_query,
+                                    accept="application/json",
+                                    select=["genome_id", "feature_id",
+                                            "aa_sequence_md5",
+                                            "na_sequence_md5", "taxon_id"]
+                                    )
+
+    # Download nucleotide and protein sequences for features
+    genes, proteins = get_sequences(genome_features=genome_features)
+
+    # Download taxonomy for feature sequences
+    taxonomy = get_taxonomy(response_sequences=genome_features,
+                            ranks=ranks,
+                            rank_propagation=rank_propagation,
+                            accession_name="feature_id"
+                            )
+
+    # Download GFF files for genome ids
+    loci = get_loci(response_sequences=genome_features)
+
+    return genes, proteins, taxonomy, loci
+
+
+def get_sequences(genome_features):
+    """Download the gene and protein sequences of genome features.
+
+    genome_features is an iterable of dicts with "genome_id" and
+    "feature_id" keys and optional "na_sequence_md5"/"aa_sequence_md5" keys.
+    The sequences are fetched by md5 from the "feature_sequence" endpoint and
+    written in upper case to one FASTA file per genome id.
+
+    Returns a (GenesDirectoryFormat, ProteinsDirectoryFormat) tuple.
+    """
+    genes = GenesDirectoryFormat()
+    proteins = ProteinsDirectoryFormat()
+    md5_keys = ('na_sequence_md5', 'aa_sequence_md5')
+
+    # Extract md5 values for nucleotide and protein sequences from
+    # genome_features JSON
+    md5_ids = {item[key] for item in genome_features for key in md5_keys
+               if key in item}
+
+    # Download sequences corresponding to the md5 values
+    feature_sequences = download_data(data_type="feature_sequence",
+                                      query=f"in(md5,({','.join(md5_ids)}))",
+                                      accept="application/json",
+                                      select=["md5", "sequence"])
+
+    # Create dict with md5 id as keys and sequence as value for faster look up
+    lookup_dict = {entry['md5']: entry['sequence'] for entry in
+                   feature_sequences}
+
+    # FASTA entries per genome id, kept as lists and joined once on writing to
+    # avoid quadratic string concatenation
+    fasta_genes = {}
+    fasta_proteins = {}
+
+    # Group one FASTA record per available sequence under its genome id
+    for entry in genome_features:
+        genome_id = entry["genome_id"]
+        for key, fasta in zip(md5_keys, (fasta_genes, fasta_proteins)):
+            if key not in entry:
+                continue
+            # Sequences are stored in upper case to conform with QIIME formats
+            sequence = lookup_dict[entry[key]].upper()
+            fasta.setdefault(genome_id, []).append(
+                f">{entry['feature_id']}\n{sequence}\n")
+
+    # Save genes and proteins as FASTA files one file per genome_id
+    for genome_id, fasta_entries in fasta_genes.items():
+        with open(os.path.join(str(genes), f"{genome_id}.fasta"),
+                  'w') as fasta_file:
+            fasta_file.write("".join(fasta_entries))
 
-    genes = get_sequences(rql_query=rql_query, sequence_type="genes")
-    proteins = get_sequences(rql_query=rql_query, sequence_type="proteins")
+    for genome_id, fasta_entries in fasta_proteins.items():
+        with open(os.path.join(str(proteins), f"{genome_id}.fasta"),
+                  'w') as fasta_file:
+            fasta_file.write("".join(fasta_entries))
 
-    response_features = download_data(
-        url=f"https://www.bv-brc.org/api/genome_feature/?{rql_query}"
-    )
-    taxonomy = get_taxonomy(bv_brc_response=response_features, ranks=ranks)
+    return genes, proteins
 
-    return genes, proteins, taxonomy
 
-def get_sequences(rql_query, sequence_type):
-    base_url = ("https://www.bv-brc.org/api/genome_feature/?"
-               f"&limit(1000000000){rql_query}&http_accept=application/")
+def get_loci(response_sequences):
+    # Init loci dir format
+    loci = LociDirectoryFormat()
 
-    if sequence_type == "genes":
-        dir_format = GenesDirectoryFormat()
-        url = base_url + "dna+fasta"
-    else:
-        dir_format = ProteinsDirectoryFormat()
-        url = base_url + "protein+fasta"
+    # Extract genome ids from genome feature JSON
+    genome_ids = set([str(entry['genome_id']) for entry in response_sequences])
 
-    # Get requests response
-    response = download_data(url=url)
+    # Download GFF files for all genome ids. For every id there is a separate
+    # request
+    for genome_id in genome_ids:
+        gff = download_data(data_type="genome_feature",
+                            query=f"eq(genome_id,{genome_id})",
+                            accept="application/gff",
+                            )
 
-    # Convert all sequences to upper case characters to conform with
-    # QIIME formats
-    sequences_fasta = parse_fasta_to_dict(fasta_string=response.text)
+        # Process the loci data and write to file
+        with open(os.path.join(str(loci), f"{genome_id}.gff"), 'w') as file:
+            file.write(process_loci(gff_string=gff))
 
-    # Save genes and proteins as FASTA files one file per genome_id
-    for genome_id, fasta_sequences in sequences_fasta.items():
-        with open(os.path.join(str(dir_format), f"{genome_id}.fasta"),
-                  'w') as fasta_file:
-            fasta_file.write(fasta_sequences)
+    return loci
 
-    return dir_format
 
-# def get_loci(rql_query):
+def process_loci(gff_string):
+    """Return gff_string with the "accn|" prefix removed from its lines.
+
+    Lines that do not start with "accn|" (e.g. "#" headers) stay unchanged."""
 
+    prefix = "accn|"
+
+    # Slice only when the prefix is present; cutting five characters off
+    # every non-header line would corrupt records that lack the prefix
+    modified_lines = [
+        line[len(prefix):] if line.startswith(prefix) else line
+        for line in gff_string.splitlines()]
 
-def get_taxonomy(bv_brc_response, ranks):
-    # Extract all taxon_ids from bv_brc_response JSON
-    taxon_ids = [str(entry['taxon_id']) for entry in bv_brc_response.json()]
+    # Join the lines back into a single string
+    return "\n".join(modified_lines)
 
-    # Get requests response for taxonomies
-    response_taxonomy = download_data(
-        url="https://www.bv-brc.org/api/taxonomy/"
-            f"?in(taxon_id,({','.join(taxon_ids)}))&http_accept=text/tsv",
-    )
+
+def get_taxonomy(response_sequences, ranks, rank_propagation, accession_name):
+    """Download the lineages of all taxon ids and build the taxonomy."""
+    # Collect the distinct taxon ids of all entries as strings
+    unique_ids = {str(item['taxon_id']) for item in response_sequences}
+
+    # Fetch only the fields needed to build lineages, as a data frame
+    taxonomy = download_data(
+        data_type="taxonomy", accept="text/tsv",
+        query=f"in(taxon_id,({','.join(unique_ids)}))",
+        select=["taxon_id", "lineage_names", "lineage_ranks"])
 
     # Transform the df to conform with TSVTaxonomyFormat
     return create_taxonomy(
-        response_taxonomy=response_taxonomy,
-        response_sequences=bv_brc_response.json(),
-        ranks=ranks
+        taxonomy_bvbrc=taxonomy,
+        response_sequences=response_sequences,
+        ranks=ranks,
+        rank_propagation=rank_propagation,
+        accession_name=accession_name,
     )
 
 
-def parse_lineage_names_with_ranks(
+def create_taxonomy_entry(
         lineage_names,
         lineage_ranks,
         ranks=None,
-        rank_propagation=False
+        rank_propagation=True
 ):
     # Set ranks to default if no list is specified
     if not ranks:
@@ -159,16 +279,19 @@ def parse_lineage_names_with_ranks(
     # Handle genus and species splitting logic
     if 'genus' in ranks and taxonomy.get('species'):
         species = taxonomy.get('species')
+        # If species starts with the genus name, drop that prefix from species
         if taxonomy.get('genus'):
             if species.startswith(taxonomy['genus'] + ' '):
                 species = species[len(taxonomy['genus']) + 1:]
                 taxonomy['species'] = species
+        # If genus is unset, split species: its first word becomes the genus
         elif ' ' in species:
             genus, species = species.split(' ', 1)
             taxonomy['genus'] = genus
             taxonomy['species'] = species
 
-    # Apply rank propagation if enabled
+    # Apply rank propagation if enabled. Assigns the last defined higher rank
+    # to lower undefined ranks
     if rank_propagation:
         last_label = None
         for rank in taxonomy:
@@ -176,23 +299,23 @@ def parse_lineage_names_with_ranks(
                 taxonomy[rank] = last_label
             last_label = taxonomy[rank]
 
-    result = '; '.join(
+    # Create taxonomy entry
+    taxonomy_entry = '; '.join(
         f"{_allowed_ranks.get(rank, '')}{name if name else ''}" for rank, name
         in taxonomy.items()
     )
-    return result
-
+    return taxonomy_entry
 
-def create_taxonomy(response_taxonomy, response_sequences, ranks, data_type):
-    # Read the taxonomy TSV data into a DataFrame
-    taxonomy_bvbrc = pd.read_csv(StringIO(response_taxonomy.text), sep='\t')
 
-    # Apply the transformation
+def create_taxonomy(taxonomy_bvbrc, response_sequences,
+                    ranks, rank_propagation, accession_name):
+    # Create qiime style taxonomy entries for all rows in taxonomy_bvbrc
     taxonomy_bvbrc['Taxon'] = (
         taxonomy_bvbrc.apply(lambda row:
-                             parse_lineage_names_with_ranks(
+                             create_taxonomy_entry(
                                  lineage_names=row['lineage_names'],
                                  lineage_ranks=row['lineage_ranks'],
+                                 rank_propagation=rank_propagation,
                                  ranks=ranks), axis=1))
 
     taxonomy_df = pd.DataFrame(columns=['Feature ID', 'Taxon'])
@@ -200,11 +323,8 @@ def create_taxonomy(response_taxonomy, response_sequences, ranks, data_type):
     # Loop through each JSON dictionary in the list
     for entry in response_sequences:
         # Get the accession and taxon_id from the JSON dictionary
-        if data_type == "genomes":
-            accession = entry.get('accession')
-        else:
-            accession = entry.get('refseq_locus_tag')
-        taxon_id = entry.get('taxon_id')
+        accession = entry.get(accession_name)
+        taxon_id = str(entry.get('taxon_id'))
 
         # Look up the corresponding taxon in taxonomy_bvbrc using taxon_id
         taxon_name = taxonomy_bvbrc.loc[
@@ -230,77 +350,117 @@ def create_taxonomy(response_taxonomy, response_sequences, ranks, data_type):
     return taxonomy
 
 
-def parse_fasta_to_dict(fasta_string):
-    # Creates a dict with genome_id as keys and the corresponding FASTA
-    # entries in upper case
-    fasta_dict = {}
-
-    genome_id = None
-    for line in fasta_string.splitlines():
-        if line.startswith(">"):
-            # Split the header and rearrange so the NCBI accession is first
-            parts = line.split('|')
-            rearranged_header = f">{parts[2]}|{parts[0][1:]}|{parts[1]}"
-
-            # Append any remaining parts at the end
-            if len(parts) > 3:
-                remaining_parts = '|'.join(parts[3:])
-                rearranged_header = f"{rearranged_header}|{remaining_parts}"
-
-            # Extract the genome ID from the header
-            genome_id = parts[-1][:-1].strip()
-
-            if genome_id not in fasta_dict:
-                # Start a new entry with the header
-                fasta_dict[genome_id] = rearranged_header + "\n"
-            else:
-                # Append the header to the existing entry
-                fasta_dict[genome_id] += rearranged_header + "\n"
-        else:
-            # Append the sequence line in uppercase
-            fasta_dict[genome_id] += line.upper() + "\n"
-
-    return fasta_dict
-
+def create_genome_fasta(genome_sequences):
+    genomes = GenomeSequencesDirectoryFormat()
 
-def json_to_fasta(json, output_dir):
     # Dictionary to hold sequences grouped by genome_id
-    fasta_files = {}
+    fasta_entries = {}
 
-    # Loop over all entries in dict
-    for entry in json:
+    # Loop over all dicts in genome_sequences
+    for entry in genome_sequences:
         genome_id = entry['genome_id']
-        if genome_id not in fasta_files:
-            fasta_files[genome_id] = []
+        if genome_id not in fasta_entries:
+            fasta_entries[genome_id] = []
 
         # Construct FASTA format to be identical to BV-BRC FASTA headers
         header = (f">{entry['accession']}   {entry['description']}   "
                   f"[{entry['genome_name']} | {entry['genome_id']}]")
 
-        fasta_files[genome_id].append(f"{header}\n{entry['sequence'].upper()}")
+        # Append FASTA entry to fasta_entries
+        fasta_entries[genome_id].append(
+            f"{header}\n{entry['sequence'].upper()}")
 
     # Write each genome_id's sequences to a separate FASTA file
-    for genome_id, sequences in fasta_files.items():
+    for genome_id, sequences in fasta_entries.items():
         fasta_content = "\n".join(sequences)
-        fasta_filename = os.path.join(output_dir, f"{genome_id}.fasta")
+        fasta_filename = os.path.join(str(genomes), f"{genome_id}.fasta")
 
         with open(fasta_filename, 'w') as fasta_file:
             fasta_file.write(fasta_content)
 
+    return genomes
+
+
+def download_data(data_type, query, accept, select=None):
+    """Download all records matching an RQL query from the BV-BRC API.
+
+    The query is POSTed in batches of 25000 records. JSON is returned as a
+    list, TSV as one DataFrame and any other accept type (GFF) as the text
+    of the first response. A 400 response is passed to error_handling; any
+    other non-200 status raises ValueError.
+    """
+    url = f"https://www.bv-brc.org/api/{data_type}/"
+    headers = {'Content-Type': 'application/rqlquery+x-www-form-urlencoded',
+               'ACCEPT': accept}
+
+    results = [] if accept == "application/json" else pd.DataFrame()
+
+    # BV-BRC sets an upper limit of 25000 entries that can be fetched at once
+    batch_size = 25000
+    start = 0
+
+    while True:
+        # Create the data string with current batch start and limit
+        data = f"{query}&limit({batch_size},{start})"
+
+        # Add select parameter to only download specified fields
+        if select:
+            data += f"&select({','.join(select)})"
+
+        response = requests.post(url=url, data=data, headers=headers)
+
+        # Handle errors first so that only successful bodies are parsed
+        if response.status_code == 400:
+            error_handling(response=response, data_type=data_type)
+            break
+        if response.status_code != 200:
+            raise ValueError(response.text)
+
+        # Parse the body according to the requested format. The body is not
+        # decoded as JSON unless JSON was requested (TSV/GFF are not JSON)
+        if accept == "application/json":
+            result = response.json()
+            results.extend(result)
+        elif accept == "text/tsv":
+            result = read_tsv_data_with_dtypes(response=response,
+                                               data_type=data_type)
+            results = pd.concat([results, result], ignore_index=True)
+        else:  # accept == "application/gff"
+            return response.text
+
+        # A batch smaller than batch_size is the last one
+        if len(result) < batch_size:
+            break
+        start += batch_size
 
-def download_data(url):
-    # Get requests response
-    response = requests.get(url)
+    return results
 
-    # If response is correct return it
-    if response.status_code == 200:
-        return response
 
-    # Error handling if response incorrect
-    elif response.status_code == 400:
-        error_handling(response=response, data_type=url.split('/')[4])
-    else:
-        raise ValueError(response.text)
+def read_tsv_data_with_dtypes(response, data_type):
+    """Parse a TSV response body into a DataFrame with per-field dtypes.
+
+    Dtypes are looked up in data_fields_bvbrc for data_type and applied only
+    to columns present in the response. Raises ValueError if the parsed
+    table has no rows.
+    """
+    text = response.text
+
+    # Parse just the header row to learn which columns were returned
+    header = pd.read_csv(StringIO(text), sep='\t', nrows=0)
+
+    # Keep only the dtype entries whose column occurs in the response
+    known_dtypes = data_fields_bvbrc.get(data_type, {})
+    dtypes = {name: known_dtypes[name] for name in header.columns.tolist()
+              if name in known_dtypes}
+
+    # Parse the complete table with the selected dtypes
+    table = pd.read_csv(StringIO(text), sep='\t', dtype=dtypes)
+
+    if not len(table):
+        raise ValueError("No data could be retrieved. Either because of an "
+                         "incorrect RQL query or because no data exists for "
+                         "the query.")
+    return table
 
 
 def error_handling(response, data_type):
@@ -328,7 +488,7 @@ def error_handling(response, data_type):
             raise ValueError(
                 f"Error code {response_dict['code']}: {response_dict['msg']}. "
                 f"\nAllowed fields for data type {data_type}: "
-                f"\n{data_fields[data_type]}"
+                f"\n{list(data_fields_bvbrc[data_type].keys())}"
             )
 
         # Handling any other errors that start with "A Database Error Occured:"
@@ -342,1083 +502,1115 @@ def error_handling(response, data_type):
         raise ValueError(response.text)
 
 
-def id_list_handling(rql_query: str, ids: list, parameter_name: str,
-                     data_field: str):
-    # Error if rql_query and ids parameters are given
-    if rql_query and ids:
+def parameter_validation(rql_query=None,
+                         ids=None,
+                         data_type=None,
+                         data_field=None,
+                         metadata=None):
+    # Error if data_type is not specified
+    if not data_type:
+        raise ValueError("Parameter 'data-type' has to be specified.")
+
+    local = locals().copy()
+    # Error if any other parameter is specified simultaneously with rql_query
+    # or metadata
+    for parameter_1 in ["rql_query", "metadata"]:
+        for parameter_2, value in local.items():
+            if parameter_2 != parameter_1:
+                if parameter_2 != "data_type":
+                    if local[parameter_1] is not None and value is not None:
+                        raise ValueError(
+                            f"Parameters '{parameter_1}' and '{parameter_2}' "
+                            "can't be used simultaneously.")
+
+    # Error if ids or data_fields is specified without the other
+    if (ids and not data_field) or (data_field and not ids):
         raise ValueError(
-            f"Parameters rql_query and {parameter_name} can't be used "
-            "simultaneously.")
+            "If parameter 'ids' is given, parameter 'data-field' has to be "
+            "specified and vice versa.")
+
+    # Error if rql_query, metadata and ids parameters are not given
+    if not rql_query and not ids and not metadata:
+        raise ValueError("At least one of the parameters 'rql-query', 'ids' "
+                         "or 'ids_metadata' has to be specified.")
 
-    # Error if rql_query and ids parameters are not given
-    elif not rql_query and not ids:
-        raise ValueError("At least one of the parameters rql_query and "
-                         f"{parameter_name} has to be given.")
+    if metadata is not None:
+        data_field = metadata.to_series().name
+        ids = metadata.to_series()
+
+    if (data_field is not None and
+            data_field not in data_fields_bvbrc[data_type].keys()):
+        raise ValueError(
+            f"The data-field '{data_field}' is not permitted for the "
+            f"data-type '{data_type}'.\nAllowed data fields are: "
+            f"{list(data_fields_bvbrc[data_type].keys())}")
 
     # Construct the RQL queries
-    elif ids:
-        rql_query = f"in({data_field},({','.join(map(str, ids))}))"
+    if ids is not None or metadata is not None:
+        # Join the quoted ids with commas
+        joined_ids = ','.join(quote(f'"{str(id_)}"') for id_ in ids)
 
+        # Final result
+        rql_query = f'in({quote(data_field)},({joined_ids}))'
     return rql_query
 
 
-data_fields = {
-    "antibiotics": [
-        "_version_",
-        "antibiotic_name",
-        "atc_classification",
-        "canonical_smiles",
-        "cas_id",
-        "date_inserted",
-        "date_modified",
-        "description",
-        "drugbank_interactions",
-        "inchi_key",
-        "isomeric_smiles",
-        "mechanism_of_action",
-        "molecular_formula",
-        "molecular_weight",
-        "pharmacological_classes",
-        "pharmacology",
-        "pubchem_cid",
-        "pubchem_cid_i",
-        "synonyms"
-    ],
-    "enzyme_class_ref": [
-        "_version_",
-        "date_inserted",
-        "date_modified",
-        "ec_description",
-        "ec_number",
-        "go"
-    ],
-    "epitope": [
-        "_version_",
-        "assay_results",
-        "bcell_assays",
-        "comments",
-        "date_inserted",
-        "date_modified",
-        "end",
-        "epitope_id",
-        "epitope_sequence",
-        "epitope_type",
-        "host_name",
-        "mhc_assays",
-        "organism",
-        "protein_accession",
-        "protein_id",
-        "protein_name",
-        "start",
-        "taxon_id",
-        "taxon_lineage_ids",
-        "taxon_lineage_names",
-        "tcell_assays",
-        "total_assays"
-    ],
-    "epitope_assay": [
-        "_version_",
-        "assay_group",
-        "assay_id",
-        "assay_measurement",
-        "assay_measurement_unit",
-        "assay_method",
-        "assay_result",
-        "assay_type",
-        "authors",
-        "date_inserted",
-        "date_modified",
-        "end",
-        "epitope_id",
-        "epitope_sequence",
-        "epitope_type",
-        "host_name",
-        "host_taxon_id",
-        "mhc_allele",
-        "mhc_allele_class",
-        "organism",
-        "pdb_id",
-        "pmid",
-        "protein_accession",
-        "protein_id",
-        "protein_name",
-        "start",
-        "taxon_id",
-        "taxon_lineage_ids",
-        "taxon_lineage_names",
-        "title"
-    ],
-    "experiment": [
-        "_version_",
-        "additional_data",
-        "additional_metadata",
-        "biosets",
-        "date_inserted",
-        "date_modified",
-        "detection_instrument",
-        "doi",
-        "exp_description",
-        "exp_id",
-        "exp_name",
-        "exp_poc",
-        "exp_protocol",
-        "exp_title",
-        "exp_type",
-        "experimenters",
-        "genome_id",
-        "measurement_technique",
-        "organism",
-        "pmid",
-        "public_identifier",
-        "public_repository",
-        "samples",
-        "strain",
-        "study_description",
-        "study_institution",
-        "study_name",
-        "study_pi",
-        "study_title",
-        "taxon_id",
-        "taxon_lineage_ids",
-        "treatment_amount",
-        "treatment_duration",
-        "treatment_name",
-        "treatment_type"
-    ],
-    "bioset": [
-        "_version_",
-        "additional_data",
-        "additional_metadata",
-        "analysis_group_1",
-        "analysis_group_2",
-        "analysis_method",
-        "bioset_criteria",
-        "bioset_description",
-        "bioset_id",
-        "bioset_name",
-        "bioset_result",
-        "bioset_type",
-        "date_inserted",
-        "date_modified",
-        "entity_count",
-        "entity_type",
-        "exp_id",
-        "exp_name",
-        "exp_title",
-        "exp_type",
-        "genome_id",
-        "organism",
-        "protocol",
-        "result_type",
-        "strain",
-        "study_description",
-        "study_institution",
-        "study_name",
-        "study_pi",
-        "study_title",
-        "taxon_id",
-        "taxon_lineage_ids",
-        "treatment_amount",
-        "treatment_duration",
-        "treatment_name",
-        "treatment_type"
-    ],
-    "bioset_result": [
-        "_version_",
-        "bioset_description",
-        "bioset_id",
-        "bioset_name",
-        "bioset_type",
-        "counts",
-        "date_inserted",
-        "date_modified",
-        "entity_id",
-        "entity_name",
-        "entity_type",
-        "exp_id",
-        "exp_name",
-        "exp_title",
-        "exp_type",
-        "feature_id",
-        "fpkm",
-        "gene",
-        "gene_id",
-        "genome_id",
-        "id",
-        "locus_tag",
-        "log2_fc",
-        "organism",
-        "other_ids",
-        "other_value",
-        "p_value",
-        "patric_id",
-        "product",
-        "protein_id",
-        "result_type",
-        "strain",
-        "taxon_id",
-        "tpm",
-        "treatment_amount",
-        "treatment_duration",
-        "treatment_name",
-        "treatment_type",
-        "uniprot_id",
-        "z_score"
-    ],
-    "gene_ontology_ref": [
-        "_version_",
-        "date_inserted",
-        "date_modified",
-        "definition",
-        "go_id",
-        "go_name",
-        "ontology"
-    ],
-    "genome": [
-        "_version_",
-        "additional_metadata",
-        "altitude",
-        "antimicrobial_resistance",
-        "antimicrobial_resistance_evidence",
-        "assembly_accession",
-        "assembly_method",
-        "authors",
-        "bioproject_accession",
-        "biosample_accession",
-        "biovar",
-        "body_sample_site",
-        "body_sample_subsite",
-        "cds",
-        "cds_ratio",
-        "cell_shape",
-        "checkm_completeness",
-        "checkm_contamination",
-        "chromosomes",
-        "clade",
-        "class",
-        "coarse_consistency",
-        "collection_date",
-        "collection_year",
-        "comments",
-        "common_name",
-        "completion_date",
-        "contig_l50",
-        "contig_n50",
-        "contigs",
-        "core_families",
-        "core_family_ratio",
-        "culture_collection",
-        "date_inserted",
-        "date_modified",
-        "depth",
-        "disease",
-        "family",
-        "fine_consistency",
-        "gc_content",
-        "genbank_accessions",
-        "genome_id",
-        "genome_length",
-        "genome_name",
-        "genome_quality",
-        "genome_quality_flags",
-        "genome_status",
-        "genus",
-        "geographic_group",
-        "geographic_location",
-        "gram_stain",
-        "h1_clade_global",
-        "h1_clade_us",
-        "h3_clade",
-        "h5_clade",
-        "h_type",
-        "habitat",
-        "host_age",
-        "host_common_name",
-        "host_gender",
-        "host_group",
-        "host_health",
-        "host_name",
-        "host_scientific_name",
-        "hypothetical_cds",
-        "hypothetical_cds_ratio",
-        "isolation_comments",
-        "isolation_country",
-        "isolation_site",
-        "isolation_source",
-        "kingdom",
-        "lab_host",
-        "latitude",
-        "lineage",
-        "longitude",
-        "mat_peptide",
-        "missing_core_family_ids",
-        "mlst",
-        "motility",
-        "n_type",
-        "ncbi_project_id",
-        "nearest_genomes",
-        "optimal_temperature",
-        "order",
-        "organism_name",
-        "other_clinical",
-        "other_environmental",
-        "other_names",
-        "other_typing",
-        "outgroup_genomes",
-        "owner",
-        "oxygen_requirement",
-        "p2_genome_id",
-        "partial_cds",
-        "partial_cds_ratio",
-        "passage",
-        "pathovar",
-        "patric_cds",
-        "ph1n1_like",
-        "phenotype",
-        "phylum",
-        "plasmids",
-        "plfam_cds",
-        "plfam_cds_ratio",
-        "public",
-        "publication",
-        "reference_genome",
-        "refseq_accessions",
-        "refseq_cds",
-        "refseq_project_id",
-        "rrna",
-        "salinity",
-        "season",
-        "segment",
-        "segments",
-        "sequencing_centers",
-        "sequencing_depth",
-        "sequencing_platform",
-        "sequencing_status",
-        "serovar",
-        "species",
-        "sporulation",
-        "sra_accession",
-        "state_province",
-        "strain",
-        "subclade",
-        "subtype",
-        "superkingdom",
-        "taxon_id",
-        "taxon_lineage_ids",
-        "taxon_lineage_names",
-        "temperature_range",
-        "trna",
-        "type_strain",
-        "user_read",
-        "user_write"
-    ],
-    "strain": [
-        "1_pb2",
-        "2_pb1",
-        "3_pa",
-        "4_ha",
-        "5_np",
-        "6_na",
-        "7_mp",
-        "8_ns",
-        "_version_",
-        "collection_date",
-        "collection_year",
-        "date_inserted",
-        "date_modified",
-        "family",
-        "genbank_accessions",
-        "genome_ids",
-        "genus",
-        "geographic_group",
-        "h_type",
-        "host_common_name",
-        "host_group",
-        "host_name",
-        "id",
-        "isolation_country",
-        "l",
-        "lab_host",
-        "m",
-        "n_type",
-        "other_segments",
-        "owner",
-        "passage",
-        "public",
-        "s",
-        "season",
-        "segment_count",
-        "species",
-        "status",
-        "strain",
-        "subtype",
-        "taxon_id",
-        "taxon_lineage_ids",
-        "taxon_lineage_names",
-        "user_read",
-        "user_write"
-    ],
-    "genome_amr": [
-        "_version_",
-        "antibiotic",
-        "computational_method",
-        "computational_method_performance",
-        "computational_method_version",
-        "date_inserted",
-        "date_modified",
-        "evidence",
-        "genome_id",
-        "genome_name",
-        "id",
-        "laboratory_typing_method",
-        "laboratory_typing_method_version",
-        "laboratory_typing_platform",
-        "measurement",
-        "measurement_sign",
-        "measurement_unit",
-        "measurement_value",
-        "owner",
-        "pmid",
-        "public",
-        "resistant_phenotype",
-        "source",
-        "taxon_id",
-        "testing_standard",
-        "testing_standard_year",
-        "user_read",
-        "user_write",
-        "vendor"
-    ],
-    "feature_sequence": [
-        "_version_",
-        "date_inserted",
-        "date_modified",
-        "md5",
-        "sequence",
-        "sequence_type"
-    ],
-    "genome_feature": [
-        "aa_length",
-        "aa_sequence_md5",
-        "accession",
-        "alt_locus_tag",
-        "annotation",
-        "brc_id",
-        "classifier_round",
-        "classifier_score",
-        "codon_start",
-        "date_inserted",
-        "date_modified",
-        "end",
-        "feature_id",
-        "feature_type",
-        "figfam_id",
-        "gene",
-        "gene_id",
-        "genome_id",
-        "genome_name",
-        "go",
-        "location",
-        "na_length",
-        "na_sequence_md5",
-        "notes",
-        "og_id",
-        "owner",
-        "p2_feature_id",
-        "patric_id",
-        "pdb_accession",
-        "pgfam_id",
-        "plfam_id",
-        "product",
-        "property",
-        "protein_id",
-        "public",
-        "refseq_locus_tag",
-        "segments",
-        "sequence_id",
-        "sog_id",
-        "start",
-        "strand",
-        "taxon_id",
-        "uniprotkb_accession",
-        "user_read",
-        "user_write"
-    ],
-    "genome_sequence": [
-        "_version_",
-        "accession",
-        "chromosome",
-        "date_inserted",
-        "date_modified",
-        "description",
-        "gc_content",
-        "genome_id",
-        "genome_name",
-        "gi",
-        "length",
-        "mol_type",
-        "owner",
-        "p2_sequence_id",
-        "plasmid",
-        "public",
-        "release_date",
-        "segment",
-        "sequence",
-        "sequence_id",
-        "sequence_md5",
-        "sequence_status",
-        "sequence_type",
-        "taxon_id",
-        "topology",
-        "user_read",
-        "user_write",
-        "version"
-    ],
-    "id_ref": [
-        "_version_",
-        "date_inserted",
-        "date_modified",
-        "id",
-        "id_type",
-        "id_value",
-        "uniprotkb_accession"
-    ],
-    "misc_niaid_sgc": [
-        "_version_",
-        "date_inserted",
-        "date_modified",
-        "gene_symbol_collection",
-        "genus",
-        "has_clones",
-        "has_proteins",
-        "selection_criteria",
-        "species",
-        "strain",
-        "target_id",
-        "target_status"
-    ],
-    "pathway": [
-        "_version_",
-        "accession",
-        "alt_locus_tag",
-        "annotation",
-        "date_inserted",
-        "date_modified",
-        "ec_description",
-        "ec_number",
-        "feature_id",
-        "gene",
-        "genome_ec",
-        "genome_id",
-        "genome_name",
-        "id",
-        "owner",
-        "pathway_class",
-        "pathway_ec",
-        "pathway_id",
-        "pathway_name",
-        "patric_id",
-        "product",
-        "public",
-        "refseq_locus_tag",
-        "sequence_id",
-        "taxon_id",
-        "user_read",
-        "user_write"
-    ],
-    "pathway_ref": [
-        "_version_",
-        "date_inserted",
-        "date_modified",
-        "ec_description",
-        "ec_number",
-        "id",
-        "map_location",
-        "map_name",
-        "map_type",
-        "occurrence",
-        "pathway_class",
-        "pathway_id",
-        "pathway_name"
-    ],
-    "ppi": [
-        "_version_",
-        "category",
-        "date_inserted",
-        "date_modified",
-        "detection_method",
-        "domain_a",
-        "domain_b",
-        "evidence",
-        "feature_id_a",
-        "feature_id_b",
-        "gene_a",
-        "gene_b",
-        "genome_id_a",
-        "genome_id_b",
-        "genome_name_a",
-        "genome_name_b",
-        "id",
-        "interaction_type",
-        "interactor_a",
-        "interactor_b",
-        "interactor_desc_a",
-        "interactor_desc_b",
-        "interactor_type_a",
-        "interactor_type_b",
-        "pmid",
-        "refseq_locus_tag_a",
-        "refseq_locus_tag_b",
-        "score",
-        "source_db",
-        "source_id",
-        "taxon_id_a",
-        "taxon_id_b"
-    ],
-    "protein_family_ref": [
-        "_version_",
-        "date_inserted",
-        "date_modified",
-        "family_id",
-        "family_product",
-        "family_type"
-    ],
-    "sequence_feature": [
-        "aa_sequence_md5",
-        "aa_variant",
-        "additional_metadata",
-        "comments",
-        "date_inserted",
-        "date_modified",
-        "end",
-        "evidence_code",
-        "feature_id",
-        "genbank_accession",
-        "gene",
-        "genome_id",
-        "genome_name",
-        "id",
-        "length",
-        "patric_id",
-        "product",
-        "publication",
-        "refseq_locus_tag",
-        "segment",
-        "segments",
-        "sf_category",
-        "sf_id",
-        "sf_name",
-        "sf_sequence",
-        "sf_sequence_md5",
-        "source",
-        "source_aa_sequence",
-        "source_id",
-        "source_sf_location",
-        "source_strain",
-        "start",
-        "subtype",
-        "taxon_id",
-        "variant_types"
-    ],
-    "sequence_feature_vt": [
-        "additional_metadata",
-        "comments",
-        "date_inserted",
-        "date_modified",
-        "id",
-        "sf_category",
-        "sf_id",
-        "sf_name",
-        "sf_sequence",
-        "sf_sequence_md5",
-        "sfvt_genome_count",
-        "sfvt_genome_ids",
-        "sfvt_id",
-        "sfvt_sequence",
-        "sfvt_sequence_md5",
-        "sfvt_variations"
-    ],
-    "sp_gene": [
-        "_version_",
-        "alt_locus_tag",
-        "antibiotics",
-        "antibiotics_class",
-        "classification",
-        "date_inserted",
-        "date_modified",
-        "e_value",
-        "evidence",
-        "feature_id",
-        "function",
-        "gene",
-        "genome_id",
-        "genome_name",
-        "id",
-        "identity",
-        "organism",
-        "owner",
-        "patric_id",
-        "pmid",
-        "product",
-        "property",
-        "property_source",
-        "public",
-        "query_coverage",
-        "refseq_locus_tag",
-        "same_genome",
-        "same_genus",
-        "same_species",
-        "source",
-        "source_id",
-        "subject_coverage",
-        "taxon_id",
-        "user_read",
-        "user_write"
-    ],
-    "sp_gene_ref": [
-        "_version_",
-        "antibiotics",
-        "antibiotics_class",
-        "assertion",
-        "classification",
-        "date_inserted",
-        "date_modified",
-        "function",
-        "gene_id",
-        "gene_name",
-        "genus",
-        "gi",
-        "id",
-        "locus_tag",
-        "organism",
-        "pmid",
-        "product",
-        "property",
-        "source",
-        "source_id",
-        "species"
-    ],
-    "spike_lineage": [
-        "_version_",
-        "country",
-        "date_inserted",
-        "date_modified",
-        "growth_rate",
-        "id",
-        "lineage",
-        "lineage_count",
-        "lineage_of_concern",
-        "month",
-        "prevalence",
-        "region",
-        "sequence_features",
-        "total_isolates"
-    ],
-    "spike_variant": [
-        "_version_",
-        "aa_variant",
-        "country",
-        "date_inserted",
-        "date_modified",
-        "growth_rate",
-        "id",
-        "lineage_count",
-        "month",
-        "prevalence",
-        "region",
-        "sequence_features",
-        "total_isolates"
-    ],
-    "structured_assertion": [
-        "_version_",
-        "comment",
-        "date_inserted",
-        "date_modified",
-        "evidence_code",
-        "feature_id",
-        "id",
-        "owner",
-        "patric_id",
-        "pmid",
-        "property",
-        "public",
-        "refseq_locus_tag",
-        "score",
-        "source",
-        "user_read",
-        "user_write",
-        "value"
-    ],
-    "subsystem": [
-        "_version_",
-        "active",
-        "class",
-        "date_inserted",
-        "date_modified",
-        "feature_id",
-        "gene",
-        "genome_id",
-        "genome_name",
-        "id",
-        "owner",
-        "patric_id",
-        "product",
-        "public",
-        "refseq_locus_tag",
-        "role_id",
-        "role_name",
-        "subclass",
-        "subsystem_id",
-        "subsystem_name",
-        "superclass",
-        "taxon_id",
-        "user_read",
-        "user_write"
-    ],
-    "subsystem_ref": [
-        "_version_",
-        "class",
-        "date_inserted",
-        "date_modified",
-        "description",
-        "id",
-        "notes",
-        "pmid",
-        "role_id",
-        "role_name",
-        "subclass",
-        "subsystem_id",
-        "subsystem_name",
-        "superclass"
-    ],
-    "taxonomy": [
-        "_version_",
-        "cds_mean",
-        "cds_sd",
-        "core_families",
-        "core_family_ids",
-        "description",
-        "division",
-        "genetic_code",
-        "genome_count",
-        "genome_length_mean",
-        "genome_length_sd",
-        "genomes",
-        "genomes_f",
-        "hypothetical_cds_ratio_mean",
-        "hypothetical_cds_ratio_sd",
-        "lineage",
-        "lineage_ids",
-        "lineage_names",
-        "lineage_ranks",
-        "other_names",
-        "parent_id",
-        "plfam_cds_ratio_mean",
-        "plfam_cds_ratio_sd",
-        "taxon_id",
-        "taxon_id_i",
-        "taxon_name",
-        "taxon_rank"
-    ],
-    "protein_structure": [
-        "alignments",
-        "authors",
-        "date_inserted",
-        "date_modified",
-        "feature_id",
-        "file_path",
-        "gene",
-        "genome_id",
-        "institution",
-        "method",
-        "organism_name",
-        "patric_id",
-        "pdb_id",
-        "pmid",
-        "product",
-        "release_date",
-        "resolution",
-        "sequence",
-        "sequence_md5",
-        "taxon_id",
-        "taxon_lineage_ids",
-        "taxon_lineage_names",
-        "title",
-        "uniprotkb_accession"
-    ],
-    "protein_feature": [
-        "aa_sequence_md5",
-        "classification",
-        "comments",
-        "date_inserted",
-        "date_modified",
-        "description",
-        "e_value",
-        "end",
-        "evidence",
-        "feature_id",
-        "feature_type",
-        "gene",
-        "genome_id",
-        "genome_name",
-        "id",
-        "interpro_description",
-        "interpro_id",
-        "length",
-        "patric_id",
-        "product",
-        "publication",
-        "refseq_locus_tag",
-        "score",
-        "segments",
-        "sequence",
-        "source",
-        "source_id",
-        "start",
-        "taxon_id"
-    ],
-    "surveillance": [
-        "additional_metadata",
-        "alcohol_or_other_drug_dependence",
-        "breastfeeding",
-        "chest_imaging_interpretation",
-        "chronic_conditions",
-        "collection_city",
-        "collection_country",
-        "collection_date",
-        "collection_latitude",
-        "collection_longitude",
-        "collection_poi",
-        "collection_season",
-        "collection_state_province",
-        "collection_year",
-        "collector_institution",
-        "collector_name",
-        "comments",
-        "contact_email_address",
-        "contributing_institution",
-        "date_inserted",
-        "date_modified",
-        "daycare_attendance",
-        "days_elapsed_to_disease_status",
-        "days_elapsed_to_sample_collection",
-        "days_elapsed_to_vaccination",
-        "diagnosis",
-        "dialysis",
-        "disease_severity",
-        "disease_status",
-        "duration_of_exposure",
-        "duration_of_treatment",
-        "ecmo",
-        "education",
-        "embargo_end_date",
-        "exposure",
-        "exposure_type",
-        "genome_id",
-        "geographic_group",
-        "hospitalization_duration",
-        "hospitalized",
-        "host_age",
-        "host_capture_status",
-        "host_common_name",
-        "host_ethnicity",
-        "host_group",
-        "host_habitat",
-        "host_health",
-        "host_height",
-        "host_id_type",
-        "host_identifier",
-        "host_natural_state",
-        "host_race",
-        "host_sex",
-        "host_species",
-        "host_weight",
-        "human_leukocyte_antigens",
-        "id",
-        "infections_within_five_years",
-        "influenza_like_illness_over_the_past_year",
-        "initiation_of_treatment",
-        "intensive_care_unit",
-        "last_update_date",
-        "longitudinal_study",
-        "maintenance_medication",
-        "nursing_home_residence",
-        "onset_hours",
-        "other_vaccinations",
-        "oxygen_saturation",
-        "packs_per_day_for_how_many_years",
-        "pathogen_test_interpretation",
-        "pathogen_test_result",
-        "pathogen_test_type",
-        "pathogen_type",
-        "post_visit_medications",
-        "pre_visit_medications",
-        "pregnancy",
-        "primary_living_situation",
-        "profession",
-        "project_identifier",
-        "sample_accession",
-        "sample_identifier",
-        "sample_material",
-        "sample_receipt_date",
-        "sample_transport_medium",
-        "sequence_accession",
-        "source_of_vaccine_information",
-        "species",
-        "strain",
-        "submission_date",
-        "subtype",
-        "sudden_onset",
-        "symptoms",
-        "taxon_lineage_ids",
-        "tobacco_use",
-        "travel_history",
-        "treatment",
-        "treatment_dosage",
-        "treatment_type",
-        "trimester_of_pregnancy",
-        "types_of_allergies",
-        "use_of_personal_protective_equipment",
-        "vaccination_type",
-        "vaccine_dosage",
-        "vaccine_lot_number",
-        "vaccine_manufacturer",
-        "ventilation"
-    ],
-    "serology": [
-        "additional_metadata",
-        "collection_city",
-        "collection_country",
-        "collection_date",
-        "collection_state",
-        "collection_year",
-        "comments",
-        "contributing_institution",
-        "date_inserted",
-        "date_modified",
-        "genbank_accession",
-        "geographic_group",
-        "host_age",
-        "host_age_group",
-        "host_common_name",
-        "host_health",
-        "host_identifier",
-        "host_sex",
-        "host_species",
-        "host_type",
-        "id",
-        "positive_definition",
-        "project_identifier",
-        "sample_accession",
-        "sample_identifier",
-        "serotype",
-        "strain",
-        "taxon_lineage_ids",
-        "test_antigen",
-        "test_interpretation",
-        "test_pathogen",
-        "test_result",
-        "test_type",
-        "virus_identifier"
-    ]
+data_fields_bvbrc = {
+    "antibiotics": {
+        "_version_": str,
+        "antibiotic_name": str,
+        "atc_classification": str,
+        "canonical_smiles": str,
+        "cas_id": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "description": str,
+        "drugbank_interactions": str,
+        "inchi_key": str,
+        "isomeric_smiles": str,
+        "mechanism_of_action": str,
+        "molecular_formula": str,
+        "molecular_weight": str,
+        "pharmacological_classes": str,
+        "pharmacology": str,
+        "pubchem_cid": str,
+        "pubchem_cid_i": str,
+        "synonyms": str
+    },
+    "enzyme_class_ref": {
+        "_version_": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "ec_number": str,
+        "go": str
+    },
+    "epitope": {
+        "_version_": str,
+        "assay_results": str,
+        "bcell_assays": str,
+        "comments": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "end": float,
+        "epitope_id": str,
+        "epitope_sequence": str,
+        "epitope_type": str,
+        "host_name": str,
+        "mhc_assays": str,
+        "organism": str,
+        "protein_accession": str,
+        "protein_id": str,
+        "protein_name": str,
+        "start": float,
+        "taxon_id": str,
+        "taxon_lineage_ids": str,
+        "taxon_lineage_names": str,
+        "tcell_assays": str,
+        "total_assays": float
+    },
+    "epitope_assay": {
+        "_version_": str,
+        "assay_group": str,
+        "assay_id": str,
+        "assay_measurement": str,
+        "assay_measurement_unit": str,
+        "assay_method": str,
+        "assay_result": str,
+        "assay_type": str,
+        "authors": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "end": float,
+        "epitope_id": str,
+        "epitope_sequence": str,
+        "epitope_type": str,
+        "host_name": str,
+        "host_taxon_id": str,
+        "mhc_allele": str,
+        "mhc_allele_class": str,
+        "organism": str,
+        "pdb_id": str,
+        "pmid": str,
+        "protein_accession": str,
+        "protein_id": str,
+        "protein_name": str,
+        "start": float,
+        "taxon_id": str,
+        "taxon_lineage_ids": str,
+        "taxon_lineage_names": str,
+        "title": str
+    },
+    "experiment": {
+        "_version_": str,
+        "additional_data": str,
+        "additional_metadata": str,
+        "biosets": float,
+        "date_inserted": str,
+        "date_modified": str,
+        "detection_instrument": str,
+        "doi": str,
+        "exp_description": str,
+        "exp_id": str,
+        "exp_name": str,
+        "exp_poc": str,
+        "exp_protocol": str,
+        "exp_title": str,
+        "exp_type": str,
+        "experimenters": str,
+        "genome_id": str,
+        "measurement_technique": str,
+        "organism": str,
+        "pmid": str,
+        "public_identifier": str,
+        "public_repository": str,
+        "samples": float,
+        "strain": str,
+        "study_description": str,
+        "study_institution": str,
+        "study_name": str,
+        "study_pi": str,
+        "study_title": str,
+        "taxon_id": str,
+        "taxon_lineage_ids": str,
+        "treatment_amount": str,
+        "treatment_duration": str,
+        "treatment_name": str,
+        "treatment_type": str
+    },
+    "bioset": {
+        "_version_": str,
+        "additional_data": str,
+        "additional_metadata": str,
+        "analysis_group_1": str,
+        "analysis_group_2": str,
+        "analysis_method": str,
+        "bioset_criteria": str,
+        "bioset_description": str,
+        "bioset_id": str,
+        "bioset_name": str,
+        "bioset_result": str,
+        "bioset_type": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "entity_count": str,
+        "entity_type": str,
+        "exp_id": str,
+        "exp_name": str,
+        "exp_title": str,
+        "exp_type": str,
+        "genome_id": str,
+        "organism": str,
+        "protocol": str,
+        "result_type": str,
+        "strain": str,
+        "study_description": str,
+        "study_institution": str,
+        "study_name": str,
+        "study_pi": str,
+        "study_title": str,
+        "taxon_id": str,
+        "taxon_lineage_ids": str,
+        "treatment_amount": str,
+        "treatment_duration": str,
+        "treatment_name": str,
+        "treatment_type": str
+    },
+    "bioset_result": {
+        "_version_": str,
+        "bioset_description": str,
+        "bioset_id": str,
+        "bioset_name": str,
+        "bioset_type": str,
+        "counts": float,
+        "date_inserted": str,
+        "date_modified": str,
+        "entity_id": str,
+        "entity_name": str,
+        "entity_type": str,
+        "exp_id": str,
+        "exp_name": str,
+        "exp_title": str,
+        "exp_type": str,
+        "feature_id": str,
+        "fpkm": float,
+        "gene": str,
+        "gene_id": str,
+        "genome_id": str,
+        "id": str,
+        "locus_tag": str,
+        "log2_fc": float,
+        "organism": str,
+        "other_ids": str,
+        "other_value": float,
+        "p_value": float,
+        "patric_id": str,
+        "product": str,
+        "protein_id": str,
+        "result_type": str,
+        "strain": str,
+        "taxon_id": str,
+        "tpm": float,
+        "treatment_amount": str,
+        "treatment_duration": str,
+        "treatment_name": str,
+        "treatment_type": str,
+        "uniprot_id": str,
+        "z_score": float
+    },
+    "gene_ontology_ref": {
+        "_version_": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "definition": str,
+        "go_id": str,
+        "go_name": str,
+        "ontology": str
+    },
+    "genome": {
+        "_version_": str,
+        "additional_metadata": str,
+        "altitude": str,
+        "antimicrobial_resistance": str,
+        "antimicrobial_resistance_evidence": str,
+        "assembly_accession": str,
+        "assembly_method": str,
+        "authors": str,
+        "bioproject_accession": str,
+        "biosample_accession": str,
+        "biovar": str,
+        "body_sample_site": str,
+        "body_sample_subsite": str,
+        "cds": float,
+        "cds_ratio": float,
+        "cell_shape": str,
+        "checkm_completeness": float,
+        "checkm_contamination": float,
+        "chromosomes": float,
+        "clade": str,
+        "class": str,
+        "coarse_consistency": float,
+        "collection_date": str,
+        "collection_year": float,
+        "comments": str,
+        "common_name": str,
+        "completion_date": str,
+        "contig_l50": float,
+        "contig_n50": float,
+        "contigs": float,
+        "core_families": float,
+        "core_family_ratio": float,
+        "culture_collection": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "depth": str,
+        "disease": str,
+        "family": str,
+        "fine_consistency": float,
+        "gc_content": float,
+        "genbank_accessions": str,
+        "genome_id": str,
+        "genome_length": float,
+        "genome_name": str,
+        "genome_quality": str,
+        "genome_quality_flags": str,
+        "genome_status": str,
+        "genus": str,
+        "geographic_group": str,
+        "geographic_location": str,
+        "gram_stain": str,
+        "h1_clade_global": str,
+        "h1_clade_us": str,
+        "h3_clade": str,
+        "h5_clade": str,
+        "h_type": float,
+        "habitat": str,
+        "host_age": str,
+        "host_common_name": str,
+        "host_gender": str,
+        "host_group": str,
+        "host_health": str,
+        "host_name": str,
+        "host_scientific_name": str,
+        "hypothetical_cds": float,
+        "hypothetical_cds_ratio": float,
+        "isolation_comments": str,
+        "isolation_country": str,
+        "isolation_site": str,
+        "isolation_source": str,
+        "kingdom": str,
+        "lab_host": str,
+        "latitude": str,
+        "lineage": str,
+        "longitude": str,
+        "mat_peptide": float,
+        "missing_core_family_ids": str,
+        "mlst": str,
+        "motility": str,
+        "n_type": float,
+        "ncbi_project_id": str,
+        "nearest_genomes": str,
+        "optimal_temperature": str,
+        "order": str,
+        "organism_name": str,
+        "other_clinical": str,
+        "other_environmental": str,
+        "other_names": str,
+        "other_typing": str,
+        "outgroup_genomes": str,
+        "owner": str,
+        "oxygen_requirement": str,
+        "p2_genome_id": str,
+        "partial_cds": float,
+        "partial_cds_ratio": float,
+        "passage": str,
+        "pathovar": str,
+        "patric_cds": float,
+        "ph1n1_like": str,
+        "phenotype": str,
+        "phylum": str,
+        "plasmids": float,
+        "plfam_cds": float,
+        "plfam_cds_ratio": float,
+        "public": str,
+        "publication": str,
+        "reference_genome": str,
+        "refseq_accessions": str,
+        "refseq_cds": float,
+        "refseq_project_id": str,
+        "rrna": float,
+        "salinity": str,
+        "season": str,
+        "segment": str,
+        "segments": float,
+        "sequencing_centers": str,
+        "sequencing_depth": str,
+        "sequencing_platform": str,
+        "sequencing_status": str,
+        "serovar": str,
+        "species": str,
+        "sporulation": str,
+        "sra_accession": str,
+        "state_province": str,
+        "strain": str,
+        "subclade": str,
+        "subtype": str,
+        "superkingdom": str,
+        "taxon_id": str,
+        "taxon_lineage_ids": str,
+        "taxon_lineage_names": str,
+        "temperature_range": str,
+        "trna": float,
+        "type_strain": str,
+        "user_read": str,
+        "user_write": str
+    },
+    "strain": {
+        "1_pb2": str,
+        "2_pb1": str,
+        "3_pa": str,
+        "4_ha": str,
+        "5_np": str,
+        "6_na": str,
+        "7_mp": str,
+        "8_ns": str,
+        "_version_": str,
+        "collection_date": str,
+        "collection_year": float,
+        "date_inserted": str,
+        "date_modified": str,
+        "family": str,
+        "genbank_accessions": str,
+        "genome_ids": str,
+        "genus": str,
+        "geographic_group": str,
+        "h_type": float,
+        "host_common_name": str,
+        "host_group": str,
+        "host_name": str,
+        "id": str,
+        "isolation_country": str,
+        "l": str,
+        "lab_host": str,
+        "m": str,
+        "n_type": float,
+        "other_segments": str,
+        "owner": str,
+        "passage": str,
+        "public": str,
+        "s": str,
+        "season": str,
+        "segment_count": float,
+        "species": str,
+        "status": str,
+        "strain": str,
+        "subtype": str,
+        "taxon_id": str,
+        "taxon_lineage_ids": str,
+        "taxon_lineage_names": str,
+        "user_read": str,
+        "user_write": str
+    },
+    "genome_amr": {
+        "_version_": str,
+        "antibiotic": str,
+        "computational_method": str,
+        "computational_method_performance": str,
+        "computational_method_version": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "evidence": str,
+        "genome_id": str,
+        "genome_name": str,
+        "id": str,
+        "laboratory_typing_method": str,
+        "laboratory_typing_method_version": str,
+        "laboratory_typing_platform": str,
+        "measurement": str,
+        "measurement_sign": str,
+        "measurement_unit": str,
+        "measurement_value": str,
+        "owner": str,
+        "pmid": str,
+        "public": str,
+        "resistant_phenotype": str,
+        "source": str,
+        "taxon_id": str,
+        "testing_standard": str,
+        "testing_standard_year": float,
+        "user_read": str,
+        "user_write": str,
+        "vendor": str
+    },
+    "feature_sequence": {
+        "_version_": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "md5": str,
+        "sequence": str,
+        "sequence_type": str
+    },
+    "genome_feature": {
+        "aa_length": float,
+        "aa_sequence_md5": str,
+        "accession": str,
+        "alt_locus_tag": str,
+        "annotation": str,
+        "brc_id": str,
+        "classifier_round": float,
+        "classifier_score": float,
+        "codon_start": float,
+        "date_inserted": str,
+        "date_modified": str,
+        "end": float,
+        "feature_id": str,
+        "feature_type": str,
+        "figfam_id": str,
+        "gene": str,
+        "gene_id": str,
+        "genome_id": str,
+        "genome_name": str,
+        "go": str,
+        "location": str,
+        "na_length": float,
+        "na_sequence_md5": str,
+        "notes": str,
+        "og_id": str,
+        "owner": str,
+        "p2_feature_id": str,
+        "patric_id": str,
+        "pdb_accession": str,
+        "pgfam_id": str,
+        "plfam_id": str,
+        "product": str,
+        "property": str,
+        "protein_id": str,
+        "public": str,
+        "refseq_locus_tag": str,
+        "segments": str,
+        "sequence_id": str,
+        "sog_id": str,
+        "start": float,
+        "strand": str,
+        "taxon_id": str,
+        "uniprotkb_accession": str,
+        "user_read": str,
+        "user_write": str
+    },
+    "genome_sequence": {
+        "_version_": str,
+        "accession": str,
+        "chromosome": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "description": str,
+        "gc_content": float,
+        "genome_id": str,
+        "genome_name": str,
+        "gi": float,
+        "length": float,
+        "mol_type": str,
+        "owner": str,
+        "p2_sequence_id": str,
+        "plasmid": str,
+        "public": str,
+        "release_date": str,
+        "segment": str,
+        "sequence": str,
+        "sequence_id": str,
+        "sequence_md5": str,
+        "sequence_status": str,
+        "sequence_type": str,
+        "taxon_id": str,
+        "topology": str,
+        "user_read": str,
+        "user_write": str,
+        "version": str
+    },
+    "id_ref": {
+        "_version_": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "id": str,
+        "id_type": str,
+        "id_value": str,
+        "uniprotkb_accession": str
+    },
+    "misc_niaid_sgc": {
+        "_version_": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "gene_symbol_collection": str,
+        "genus": str,
+        "has_clones": str,
+        "has_proteins": str,
+        "selection_criteria": str,
+        "species": str,
+        "strain": str,
+        "target_id": str,
+        "target_status": str
+    },
+    "pathway": {
+        "_version_": str,
+        "accession": str,
+        "alt_locus_tag": str,
+        "annotation": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "ec_description": str,
+        "ec_number": str,
+        "feature_id": str,
+        "gene": str,
+        "genome_ec": str,
+        "genome_id": str,
+        "genome_name": str,
+        "id": str,
+        "owner": str,
+        "pathway_class": str,
+        "pathway_ec": str,
+        "pathway_id": str,
+        "pathway_name": str,
+        "patric_id": str,
+        "product": str,
+        "public": str,
+        "refseq_locus_tag": str,
+        "sequence_id": str,
+        "taxon_id": str,
+        "user_read": str,
+        "user_write": str
+    },
+    "pathway_ref": {
+        "_version_": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "ec_description": str,
+        "ec_number": str,
+        "id": str,
+        "map_location": str,
+        "map_name": str,
+        "map_type": str,
+        "occurrence": float,
+        "pathway_class": str,
+        "pathway_id": str,
+        "pathway_name": str
+    },
+    "ppi": {
+        "_version_": str,
+        "category": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "detection_method": str,
+        "domain_a": str,
+        "domain_b": str,
+        "evidence": str,
+        "feature_id_a": str,
+        "feature_id_b": str,
+        "gene_a": str,
+        "gene_b": str,
+        "genome_id_a": str,
+        "genome_id_b": str,
+        "genome_name_a": str,
+        "genome_name_b": str,
+        "id": str,
+        "interaction_type": str,
+        "interactor_a": str,
+        "interactor_b": str,
+        "interactor_desc_a": str,
+        "interactor_desc_b": str,
+        "interactor_type_a": str,
+        "interactor_type_b": str,
+        "pmid": str,
+        "refseq_locus_tag_a": str,
+        "refseq_locus_tag_b": str,
+        "score": str,
+        "source_db": str,
+        "source_id": str,
+        "taxon_id_a": str,
+        "taxon_id_b": str
+    },
+    "protein_family_ref": {
+        "_version_": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "family_id": str,
+        "family_product": str,
+        "family_type": str
+    },
+    "sequence_feature": {
+        "aa_sequence_md5": str,
+        "aa_variant": str,
+        "additional_metadata": str,
+        "comments": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "end": float,
+        "evidence_code": str,
+        "feature_id": str,
+        "genbank_accession": str,
+        "gene": str,
+        "genome_id": str,
+        "genome_name": str,
+        "id": str,
+        "length": float,
+        "patric_id": str,
+        "product": str,
+        "publication": str,
+        "refseq_locus_tag": str,
+        "segment": str,
+        "segments": str,
+        "sf_category": str,
+        "sf_id": str,
+        "sf_name": str,
+        "sf_sequence": str,
+        "sf_sequence_md5": str,
+        "source": str,
+        "source_aa_sequence": str,
+        "source_id": str,
+        "source_sf_location": str,
+        "source_strain": str,
+        "start": float,
+        "subtype": str,
+        "taxon_id": str,
+        "variant_types": str
+    },
+    "sequence_feature_vt": {
+        "additional_metadata": str,
+        "comments": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "id": str,
+        "sf_category": str,
+        "sf_id": str,
+        "sf_name": str,
+        "sf_sequence": str,
+        "sf_sequence_md5": str,
+        "sfvt_genome_count": str,
+        "sfvt_genome_ids": str,
+        "sfvt_id": str,
+        "sfvt_sequence": str,
+        "sfvt_sequence_md5": str,
+        "sfvt_variations": str
+    },
+    "sp_gene": {
+        "_version_": str,
+        "alt_locus_tag": str,
+        "antibiotics": str,
+        "antibiotics_class": str,
+        "classification": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "e_value": str,
+        "evidence": str,
+        "feature_id": str,
+        "function": str,
+        "gene": str,
+        "genome_id": str,
+        "genome_name": str,
+        "id": str,
+        "identity": float,
+        "organism": str,
+        "owner": str,
+        "patric_id": str,
+        "pmid": str,
+        "product": str,
+        "property": str,
+        "property_source": str,
+        "public": str,
+        "query_coverage": float,
+        "refseq_locus_tag": str,
+        "same_genome": float,
+        "same_genus": float,
+        "same_species": float,
+        "source": str,
+        "source_id": str,
+        "subject_coverage": float,
+        "taxon_id": str,
+        "user_read": str,
+        "user_write": str
+    },
+    "sp_gene_ref": {
+        "_version_": str,
+        "antibiotics": str,
+        "antibiotics_class": str,
+        "assertion": str,
+        "classification": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "function": str,
+        "gene_id": str,
+        "gene_name": str,
+        "genus": str,
+        "gi": str,
+        "id": str,
+        "locus_tag": str,
+        "organism": str,
+        "pmid": str,
+        "product": str,
+        "property": str,
+        "source": str,
+        "source_id": str,
+        "species": str
+    },
+    "spike_lineage": {
+        "_version_": str,
+        "country": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "growth_rate": float,
+        "id": str,
+        "lineage": str,
+        "lineage_count": float,
+        "lineage_of_concern": str,
+        "month": str,
+        "prevalence": float,
+        "region": str,
+        "sequence_features": str,
+        "total_isolates": float
+    },
+    "spike_variant": {
+        "_version_": str,
+        "aa_variant": str,
+        "country": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "growth_rate": float,
+        "id": str,
+        "lineage_count": float,
+        "month": str,
+        "prevalence": float,
+        "region": str,
+        "sequence_features": str,
+        "total_isolates": float
+    },
+    "structured_assertion": {
+        "_version_": str,
+        "comment": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "evidence_code": str,
+        "feature_id": str,
+        "id": str,
+        "owner": str,
+        "patric_id": str,
+        "pmid": str,
+        "property": str,
+        "public": str,
+        "refseq_locus_tag": str,
+        "score": str,
+        "source": str,
+        "user_read": str,
+        "user_write": str,
+        "value": str
+    },
+    "subsystem": {
+        "_version_": str,
+        "active": str,
+        "class": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "feature_id": str,
+        "gene": str,
+        "genome_id": str,
+        "genome_name": str,
+        "id": str,
+        "owner": str,
+        "patric_id": str,
+        "product": str,
+        "public": str,
+        "refseq_locus_tag": str,
+        "role_id": str,
+        "role_name": str,
+        "subclass": str,
+        "subsystem_id": str,
+        "subsystem_name": str,
+        "superclass": str,
+        "taxon_id": str,
+        "user_read": str,
+        "user_write": str
+    },
+    "subsystem_ref": {
+        "_version_": str,
+        "class": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "description": str,
+        "id": str,
+        "notes": str,
+        "pmid": str,
+        "role_id": str,
+        "role_name": str,
+        "subclass": str,
+        "subsystem_id": str,
+        "subsystem_name": str,
+        "superclass": str
+    },
+    "taxonomy": {
+        "_version_": str,
+        "cds_mean": float,
+        "cds_sd": float,
+        "core_families": float,
+        "core_family_ids": str,
+        "description": str,
+        "division": str,
+        "genetic_code": str,
+        "genome_count": float,
+        "genome_length_mean": float,
+        "genome_length_sd": float,
+        "genomes": float,
+        "genomes_f": str,
+        "hypothetical_cds_ratio_mean": float,
+        "hypothetical_cds_ratio_sd": float,
+        "lineage": str,
+        "lineage_ids": str,
+        "lineage_names": str,
+        "lineage_ranks": str,
+        "other_names": str,
+        "parent_id": str,
+        "plfam_cds_ratio_mean": float,
+        "plfam_cds_ratio_sd": float,
+        "taxon_id": str,
+        "taxon_id_i": str,
+        "taxon_name": str,
+        "taxon_rank": str
+    },
+    "protein_structure": {
+        "alignments": str,
+        "authors": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "feature_id": str,
+        "file_path": str,
+        "gene": str,
+        "genome_id": str,
+        "institution": str,
+        "method": str,
+        "organism_name": str,
+        "patric_id": str,
+        "pdb_id": str,
+        "pmid": str,
+        "product": str,
+        "release_date": str,
+        "resolution": str,
+        "sequence": str,
+        "sequence_md5": str,
+        "taxon_id": str,
+        "taxon_lineage_ids": str,
+        "taxon_lineage_names": str,
+        "title": str,
+        "uniprotkb_accession": str
+    },
+    "protein_feature": {
+        "aa_sequence_md5": str,
+        "classification": str,
+        "comments": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "description": str,
+        "e_value": str,
+        "end": float,
+        "evidence": str,
+        "feature_id": str,
+        "feature_type": str,
+        "gene": str,
+        "genome_id": str,
+        "genome_name": str,
+        "id": str,
+        "interpro_description": str,
+        "interpro_id": str,
+        "length": float,
+        "patric_id": str,
+        "product": str,
+        "publication": str,
+        "refseq_locus_tag": str,
+        "score": float,
+        "segments": str,
+        "sequence": str,
+        "source": str,
+        "source_id": str,
+        "start": float,
+        "taxon_id": str
+    },
+    "surveillance": {
+        "additional_metadata": str,
+        "alcohol_or_other_drug_dependence": str,
+        "breastfeeding": str,
+        "chest_imaging_interpretation": str,
+        "chronic_conditions": str,
+        "collection_city": str,
+        "collection_country": str,
+        "collection_date": str,
+        "collection_latitude": float,
+        "collection_longitude": float,
+        "collection_poi": str,
+        "collection_season": str,
+        "collection_state_province": str,
+        "collection_year": str,
+        "collector_institution": str,
+        "collector_name": str,
+        "comments": str,
+        "contact_email_address": str,
+        "contributing_institution": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "daycare_attendance": str,
+        "days_elapsed_to_disease_status": str,
+        "days_elapsed_to_sample_collection": str,
+        "days_elapsed_to_vaccination": str,
+        "diagnosis": str,
+        "dialysis": str,
+        "disease_severity": str,
+        "disease_status": str,
+        "duration_of_exposure": str,
+        "duration_of_treatment": str,
+        "ecmo": str,
+        "education": str,
+        "embargo_end_date": str,
+        "exposure": str,
+        "exposure_type": str,
+        "genome_id": str,
+        "geographic_group": str,
+        "hospitalization_duration": str,
+        "hospitalized": str,
+        "host_age": str,
+        "host_capture_status": str,
+        "host_common_name": str,
+        "host_ethnicity": str,
+        "host_group": str,
+        "host_habitat": str,
+        "host_health": str,
+        "host_height": str,
+        "host_id_type": str,
+        "host_identifier": str,
+        "host_natural_state": str,
+        "host_race": str,
+        "host_sex": str,
+        "host_species": str,
+        "host_weight": str,
+        "human_leukocyte_antigens": str,
+        "id": str,
+        "infections_within_five_years": str,
+        "influenza_like_illness_over_the_past_year": str,
+        "initiation_of_treatment": str,
+        "intensive_care_unit": str,
+        "last_update_date": str,
+        "longitudinal_study": str,
+        "maintenance_medication": str,
+        "nursing_home_residence": str,
+        "onset_hours": str,
+        "other_vaccinations": str,
+        "oxygen_saturation": str,
+        "packs_per_day_for_how_many_years": str,
+        "pathogen_test_interpretation": str,
+        "pathogen_test_result": str,
+        "pathogen_test_type": str,
+        "pathogen_type": str,
+        "post_visit_medications": str,
+        "pre_visit_medications": str,
+        "pregnancy": str,
+        "primary_living_situation": str,
+        "profession": str,
+        "project_identifier": str,
+        "sample_accession": str,
+        "sample_identifier": str,
+        "sample_material": str,
+        "sample_receipt_date": str,
+        "sample_transport_medium": str,
+        "sequence_accession": str,
+        "source_of_vaccine_information": str,
+        "species": str,
+        "strain": str,
+        "submission_date": str,
+        "subtype": str,
+        "sudden_onset": str,
+        "symptoms": str,
+        "taxon_lineage_ids": str,
+        "tobacco_use": str,
+        "travel_history": str,
+        "treatment": str,
+        "treatment_dosage": str,
+        "treatment_type": str,
+        "trimester_of_pregnancy": str,
+        "types_of_allergies": str,
+        "use_of_personal_protective_equipment": str,
+        "vaccination_type": str,
+        "vaccine_dosage": str,
+        "vaccine_lot_number": str,
+        "vaccine_manufacturer": str,
+        "ventilation": str
+    },
+    "serology": {
+        "additional_metadata": str,
+        "collection_city": str,
+        "collection_country": str,
+        "collection_date": str,
+        "collection_state": str,
+        "collection_year": str,
+        "comments": str,
+        "contributing_institution": str,
+        "date_inserted": str,
+        "date_modified": str,
+        "genbank_accession": str,
+        "geographic_group": str,
+        "host_age": str,
+        "host_age_group": str,
+        "host_common_name": str,
+        "host_health": str,
+        "host_identifier": str,
+        "host_sex": str,
+        "host_species": str,
+        "host_type": str,
+        "id": str,
+        "positive_definition": str,
+        "project_identifier": str,
+        "sample_accession": str,
+        "sample_identifier": str,
+        "serotype": str,
+        "strain": str,
+        "taxon_lineage_ids": str,
+        "test_antigen": str,
+        "test_interpretation": str,
+        "test_pathogen": str,
+        "test_result": str,
+        "test_type": str,
+        "virus_identifier": str
+    }
 }
diff --git a/rescript/plugin_setup.py b/rescript/plugin_setup.py
index 45b8c8e..5961cc0 100644
--- a/rescript/plugin_setup.py
+++ b/rescript/plugin_setup.py
@@ -13,10 +13,10 @@
 from qiime2.core.type import TypeMatch
 from qiime2.plugin import (Str, Plugin, Choices, List, Citations, Range, Int,
                            Float, Visualization, Bool, TypeMap, Metadata,
-                           MetadataColumn, Categorical)
+                           MetadataColumn, Categorical, Numeric)
 
-from .bv_brc import fetch_genomes_bv_brc, fetch_metadata_bv_brc, \
-    fetch_genome_features_bv_brc
+from .bv_brc import get_bv_brc_genomes, get_bv_brc_metadata, \
+    get_bv_brc_genome_features, data_fields_bvbrc
 from .subsample import subsample_fasta
 from .trim_alignment import trim_alignment
 from .merge import merge_taxa
@@ -1239,129 +1239,182 @@
 )
 
 
-datatypes_metadata = [
-    "antibiotics",
-    "enzyme_class_ref",
-    "epitope",
-    "epitope_assay",
-    "experiment",
-    "bioset",
-    "bioset_result",
-    "gene_ontology_ref",
-    "genome",
-    "strain",
-    "genome_amr",
-    "feature_sequence",
-    "genome_feature",
-    "genome_sequence",
-    "id_ref",
-    "misc_niaid_sgc",
-    "pathway",
-    "pathway_ref",
-    "ppi",
-    "protein_family_ref",
-    "sequence_feature",
-    "sequence_feature_vt",
-    "sp_gene",
-    "sp_gene_ref",
-    "spike_lineage",
-    "spike_variant",
-    "structured_assertion",
-    "subsystem",
-    "subsystem_ref",
-    "taxonomy",
-    "protein_structure",
-    "protein_feature",
-    "surveillance",
-    "serology"
-]
+bv_brc_rql_query = ('Query in RQL format. To download all data '
+                    'for genome_ids "224308.43" and "2030927.4755", the RQL '
+                    'query looks like this: "in(genome_id,(224308.43,'
+                    '2030927.4755))". While "in" is an RQL operator, '
+                    '"genome_id" is a data field and "224308.43,'
+                    '2030927.4755" are the values. It is important to percent '
+                    'encode values if they contain illegal characters like '
+                    'spaces. The values "Bacillus subtilis" and '
+                    '"Bacteroidales bacterium" have to be provided with '
+                    'percent encoded quotes (%22) and spaces (%20) like '
+                    'this: "in(species,(%22Bacillus%20subtilis%22,'
+                    '%22Bacteroidales%20bacterium%22))". Check '
+                    'https://www.bv-brc.org/api/doc/ for documentation on '
+                    'data types and corresponding data fields.')
+
+bv_brc_ids_metadata = ('A metadata column obtained with the action '
+                       'get-bv-brc-metadata that can be used as a query.')
+bv_brc_ids = ('IDs/values of the corresponding data field. This parameter can '
+              'only be used in conjunction with the "data-field" parameter. '
+              'Retrieves all data associated with these IDs/values in the '
+              'specified data field.')
+bv_brc_data_field = ('Data field of the corresponding data type. This '
+                     'parameter can only be used in conjunction with the '
+                     '"ids" parameter. Retrieves all data associated with '
+                     'the IDs/values specified in parameter "ids" in this '
+                     'data field.')
+
 
 plugin.methods.register_function(
-    function=fetch_genomes_bv_brc,
+    function=get_bv_brc_genomes,
     inputs={},
     parameters={
+        'ids_metadata': MetadataColumn[Numeric | Categorical],
         'rql_query': Str,
-        'genome_ids': List[Str],
+        'data_field': Str,
+        'ids': Str,
         'ranks': List[Str % Choices(_allowed_ranks)],
+        'rank_propagation': Bool,
     },
     outputs=[('genomes', GenomeData[DNASequence]),
              ('taxonomy', FeatureData[Taxonomy])],
     input_descriptions={},
     parameter_descriptions={
-        'rql_query': 'Query in RQL format. Check '
-                     'https://www.bv-brc.org/api/doc/genome_sequence '
-                     'for documentation.',
-        'genome_ids': 'List of genome IDs from BV-BRC.',
-        'ranks': 'List of taxonomic ranks for building a taxonomy from the '
-                 "NCBI Taxonomy database. [default: '" +
+        'ids_metadata': bv_brc_ids_metadata,
+        'rql_query': bv_brc_rql_query,
+        'data_field': 'Data field of the data type "genome_sequence". This '
+                      'parameter can only be used in conjunction with the '
+                      '"ids" parameter. Retrieves all genomes associated '
+                      'with the IDs/values specified in parameter "ids" in '
+                      'this data field. Check '
+                      'https://www.bv-brc.org/api/doc/genome_sequence for '
+                      'allowed data fields.',
+        'ids': bv_brc_ids,
+        'ranks': 'List of taxonomic ranks for building a taxonomy. '
+                 "[default: '" +
                  "', '".join(_default_ranks) + "']",
+        'rank_propagation': RANK_PROPAGATE_DESCRIPTION,
     },
     output_descriptions={
-        'genomes': 'genomes',
-        'taxonomy': 'Taxonomy data.'
+        'genomes': 'Genome sequences for specified query.',
+        'taxonomy': 'Taxonomy data for all sequences.'
     },
-    name='fetch genomes',
-    description="fetch genomes",
+    name='Get genome sequences from the BV-BRC database.',
+    description="Fetch genome sequences from BV-BRC. BV-BRC (Bacterial and "
+                "Viral Bioinformatics Resource Center) is a database for "
+                "bacterial and viral genomes, annotations, and metadata. "
+                "There are three ways to query data: You can use an RQL "
+                "query to refine your search and get targeted genomes. By "
+                "providing IDs/values and a corresponding data field, "
+                "you can retrieve all genomes associated with those specific "
+                "values in that data field. And as a third option a metadata "
+                "column can be provided, to use metadata obtained with the "
+                "action get-bv-brc-metadata as a new query. Check "
+                "https://www.bv-brc.org/api/doc/ for documentation.",
     citations=[citations['olson2023introducing']]
 )
 
 
 plugin.methods.register_function(
-    function=fetch_metadata_bv_brc,
+    function=get_bv_brc_metadata,
     inputs={},
     parameters={
-        'data_type': Str % Choices(datatypes_metadata),
-        'rql_query': Str
+        'ids_metadata': MetadataColumn[Numeric | Categorical],
+        'data_type': Str % Choices(list(data_fields_bvbrc.keys())),
+        'rql_query': Str,
+        'data_field': Str,
+        'ids': Str,
     },
     outputs=[('metadata', ImmutableMetadata)],
     input_descriptions={},
     parameter_descriptions={
-        'data_type': 'BV-BCR data type. Check https://www.bv-brc.org/api/doc/ '
+        'ids_metadata': bv_brc_ids_metadata,
+        'data_type': 'BV-BRC data type for which metadata should be '
+                     'downloaded. Check https://www.bv-brc.org/api/doc/ '
                      'for documentation.',
-        'rql_query': 'Query in RQL format. Check '
-                     'https://www.bv-brc.org/api/doc/ for documentation.'
+        'rql_query': bv_brc_rql_query,
+        'data_field': 'Data field of the specified "data-type". This '
+                      'parameter can only be used in conjunction with the '
+                      '"ids" parameter. Retrieves metadata associated '
+                      'with the IDs/values specified in parameter "ids" in '
+                      'this data field. Check '
+                      'https://www.bv-brc.org/api/doc/ for allowed data '
+                      'fields in the specified "data-type".',
+        'ids': bv_brc_ids,
     },
     output_descriptions={
-        'metadata': 'metadata'},
+        'metadata': 'BV-BRC metadata of specified data type.'
+    },
-    name='Fetch BV-BCR metadata.',
+    name='Fetch BV-BRC metadata.',
-    description="Fetch BV-BCR metadata for a specific data type with an RQL "
-                "query.",
+    description="Fetch BV-BRC metadata for a specific data type. BV-BRC ("
+                "Bacterial and Viral Bioinformatics Resource Center) is a "
+                "database for bacterial and viral genomes, annotations, "
+                "and metadata. There are three ways to query data: You can "
+                "use an RQL query to refine your search and get targeted "
+                "results. By providing IDs/values and a corresponding data "
+                "field, you can retrieve all metadata associated with those "
+                "specific values in that data field. And as a third option a "
+                "metadata column can be provided, to use the results from "
+                "other data types as a new query. Check "
+                "https://www.bv-brc.org/api/doc/ for documentation.",
     citations=[citations['olson2023introducing']]
 )
 
 
 plugin.methods.register_function(
-    function=fetch_genome_features_bv_brc,
+    function=get_bv_brc_genome_features,
     inputs={},
     parameters={
+        'ids_metadata': MetadataColumn[Numeric | Categorical],
         'rql_query': Str,
+        'data_field': Str,
+        'ids': Str,
         'ranks': List[Str % Choices(_allowed_ranks)],
-        'taxon_ids': List[Str],
-
+        'rank_propagation': Bool,
     },
     outputs=[
         ('genes', GenomeData[Genes]),
         ('proteins', GenomeData[Proteins]),
-        ('taxonomy', FeatureData[Taxonomy])
+        ('taxonomy', FeatureData[Taxonomy]),
+        ('loci', GenomeData[Loci])
     ],
     input_descriptions={},
     parameter_descriptions={
-        'rql_query': 'Query in RQL format. Check '
-                     'https://www.bv-brc.org/api/doc/genome_feature '
-                     'for documentation.',
-        'taxon_ids': 'List of taxon IDs from BV-BRC.',
-        'ranks': 'List of taxonomic ranks for building a taxonomy from the '
-                 "NCBI Taxonomy database. [default: '" +
-                 "', '".join(_default_ranks) + "']",
+        'ids_metadata': bv_brc_ids_metadata,
+        'rql_query': bv_brc_rql_query,
+        'data_field': 'Data field of the data type "genome_feature". This '
+                      'parameter can only be used in conjunction with the '
+                      '"ids" parameter. Retrieves all data associated with '
+                      'the IDs/values specified in parameter "ids" in this '
+                      'data field. Check '
+                      'https://www.bv-brc.org/api/doc/genome_feature for '
+                      'allowed data fields.',
+        'ids': bv_brc_ids,
+        'ranks': 'List of taxonomic ranks for building a taxonomy. '
+                 "[default: '" + "', '".join(_default_ranks) + "']",
+        'rank_propagation': RANK_PROPAGATE_DESCRIPTION,
     },
     output_descriptions={
-        'genes': 'genes',
+        'genes': 'Gene',
         'proteins': 'proteins',
-        'taxonomy': 'taxonomy',
+        'taxonomy': 'Taxonomy data for all sequences.',
+        'loci': 'loci',
     },
     name='Fetch genome features from BV-BRC.',
     description='Fetch DNA and protein sequences of genome features from '
-                'BV-BRC.',
+                'BV-BRC. BV-BRC (Bacterial and Viral Bioinformatics Resource '
+                'Center) is a database for bacterial and viral genomes, '
+                'annotations, and metadata. There are three ways to query '
+                'data: You can use an RQL query to refine your search and '
+                'get targeted features. By providing IDs/values and a '
+                'corresponding data field, you can retrieve all features '
+                'associated with those specific values in that data field. '
+                'And as a third option a metadata column can be provided, '
+                'to use metadata obtained with the action '
+                'get-bv-brc-metadata as a new query. Check '
+                'https://www.bv-brc.org/api/doc/ for documentation.',
     citations=[citations['olson2023introducing']]
 )
 
diff --git a/rescript/tests/test_bv_brc.py b/rescript/tests/test_bv_brc.py
index 75939e4..3a82e58 100644
--- a/rescript/tests/test_bv_brc.py
+++ b/rescript/tests/test_bv_brc.py
@@ -5,51 +5,91 @@
 #
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
-import os
 import unittest
 from unittest.mock import Mock, patch, mock_open, MagicMock
 
 import pandas as pd
-from q2_types.feature_data import TSVTaxonomyDirectoryFormat
-from q2_types.genome_data import GenomeSequencesDirectoryFormat, \
-    GenesDirectoryFormat, ProteinsDirectoryFormat
+import qiime2
 from qiime2.plugin.testing import TestPluginBase
 
-from rescript.bv_brc import fetch_genomes_bv_brc, fetch_metadata_bv_brc, \
-    fetch_genome_features_bv_brc, id_list_handling, \
-    error_handling, download_data, json_to_fasta, \
-    parse_lineage_names_with_ranks, parse_fasta_to_dict
+from rescript.bv_brc import get_bv_brc_genomes, get_bv_brc_metadata, \
+    get_bv_brc_genome_features, parameter_validation, \
+    error_handling, download_data, create_genome_fasta, \
+    create_taxonomy_entry, get_loci, read_tsv_data_with_dtypes, process_loci, \
+    get_sequences, get_taxonomy, create_taxonomy
 
 
 class TestIDListHandling(TestPluginBase):
     package = 'rescript.tests'
 
-    def test_error_both_parameters_given(self):
+    def test_missing_data_type(self):
+        # Test when data_type is None
+        with self.assertRaisesRegex(ValueError, "data-type"):
+            parameter_validation()
+
+    def test_rql_query_and_other_params(self):
+        # Test when rql_query is specified with other conflicting parameters
         with self.assertRaisesRegex(ValueError,
-                                    "Parameters rql_query and ids can't be "
-                                    "used simultaneously."):
-            id_list_handling(rql_query="some_query",
-                             ids=[1, 2, 3],
-                             parameter_name="ids",
-                             data_field="id")
-
-    def test_error_neither_parameter_given(self):
+                                    "rql_query.*can't.*simultaneously"):
+            parameter_validation(rql_query="some_query", ids=[1, 2],
+                                 data_field="genome_id", data_type="genome")
+
+    def test_metadata_and_other_params(self):
+        # Test when metadata is specified with other conflicting parameters
         with self.assertRaisesRegex(ValueError,
-                                    "At least one of the parameters rql_query "
-                                    "and ids has to be given."):
-            id_list_handling(rql_query="",
-                             ids=[],
-                             parameter_name="ids",
-                             data_field="id")
-
-    def test_correct_rql_query_generation(self):
-        result = id_list_handling(
-            rql_query="",
-            ids=[1, 2, 3],
-            parameter_name="ids",
-            data_field="id")
-        expected_query = "in(id,(1,2,3))"
-        self.assertEqual(result, expected_query)
+                                    "metadata.*can't.*simultaneously"):
+            parameter_validation(metadata="metadata", ids=[1, 2],
+                                 data_field="genome_id", data_type="genome")
+
+    def test_ids_without_data_field(self):
+        # Test when ids is specified without data_field
+        with self.assertRaisesRegex(ValueError, r"ids.*data-field"):
+            parameter_validation(ids=[1, 2], data_type="genome")
+
+    def test_no_rql_query_ids_metadata(self):
+        # Test when neither rql_query, ids, nor metadata is specified
+        with self.assertRaisesRegex(ValueError, "rql-query.*ids.*metadata"):
+            parameter_validation(data_type="genome")
+
+    def test_invalid_data_field_for_data_type(self):
+        # Test when the data_field is not valid for the given data_type
+        with self.assertRaisesRegex(ValueError, "data-field.*permitted"):
+            parameter_validation(ids=[1, 2], data_field="invalid_field",
+                                 data_type="genome")
+
+    def test_valid_rql_query_generation(self):
+        rql_query = parameter_validation(
+            ids=["Bacillus subtilis", "Bacteroidales bacterium"],
+            data_field="species",
+            data_type="genome"
+        )
+        self.assertEqual(rql_query, "in(species,(%22Bacillus%20subtilis%22,"
+                                    "%22Bacteroidales%20bacterium%22))")
+
+    def test_valid_rql_query_with_metadata(self):
+        # Create mock metadata objects
+        mock_metadata = MagicMock()
+        mock_series = MagicMock()
+
+        # Mock the .name attribute of the Series to return "species"
+        mock_series.name = "species"
+
+        # Mock the return value of to_series to be the mock_series
+        mock_metadata.to_series.return_value = mock_series
+
+        # Make iterating the series yield the ids (mimicking a list of ids)
+        mock_series.__iter__.return_value = iter(
+            ["Bacillus subtilis", "Bacteroidales bacterium"])
+
+        # Call the function with the mock metadata
+        rql_query = parameter_validation(
+            metadata=mock_metadata,
+            data_type="genome"
+        )
+
+        # Assert that the rql_query is correctly generated
+        self.assertEqual(rql_query, "in(species,(%22Bacillus%20subtilis%22,"
+                                    "%22Bacteroidales%20bacterium%22))")
 
 
 class TestErrorHandling(TestPluginBase):
@@ -96,58 +136,143 @@ def test_unhandled_response(self):
 class TestDownloadData(TestPluginBase):
     package = 'rescript.tests'
 
-    @patch('rescript.bv_brc.requests.get')
+    @patch('rescript.bv_brc.requests.post')
+    @patch('rescript.bv_brc.read_tsv_data_with_dtypes')
     @patch('rescript.bv_brc.error_handling')
-    def test_download_data_success(self, mock_error_handling,
-                                   mock_requests_get):
-        # Mock the requests.get response for a successful request
-        mock_response = Mock()
-        mock_response.status_code = 200
-        mock_requests_get.return_value = mock_response
+    def test_download_data_json_batch(self, mock_error_handling,
+                                      mock_read_data_with_dtypes, mock_post):
+        # Mock the response for the first batch (25,000 entries)
+        mock_response_1 = MagicMock()
+        mock_response_1.status_code = 200
+        mock_response_1.json.return_value = [{"id": i} for i in range(25000)]
+
+        # Mock the response for the second batch (fewer than 25,000 entries)
+        mock_response_2 = MagicMock()
+        mock_response_2.status_code = 200
+        mock_response_2.json.return_value = [{"id": i} for i in
+                                             range(25000, 25010)]
+
+        # The first call to requests.post returns 25,000 entries,
+        # the second call returns 10 entries.
+        mock_post.side_effect = [mock_response_1, mock_response_2]
+
+        # Call the function for JSON
+        result = download_data(data_type="genome", query="eq(id,1)",
+                               accept="application/json", select=["genome_id"])
+
+        # Check that the result is as expected (25,000 + 10 entries)
+        self.assertEqual(len(result), 25010)
+        self.assertEqual(result[0], {"id": 0})
+        self.assertEqual(result[-1], {"id": 25009})
+
+        # Ensure requests.post is called with the correct parameters
+        mock_post.assert_any_call(
+            url="https://www.bv-brc.org/api/genome/",
+            data="eq(id,1)&limit(25000,0)&select(genome_id)",
+            headers={
+                'Content-Type': 'application/rqlquery+x-www-form-urlencoded',
+                'ACCEPT': 'application/json'}
+        )
+
+        mock_post.assert_any_call(
+            url="https://www.bv-brc.org/api/genome/",
+            data="eq(id,1)&limit(25000,25000)&select(genome_id)",
+            headers={
+                'Content-Type': 'application/rqlquery+x-www-form-urlencoded',
+                'ACCEPT': 'application/json'}
+        )
 
-        url = "http://example.com/data"
-        data_type = "some_type"
+    @patch('rescript.bv_brc.requests.post')
+    @patch('rescript.bv_brc.read_tsv_data_with_dtypes')
+    @patch('rescript.bv_brc.error_handling')
+    def test_download_data_tsv(self, mock_error_handling,
+                               mock_read_data_with_dtypes, mock_post):
+        # Mock the response for the TSV accept format
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_post.return_value = mock_response
+
+        # Mock reading TSV data
+        mock_df = pd.DataFrame({"id": [1, 2]})
+        mock_read_data_with_dtypes.return_value = mock_df
+
+        # Call the function for TSV
+        result = download_data(data_type="genome", query="eq(id,1)",
+                               accept="text/tsv")
+
+        # Check that the result equals the mocked DataFrame
+        pd.testing.assert_frame_equal(result, pd.DataFrame({"id": [1, 2]}))
+
+        # Ensure requests.post is called with the correct parameters
+        mock_post.assert_called_with(
+            url="https://www.bv-brc.org/api/genome/",
+            data="eq(id,1)&limit(25000,0)",
+            headers={
+                'Content-Type': 'application/rqlquery+x-www-form-urlencoded',
+                'ACCEPT': 'text/tsv'}
+        )
 
-        result = download_data(url, data_type)
+        # Ensure read_tsv_data_with_dtypes was called
+        mock_read_data_with_dtypes.assert_called_once_with(
+            response=mock_response, data_type="genome")
 
-        mock_requests_get.assert_called_once_with(url)
-        self.assertEqual(result, mock_response)
+    @patch('rescript.bv_brc.requests.post')
+    @patch('rescript.bv_brc.error_handling')
+    def test_download_data_gff(self, mock_error_handling, mock_post):
+        # Mock the response for the GFF accept format
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.text = "mock_gff_data"
+        mock_post.return_value = mock_response
+
+        # Call the function for GFF
+        result = download_data(data_type="genome", query="eq(id,1)",
+                               accept="application/gff")
+
+        # Check that the result is as expected
+        self.assertEqual(result, "mock_gff_data")
+
+        # Ensure requests.post is called with the correct parameters
+        mock_post.assert_called_with(
+            url="https://www.bv-brc.org/api/genome/",
+            data="eq(id,1)&limit(25000,0)",
+            headers={
+                'Content-Type': 'application/rqlquery+x-www-form-urlencoded',
+                'ACCEPT': 'application/gff'}
+        )
 
-    @patch('rescript.bv_brc.requests.get')
+    @patch('rescript.bv_brc.requests.post')
     @patch('rescript.bv_brc.error_handling')
-    def test_download_data_error_400(self, mock_error_handling,
-                                     mock_requests_get):
-        # Mock the requests.get response for a 400 Bad Request
-        mock_response = Mock()
+    def test_download_data_400_error(self, mock_error_handling, mock_post):
+        # Mock the response for 400 error
+        mock_response = MagicMock()
         mock_response.status_code = 400
-        mock_requests_get.return_value = mock_response
-
-        url = "http://example.com/data"
-        data_type = "some_type"
+        mock_post.return_value = mock_response
 
-        download_data(url, data_type)
+        # Call the function and check that error_handling is called
+        download_data(data_type="genome", query="eq(id,1)",
+                      accept="application/json")
 
-        mock_requests_get.assert_called_once_with(url)
-        mock_error_handling.assert_called_once_with(mock_response, data_type)
+        # Ensure error_handling was called for the 400 response
+        mock_error_handling.assert_called_once_with(response=mock_response,
+                                                    data_type="genome")
 
-    @patch('rescript.bv_brc.requests.get')
-    @patch('rescript.bv_brc.error_handling')
-    def test_download_data_other_error(self, mock_error_handling,
-                                       mock_requests_get):
-        # Mock the requests.get response for any other error
-        mock_response = Mock()
+    @patch('rescript.bv_brc.requests.post')
+    def test_download_data_unexpected_error(self, mock_post):
+        # Mock the response for unexpected error
+        mock_response = MagicMock()
         mock_response.status_code = 500
-        mock_response.text = "Server Error"
-        mock_requests_get.return_value = mock_response
+        mock_response.text = "Internal Server Error"
+        mock_post.return_value = mock_response
 
-        url = "http://example.com/data"
-        data_type = "some_type"
+        # Check that the function raises a ValueError for non-200, non-400
+        # responses
+        with self.assertRaises(ValueError) as context:
+            download_data(data_type="genome", query="eq(id,1)",
+                          accept="application/json")
 
-        with self.assertRaisesRegex(ValueError, "Server Error"):
-            download_data(url, data_type)
-
-        mock_requests_get.assert_called_once_with(url)
-        mock_error_handling.assert_not_called()
+        # Ensure the ValueError message equals the mocked response text
+        self.assertEqual(str(context.exception), "Internal Server Error")
 
 
 class TestJsonToFasta(TestPluginBase):
@@ -178,23 +303,23 @@ def setUp(self):
 
     @patch('rescript.bv_brc.open', new_callable=mock_open)
     def test_json_to_fasta_single_genome(self, mock_file):
-        json_to_fasta(self.json_input_1, "/fake/dir")
+        result = create_genome_fasta(self.json_input_1)
 
         # Expected FASTA content
-        expected_fasta = ">accn|acc1   desc1   [genome_name1 | genome1]\nATGC"
+        expected_fasta = ">acc1   desc1   [genome_name1 | genome1]\nATGC"
 
         # Check if the file was created with the correct path and content
-        mock_file.assert_called_once_with("/fake/dir/genome1.fasta", 'w')
+        mock_file.assert_called_once_with(f"{str(result)}/genome1.fasta", 'w')
         mock_file().write.assert_called_once_with(expected_fasta)
 
     @patch('rescript.bv_brc.open', new_callable=mock_open)
     def test_json_to_fasta_multiple_genomes(self, mock_file):
-        json_to_fasta(self.json_input_1 + self.json_input_2, "/fake/dir")
+        create_genome_fasta(self.json_input_1 + self.json_input_2)
 
         # Expected FASTA content
-        expected_fasta_genome1 = (">accn|acc1   desc1   [genome_name1 | "
+        expected_fasta_genome1 = (">acc1   desc1   [genome_name1 | "
                                   "genome1]\nATGC")
-        expected_fasta_genome2 = (">accn|acc2   desc2   [genome_name2 | "
+        expected_fasta_genome2 = (">acc2   desc2   [genome_name2 | "
                                   "genome2]\nCGTA")
 
         # Check if the files were created with the correct path and content
@@ -203,277 +328,119 @@ def test_json_to_fasta_multiple_genomes(self, mock_file):
 
     @patch('rescript.bv_brc.open', new_callable=mock_open)
     def test_json_to_fasta_multiple_sequences_same_genome(self, mock_file):
-        json_to_fasta(self.json_input_1 + self.json_input_1, "/fake/dir")
+        result = create_genome_fasta(self.json_input_1 + self.json_input_1)
 
         # Expected FASTA content
-        expected_fasta = (">accn|acc1   desc1   [genome_name1 | "
-                          "genome1]\nATGC\n"
-                          ">accn|acc1   desc1   [genome_name1 | "
-                          "genome1]\nATGC")
+        expected_fasta = (">acc1   desc1   [genome_name1 | genome1]\nATGC\n"
+                          ">acc1   desc1   [genome_name1 | genome1]\nATGC")
 
         # Check if the file was created with the correct path and content
-        mock_file.assert_called_once_with("/fake/dir/genome1.fasta", 'w')
+        mock_file.assert_called_once_with(f"{str(result)}/genome1.fasta", 'w')
         mock_file().write.assert_called_once_with(expected_fasta)
 
 
-class TestFetchGenomeFeaturesBVBR(TestPluginBase):
+class TestGetBvBrcGenomes(TestPluginBase):
     package = 'rescript.tests'
 
-    @patch('rescript.bv_brc.parse_fasta_to_dict')
+    @patch('rescript.bv_brc.get_taxonomy')
+    @patch('rescript.bv_brc.create_genome_fasta')
     @patch('rescript.bv_brc.download_data')
-    @patch('rescript.bv_brc.id_list_handling')
-    @patch('builtins.open', new_callable=mock_open)
-    def test_fetch_genome_features_bv_brc(self, mock_open,
-                                          mock_id_list_handling,
-                                          mock_download_data,
-                                          mock_parse_fasta_to_dict):
-        # Mock the id_list_handling function
-        mock_id_list_handling.return_value = ("in(feature_id,"
-                                              "(feature1,feature2))")
-
-        # Mock the download_data function responses
-        mock_response_genes = MagicMock()
-        mock_response_genes.text = "mocked_genes_fasta_data"
-        mock_response_proteins = MagicMock()
-        mock_response_proteins.text = "mocked_proteins_fasta_data"
-        mock_download_data.side_effect = [mock_response_genes,
-                                          mock_response_proteins]
-
-        # Mock the parse_fasta_to_dict function
-        mock_parse_fasta_to_dict.side_effect = [
-            {'2030927.4755': '>fig|2030927| GTPase [ABC | '
-                             '2030927.4755]\nATGA\n'},
-            {'1234567.89': '>fig|1234567| protein [XYZ | 1234567.89]\nGCGT\n'}
-        ]
-
-        # Call the function with the test RQL query
-        genes, proteins = fetch_genome_features_bv_brc(
-            rql_query="in(feature_id,(feature1,feature2))"
-        )
-
-        # Assertions to ensure the correct calls were made
-        mock_id_list_handling.assert_called_once_with(
-            rql_query="in(feature_id,(feature1,feature2))",
-            ids=None,
-            parameter_name="feature_ids",
-            data_field="feature_id"
-        )
-
-        mock_download_data.assert_any_call(
-            url="https://www.bv-brc.org/api/genome_feature/?in(feature_id,"
-                "(feature1,feature2))&http_accept=application/dna+fasta",
-            data_type="genome_feature"
-        )
-
-        mock_download_data.assert_any_call(
-            url="https://www.bv-brc.org/api/genome_feature/?in(feature_id,"
-                "(feature1,feature2))&http_accept=application/protein+fasta",
-            data_type="genome_feature"
-        )
-
-        mock_parse_fasta_to_dict.assert_any_call("mocked_genes_fasta_data")
-        mock_parse_fasta_to_dict.assert_any_call("mocked_proteins_fasta_data")
-
-        # Check that the files were written correctly for genes
-        mock_open.assert_any_call(
-            os.path.join(str(genes), "2030927.4755.fasta"), 'w')
-        mock_open().write.assert_any_call(
-            '>fig|2030927| GTPase [ABC | 2030927.4755]\nATGA\n')
-
-        # Check that the files were written correctly for proteins
-        mock_open.assert_any_call(
-            os.path.join(str(proteins), "1234567.89.fasta"), 'w')
-        mock_open().write.assert_any_call(
-            '>fig|1234567| protein [XYZ | 1234567.89]\nGCGT\n')
-
-        # Check that the return types are correct
-        self.assertIsInstance(genes, GenesDirectoryFormat)
-        self.assertIsInstance(proteins, ProteinsDirectoryFormat)
-
-    def test_parse_fasta_to_dict(self):
-        fasta_string = (
-            ">fig|2030927| GTPase [ABC | 2030927.4755]\natga\n"
-            ">fig|1234567| protein [XYZ | 1234567.89]\ngcgt\n"
-        )
-        expected_output = {
-            '2030927.4755': (
-                ">fig|2030927| GTPase [ABC | 2030927.4755]\nATGA\n"
-            ),
-            '1234567.89': (
-                ">fig|1234567| protein [XYZ | 1234567.89]\nGCGT\n"
-            )
-        }
-        result = parse_fasta_to_dict(fasta_string)
-        self.assertEqual(result, expected_output)
-
-
-class TestFetchGenomesBVBRC(TestPluginBase):
-    package = 'rescript.tests'
-
-    @patch('rescript.bv_brc.json_to_fasta')
-    @patch('rescript.bv_brc.download_data')
-    @patch('rescript.bv_brc.id_list_handling')
-    def test_fetch_genomes_bv_brc(
-            self, mock_id_list_handling, mock_download_data, mock_json_to_fasta
-    ):
-        # Mock the id_list_handling function
-        mock_id_list_handling.return_value = "genome_id=in(genome1,genome2)"
-
-        # Mock the download_data response
-        mock_response = MagicMock()
-        mock_response.json.return_value = {'genomes': ['genome_data']}
-        mock_download_data.return_value = mock_response
+    @patch('rescript.bv_brc.parameter_validation')
+    def test_get_bv_brc_genomes(self, mock_parameter_validation,
+                                mock_download_data, mock_create_genome_fasta,
+                                mock_get_taxonomy):
+        # Mocked return values for the patched rescript.bv_brc helpers
+        mock_parameter_validation.return_value = "mocked_rql_query"
+        mock_download_data.return_value = [
+            {'id': 'genome1', 'sequence': 'ATGC'},
+            {'id': 'genome2', 'sequence': 'GCTA'}]
+        mock_create_genome_fasta.return_value = MagicMock(
+            name='GenomeSequencesDirectoryFormat')
+        mock_get_taxonomy.return_value = MagicMock(name='TSVTaxonomyFormat')
 
         # Call the function
-        genomes = fetch_genomes_bv_brc(
-            rql_query="genome_id=in(genome1,genome2)",
-            genome_ids=["genome1", "genome2"]
+        get_bv_brc_genomes(
+            ids_metadata=MagicMock(name='NumericMetadataColumn'),
+            rql_query=None,
+            data_field="mock_field",
+            ids=["id1", "id2"],
+            ranks=["rank1", "rank2"],
+            rank_propagation=True
         )
 
-        # Assertions
-        mock_id_list_handling.assert_called_once_with(
-            rql_query="genome_id=in(genome1,genome2)",
-            ids=["genome1", "genome2"],
-            parameter_name="genome_ids",
-            data_field="genome_id"
-        )
 
-        mock_download_data.assert_called_once_with(
-            url="https://www.bv-brc.org/api/genome_sequence/"
-                "?genome_id=in(genome1,genome2)",
-            data_type="genome_sequence"
-        )
-
-        mock_json_to_fasta.assert_called_once_with(
-            {'genomes': ['genome_data']},
-            str(genomes)
-        )
-
-        self.assertIsInstance(genomes, GenomeSequencesDirectoryFormat)
-
-
-class TestFetchMetadataBVBR(TestPluginBase):
+class TestGetBvBrcMetadata(TestPluginBase):
     package = 'rescript.tests'
 
-    @patch('rescript.bv_brc.qiime2.Metadata')
-    @patch('rescript.bv_brc.pd.read_csv')
     @patch('rescript.bv_brc.download_data')
-    def test_fetch_metadata_bv_brc(self, mock_download_data,
-                                   mock_read_csv, mock_metadata):
-        # Mock the download_data response
-        mock_response = MagicMock()
-        mock_response.text = (
-            "id\tcolumn1\tcolumn2\n1\tdata1\tdata2\n2\tdata3\tdata4")
-        mock_download_data.return_value = mock_response
+    @patch('rescript.bv_brc.parameter_validation')
+    def test_get_bv_brc_metadata(self, mock_parameter_validation,
+                                 mock_download_data):
+        # Mock the return value of parameter_validation
+        mock_parameter_validation.return_value = 'rql(query)'
 
-        # Mock the pandas read_csv return value
+        # Mock the return value of download_data
         mock_df = pd.DataFrame({
-            'column1': ['data1', 'data3'],
-            'column2': ['data2', 'data4']
-        }, index=pd.Index(['1', '2'], name='id'))
-        mock_read_csv.return_value = mock_df
-
-        # Mock qiime2.Metadata creation
-        mock_metadata_instance = MagicMock()
-        mock_metadata.return_value = mock_metadata_instance
+            'id': ['id1', 'id2'],
+            'feature': ['value1', 'value2'],
+            'empty_field': [' ', None]
+        }).set_index('id')
+        mock_download_data.return_value = mock_df
 
         # Call the function
-        fetch_metadata_bv_brc(
-            data_type="genome",
-            rql_query="genome_id=in(1,2)"
+        result_metadata = get_bv_brc_metadata(
+            ids_metadata=None,
+            data_type='genome',
+            data_field='genome_id',
+            ids=['id1', 'id2']
         )
 
-        # Assertions
-        mock_download_data.assert_called_once_with(
-            url="https://www.bv-brc.org/api/genome/"
-                "?genome_id=in(1,2)&http_accept=text/tsv",
-            data_type="genome"
-        )
+        # Assertions on the returned qiime2.Metadata
+        self.assertIsInstance(result_metadata, qiime2.Metadata)
 
-        mock_read_csv.assert_called_once()
-        args, kwargs = mock_read_csv.call_args
-        self.assertEqual(kwargs['sep'], '\t')
+        # Extract the DataFrame from the result and check its values
+        result_df = result_metadata.to_dataframe()
 
-        self.assertEqual(args[0].getvalue(), "id\tcolumn1\tcolumn2\n1\tdata1"
-                                             "\tdata2\n2\tdata3\tdata4")
+        # Check that the DataFrame's index and columns are correct
+        self.assertEqual(result_df.index.tolist(), ['id1', 'id2'])
+        self.assertIn('feature', result_df.columns)
 
-        mock_metadata.assert_called_once_with(mock_df)
+        # Check that empty/space-only fields are replaced with NaN
+        self.assertTrue(pd.isna(result_df.loc['id1', 'empty_field']))
+        self.assertTrue(pd.isna(result_df.loc['id2', 'empty_field']))
 
 
-class TestFetchTaxonomyBVBR(TestPluginBase):
+class TestGetBvBrcGenomeFeatures(TestPluginBase):
     package = 'rescript.tests'
 
-    @patch('pandas.DataFrame.to_csv')
-    @patch('rescript.bv_brc.transform_taxonomy_df')
+    @patch('rescript.bv_brc.get_loci')
+    @patch('rescript.bv_brc.get_taxonomy')
+    @patch('rescript.bv_brc.get_sequences')
     @patch('rescript.bv_brc.download_data')
-    @patch('rescript.bv_brc.pd.read_csv')
-    @patch('rescript.bv_brc.id_list_handling')
-    def test_fetch_taxonomy_bv_brc(
-            self, mock_id_list_handling, mock_read_csv, mock_download_data,
-            mock_transform_taxonomy_df, mock_to_csv
-    ):
-        # Mock the id_list_handling function
-        mock_id_list_handling.return_value = "taxon_id=in(taxon1,taxon2)"
-
-        # Mock the download_data response
-        mock_response = MagicMock()
-        mock_response.text = (
-            "id\trank1\trank2\n1\tdata1\tdata2\n2\tdata3\tdata4")
-        mock_download_data.return_value = mock_response
-
-        # Prepare mocks for file output
-        with patch('builtins.open', unittest.mock.mock_open()):
-            directory = fetch_taxonomy_bv_brc(
-                rql_query="taxon_id=in(taxon1,taxon2)",
-                ranks=['rank1', 'rank2'],
-                taxon_ids=["taxon1", "taxon2"]
-            )
-
-            # Assertions
-            mock_id_list_handling.assert_called_once_with(
-                rql_query="taxon_id=in(taxon1,taxon2)",
-                ids=["taxon1", "taxon2"],
-                parameter_name="taxon_ids",
-                data_field="taxon_id"
-            )
-
-            mock_download_data.assert_called_once_with(
-                url="https://www.bv-brc.org/api/taxonomy/"
-                    "?taxon_id=in(taxon1,taxon2)&http_accept=text/tsv",
-                data_type="taxonomy"
-            )
-
-            self.assertIsInstance(directory, TSVTaxonomyDirectoryFormat)
-
-    @patch('rescript.bv_brc.parse_lineage_names_with_ranks')
-    def test_transform_taxonomy_df(self, mock_parse_lineage_names_with_ranks):
-        # Mock the parse_lineage_names_with_ranks function
-        mock_parse_lineage_names_with_ranks.side_effect = \
-            lambda lineage_names, lineage_ranks, ranks: "Mocked Taxon"
-
-        # Create a sample DataFrame
-        df = pd.DataFrame({
-            'taxon_id': ['taxon1', 'taxon2'],
-            'lineage_names': ['name1;name2', 'name3;name4'],
-            'lineage_ranks': ['rank1;rank2', 'rank3;rank4']
-        })
-
-        ranks = ['rank1', 'rank2', 'rank3']
+    @patch('rescript.bv_brc.parameter_validation')
+    def test_get_bv_brc_genome_features(self, mock_parameter_validation,
+                                        mock_download_data, mock_get_sequences,
+                                        mock_get_taxonomy, mock_get_loci):
+        # Mocked return values for the patched rescript.bv_brc helpers
+        mock_parameter_validation.return_value = "mocked_rql_query"
+        mock_download_data.return_value = [{"genome_id": "genome1"}]
+        mock_get_sequences.return_value = (MagicMock(), MagicMock())
+        mock_get_taxonomy.return_value = MagicMock()
+        mock_get_loci.return_value = MagicMock()
 
         # Call the function
-        result_df = transform_taxonomy_df(df, ranks)
-
-        # Expected DataFrame after transformation
-        expected_df = pd.DataFrame({
-            'Feature ID': ['taxon1', 'taxon2'],
-            'Taxon': ['Mocked Taxon', 'Mocked Taxon']
-        }).set_index('Feature ID')
+        get_bv_brc_genome_features(
+            ids_metadata=None,
+            rql_query=None,
+            data_field="mock_field",
+            ids=["id1", "id2"],
+            ranks=["rank1", "rank2"],
+            rank_propagation=True
+        )
 
-        # Assert that the result matches the expected DataFrame
-        pd.testing.assert_frame_equal(result_df, expected_df)
 
-    class TestParseLineageNamesWithRanks(TestPluginBase):
-        package = 'rescript.tests'
+class TestCreateTaxonomyEntry(TestPluginBase):
+    package = 'rescript.tests'
 
     def test_basic_functionality(self):
         lineage_names = ("Animalia;Chordata;Mammalia;Primates;Hominidae;Homo;"
@@ -482,7 +449,7 @@ def test_basic_functionality(self):
         expected_result = ("k__Animalia; p__Chordata; c__Mammalia; "
                            "o__Primates; f__Hominidae; g__Homo; s__sapiens")
 
-        result = parse_lineage_names_with_ranks(lineage_names, lineage_ranks)
+        result = create_taxonomy_entry(lineage_names, lineage_ranks)
         self.assertEqual(result, expected_result)
 
     def test_with_missing_ranks(self):
@@ -491,7 +458,8 @@ def test_with_missing_ranks(self):
         expected_result = ("k__Animalia; p__Chordata; c__; o__; f__Hominidae; "
                            "g__; s__sapiens")
 
-        result = parse_lineage_names_with_ranks(lineage_names, lineage_ranks)
+        result = create_taxonomy_entry(lineage_names, lineage_ranks,
+                                       rank_propagation=False)
         self.assertEqual(result, expected_result)
 
     def test_rank_propagation(self):
@@ -500,8 +468,8 @@ def test_rank_propagation(self):
         expected_result = ("k__Animalia; p__Chordata; c__Mammalia; "
                            "o__Mammalia; f__Mammalia; g__Homo; s__Homo")
 
-        result = parse_lineage_names_with_ranks(lineage_names, lineage_ranks,
-                                                rank_propagation=True)
+        result = create_taxonomy_entry(lineage_names, lineage_ranks,
+                                       rank_propagation=True)
         self.assertEqual(result, expected_result)
 
     def test_genus_species_split(self):
@@ -511,7 +479,7 @@ def test_genus_species_split(self):
         expected_result = ("k__Animalia; p__Chordata; c__Mammalia;"
                            " o__Primates; f__Hominidae; g__Homo; s__sapiens")
 
-        result = parse_lineage_names_with_ranks(lineage_names, lineage_ranks)
+        result = create_taxonomy_entry(lineage_names, lineage_ranks)
         self.assertEqual(result, expected_result)
 
     def test_genus_only_split(self):
@@ -521,7 +489,7 @@ def test_genus_only_split(self):
         expected_result = ("k__Animalia; p__Chordata; c__Mammalia; "
                            "o__Primates; f__Hominidae; g__Homo; s__sapiens")
 
-        result = parse_lineage_names_with_ranks(lineage_names, lineage_ranks)
+        result = create_taxonomy_entry(lineage_names, lineage_ranks)
         self.assertEqual(result, expected_result)
 
     def test_no_species_in_ranks(self):
@@ -531,10 +499,10 @@ def test_no_species_in_ranks(self):
         expected_result = ("k__Animalia; p__Chordata; c__Mammalia; "
                            "o__Primates; f__Hominidae; g__Homo sapiens")
 
-        result = parse_lineage_names_with_ranks(lineage_names, lineage_ranks,
-                                                ranks=['kingdom', 'phylum',
-                                                       'class', 'order',
-                                                       'family', 'genus'])
+        result = create_taxonomy_entry(lineage_names, lineage_ranks,
+                                       ranks=['kingdom', 'phylum',
+                                              'class', 'order',
+                                              'family', 'genus'])
         self.assertEqual(result, expected_result)
 
     def test_custom_ranks(self):
@@ -542,15 +510,257 @@ def test_custom_ranks(self):
         lineage_ranks = "superkingdom"
         expected_result = "sk__Metazoa"
 
-        result = parse_lineage_names_with_ranks(lineage_names, lineage_ranks,
-                                                ranks=['superkingdom'])
+        result = create_taxonomy_entry(lineage_names, lineage_ranks,
+                                       ranks=['superkingdom'])
         self.assertEqual(result, expected_result)
 
-    def test_genomes(self):
-        fetch_genomes_bv_brc(rql_query="eq(taxon_id,1313)")
 
-    def test_genome_features(self):
-        fetch_genome_features_bv_brc(rql_query="eq(taxon_id,1313)")
+class TestGetTaxonomy(TestPluginBase):
+    package = 'rescript.tests'
+
+    @patch('rescript.bv_brc.create_taxonomy')
+    @patch('rescript.bv_brc.download_data')
+    def test_get_taxonomy(self, mock_download_data, mock_create_taxonomy):
+        # Input response_sequences (list of dicts); taxon1 appears twice
+        response_sequences = [
+            {"taxon_id": "taxon1", "feature_id": "feature1"},
+            {"taxon_id": "taxon2", "feature_id": "feature2"},
+            {"taxon_id": "taxon3", "feature_id": "feature3"},
+            {"taxon_id": "taxon1", "feature_id": "feature4"},
+        ]
+
+        # Mock the download_data and create_taxonomy functions
+        mock_download_data.return_value = MagicMock()
+        mock_create_taxonomy.return_value = MagicMock()
+
+        # Define test parameters
+        ranks = ["kingdom", "phylum", "class"]
+        rank_propagation = True
+        accession_name = "feature_id"
+
+        # Call the function
+        get_taxonomy(
+            response_sequences=response_sequences,
+            ranks=ranks,
+            rank_propagation=rank_propagation,
+            accession_name=accession_name
+        )
+
+        # Check the fixture's taxon_ids collapse to three unique values.
+        # NOTE(review): this assertion reads only the fixture, not the mocks.
+        expected_taxon_ids = {"taxon1", "taxon2", "taxon3"}
+        extracted_taxon_ids = {str(entry["taxon_id"]) for entry in
+                               response_sequences}
+        self.assertEqual(extracted_taxon_ids, expected_taxon_ids)
+
+
+class TestCreateTaxonomy(TestPluginBase):
+    package = 'rescript.tests'
+
+    @patch('rescript.bv_brc.TSVTaxonomyFormat')
+    @patch('rescript.bv_brc.create_taxonomy_entry')
+    def test_create_taxonomy(self, mock_create_taxonomy_entry,
+                             mock_TSVTaxonomyFormat):
+        # Mock the input data for taxonomy_bvbrc DataFrame
+        taxonomy_bvbrc = pd.DataFrame({
+            'taxon_id': ['taxon1', 'taxon2'],
+            'lineage_names': [['Bacteria', 'Proteobacteria'],
+                              ['Bacteria', 'Firmicutes']],
+            'lineage_ranks': [['domain', 'phylum'], ['domain', 'phylum']]
+        })
+
+        # Mock the create_taxonomy_entry to return a fake taxonomy string
+        mock_create_taxonomy_entry.side_effect = \
+            lambda lineage_names, lineage_ranks, rank_propagation, ranks: (
+                ";".join(lineage_names))
+
+        # Mock response_sequences (list of dicts)
+        response_sequences = [
+            {"taxon_id": "taxon1", "feature_id": "feature1"},
+            {"taxon_id": "taxon2", "feature_id": "feature2"}
+        ]
+
+        # Mock TSVTaxonomyFormat to return a file-like object
+        mock_taxonomy_file = MagicMock()
+        (mock_TSVTaxonomyFormat.return_value.open.
+         return_value.__enter__).return_value = mock_taxonomy_file
+
+        # Call the create_taxonomy function
+        create_taxonomy(
+            taxonomy_bvbrc=taxonomy_bvbrc,
+            response_sequences=response_sequences,
+            ranks=['domain', 'phylum'],
+            rank_propagation=True,
+            accession_name="feature_id"
+        )
+
+        # Ensure that the correct data was written to the file
+        written_data = "\t".join(["Feature ID", "Taxon"]) + "\n" + \
+                       "\t".join(
+                           ["feature1", "Bacteria;Proteobacteria"]) + "\n" + \
+                       "\t".join(["feature2", "Bacteria;Firmicutes"]) + "\n"
+
+        # Check if the 'write' method was actually called with the right data
+        mock_taxonomy_file.write.assert_called_once_with(written_data)
+
+
+class TestReadDataWithDtypes(TestPluginBase):
+    package = 'rescript.tests'
+
+    def test_read_data_with_dtypes(self):
+        # Mock response with a TSV file containing antibiotics data
+        mock_response = MagicMock()
+        mock_response.text = """_version_\tantibiotic_name\tcas_id\tdescription
+1.0\tPenicillin\tCAS1234\tAntibiotic description
+2.0\tAmoxicillin\tCAS5678\tAntibiotic description 2
+"""
+
+        # Call the function with mock data and data_type "antibiotics"
+        df = read_tsv_data_with_dtypes(mock_response, "antibiotics")
+
+        # Expected DataFrame output
+        expected_data = {
+            "_version_": ["1.0", "2.0"],
+            "antibiotic_name": ["Penicillin", "Amoxicillin"],
+            "cas_id": ["CAS1234", "CAS5678"],
+            "description": ["Antibiotic description",
+                            "Antibiotic description 2"]
+        }
+        expected_df = pd.DataFrame(expected_data)
+
+        # Check if the DataFrame matches the expected output
+        pd.testing.assert_frame_equal(df, expected_df)
+
+    def test_no_data_raises_value_error(self):
+        # Mock response with only the column headers
+        mock_response = MagicMock()
+        mock_response.text = """_version_\tantibiotic_name\tcas_id"""
+
+        # Assert that a ValueError is raised when no data rows are present
+        with self.assertRaises(ValueError):
+            read_tsv_data_with_dtypes(mock_response, "antibiotics")
+
+
+class TestGetLoci(TestPluginBase):
+    package = 'rescript.tests'
+
+    @patch('rescript.bv_brc.download_data')
+    @patch('rescript.bv_brc.process_loci')
+    @patch('builtins.open', new_callable=mock_open)
+    @patch('rescript.bv_brc.LociDirectoryFormat')
+    def test_get_loci(self, mock_loci_dir_format, mock_open_file,
+                      mock_process_loci, mock_download_data):
+        # Set up the mocks
+        mock_download_data.return_value = "mock_gff_data"
+        mock_process_loci.return_value = "processed_gff_data"
+
+        # Create a mock directory format
+        mock_loci_dir = MagicMock()
+        mock_loci_dir_format.return_value = mock_loci_dir
+
+        # Mock response_sequences with genome_id information
+        response_sequences = [
+            {"genome_id": "genome1"},
+            {"genome_id": "genome2"},
+            {"genome_id": "genome1"}
+        ]
+
+        # Call the function with response_sequences
+        get_loci(response_sequences)
+
+        # Check that download_data is called for each unique genome_id
+        mock_download_data.assert_any_call(data_type="genome_feature",
+                                           query="eq(genome_id,genome1)",
+                                           accept="application/gff")
+        mock_download_data.assert_any_call(data_type="genome_feature",
+                                           query="eq(genome_id,genome2)",
+                                           accept="application/gff")
+
+        # Ensure it is only called twice (for unique genome_ids)
+        self.assertEqual(mock_download_data.call_count, 2)
+
+        # Check that process_loci is called with the right data
+        mock_process_loci.assert_any_call(gff_string="mock_gff_data")
+
+        # Check that open was called with the correct paths and the data was
+        # written
+        mock_open_file.assert_any_call(
+            mock_loci_dir.__str__() + '/genome1.gff', 'w')
+        mock_open_file.assert_any_call(
+            mock_loci_dir.__str__() + '/genome2.gff', 'w')
+
+        # Ensure the processed data was written to the file
+        mock_open_file().write.assert_any_call("processed_gff_data")
+
+    def test_process_loci(self):
+        # Input GFF string with both headers and data lines
+        input_data = """##gff-version 3
+##sequence-region accn|NC_000001.11 1 1000
+accn|NC_000001.11\tRefSeq\tregion\t1\t1000\t.\t+\t.\tID=region0;
+accn|NC_000002.11\tRefSeq\tgene\t1\t1000\t.\t+\t.\tID=gene0;"""
+
+        # Expected output: "accn|" stripped from data lines, headers unchanged
+        expected_output = """##gff-version 3
+##sequence-region accn|NC_000001.11 1 1000
+NC_000001.11\tRefSeq\tregion\t1\t1000\t.\t+\t.\tID=region0;
+NC_000002.11\tRefSeq\tgene\t1\t1000\t.\t+\t.\tID=gene0;"""
+
+        # Call the function with the input data
+        result = process_loci(input_data)
+
+        # Check if the result matches the expected output
+        self.assertEqual(result, expected_output)
+
+
+class TestGetSequences(TestPluginBase):
+    package = 'rescript.tests'
+
+    @patch('rescript.bv_brc.download_data')
+    @patch('builtins.open', new_callable=mock_open)
+    def test_get_sequences(self, mock_open, mock_download_data):
+        # Mock data for genome_features
+        genome_features = [
+            {
+                "genome_id": "genome1",
+                "feature_id": "feature1",
+                "na_sequence_md5": "md5na1",
+                "aa_sequence_md5": "md5aa1"
+            },
+            {
+                "genome_id": "genome2",
+                "feature_id": "feature2",
+                "na_sequence_md5": "md5na2"
+            }
+        ]
+
+        # Mock the return value of download_data
+        mock_download_data.return_value = [
+            {"md5": "md5na1", "sequence": "atgc"},
+            {"md5": "md5aa1", "sequence": "MKV"},
+            {"md5": "md5na2", "sequence": "gtca"}
+        ]
+
+        # Call the function
+        get_sequences(genome_features)
+
+        # Assertions to check that download_data was called correctly
+        mock_download_data.assert_called_once_with(
+            data_type="feature_sequence",
+            query=unittest.mock.ANY,
+            accept="application/json",
+            select=["md5", "sequence"]
+        )
+
+        # Check that genome1.fasta and genome2.fasta are in the paths used
+        # for file opening
+        open_calls = [call[0][0] for call in mock_open.call_args_list]
+
+        self.assertTrue(any('genome1.fasta' in call for call in open_calls))
+        self.assertTrue(any('genome2.fasta' in call for call in open_calls))
+
+        # Check that the gene sequences were written, converted to upper case
+        mock_open().write.assert_any_call('>feature1\nATGC\n')
+        mock_open().write.assert_any_call('>feature2\nGTCA\n')
 
-    def test_genome_features2(self):
-        fetch_genome_features_bv_brc(rql_query="eq(genome_id,1313.5550)")
\ No newline at end of file
+        # Check if the correct sequences were written to the proteins file
+        mock_open().write.assert_any_call('>feature1\nMKV\n')