diff --git a/GenomeFileUtil.spec b/GenomeFileUtil.spec index 05cca1f9..5971b43d 100644 --- a/GenomeFileUtil.spec +++ b/GenomeFileUtil.spec @@ -318,6 +318,7 @@ module GenomeFileUtil { returns (MetagenomeSaveResult returnVal) authentication required; typedef structure { + int workspace_id; string workspace; string name; KBaseGenomes.Genome data; diff --git a/lib/GenomeFileUtil/GenomeFileUtilImpl.py b/lib/GenomeFileUtil/GenomeFileUtilImpl.py index 0d01dd6e..b41f4d82 100644 --- a/lib/GenomeFileUtil/GenomeFileUtilImpl.py +++ b/lib/GenomeFileUtil/GenomeFileUtilImpl.py @@ -69,7 +69,7 @@ class GenomeFileUtil: ######################################### noqa VERSION = "0.11.7" GIT_URL = "git@github.com:kbaseapps/GenomeFileUtil.git" - GIT_COMMIT_HASH = "591a19ccf4d1b42f01cc06486654b6d3a8ea08e4" + GIT_COMMIT_HASH = "861a50b5ce2d0b8b481fe514b41a6d6b89ee0c96" #BEGIN_CLASS_HEADER #END_CLASS_HEADER @@ -831,27 +831,27 @@ def fasta_gff_to_metagenome(self, ctx, params): def save_one_genome(self, ctx, params): """ :param params: instance of type "SaveOneGenomeParams" -> structure: - parameter "workspace" of String, parameter "name" of String, - parameter "data" of type "Genome" (Genome type -- annotated and - assembled genome data. Field descriptions: id - string - KBase - legacy data ID scientific_name - string - human readable species - name domain - string - human readable phylogenetic domain name - (eg. "Bacteria") warnings - list of string - genome-level warnings - generated in the annotation process genome_tiers - list of string - - controlled vocabulary (based on app input and checked by - GenomeFileUtil) A list of labels describing the data source for - this genome. 
Allowed values - Representative, Reference, - ExternalDB, User Tier assignments based on genome source: * All - phytozome - Representative and ExternalDB * Phytozome flagship - genomes - Reference, Representative and ExternalDB * Ensembl - - Representative and ExternalDB * RefSeq Reference - Reference, - Representative and ExternalDB * RefSeq Representative - - Representative and ExternalDB * RefSeq Latest or All Assemblies - folder - ExternalDB * User Data - User tagged feature_counts - map - of string to integer - total counts of each type of feature keys - are a controlled vocabulary of - "CDS", "gene", "misc_feature", - "misc_recomb", "mobile_element", "ncRNA" - 72, - "non_coding_features", "non_coding_genes", + parameter "workspace_id" of Long, parameter "workspace" of String, + parameter "name" of String, parameter "data" of type "Genome" + (Genome type -- annotated and assembled genome data. Field + descriptions: id - string - KBase legacy data ID scientific_name - + string - human readable species name domain - string - human + readable phylogenetic domain name (eg. "Bacteria") warnings - list + of string - genome-level warnings generated in the annotation + process genome_tiers - list of string - controlled vocabulary + (based on app input and checked by GenomeFileUtil) A list of + labels describing the data source for this genome. 
Allowed values + - Representative, Reference, ExternalDB, User Tier assignments + based on genome source: * All phytozome - Representative and + ExternalDB * Phytozome flagship genomes - Reference, + Representative and ExternalDB * Ensembl - Representative and + ExternalDB * RefSeq Reference - Reference, Representative and + ExternalDB * RefSeq Representative - Representative and ExternalDB + * RefSeq Latest or All Assemblies folder - ExternalDB * User Data + - User tagged feature_counts - map of string to integer - total + counts of each type of feature keys are a controlled vocabulary of + - "CDS", "gene", "misc_feature", "misc_recomb", "mobile_element", + "ncRNA" - 72, "non_coding_features", "non_coding_genes", "protein_encoding_gene", "rRNA", "rep_origin", "repeat_region", "tRNA" genetic_code - int - An NCBI-assigned taxonomic category for the organism See here - diff --git a/lib/GenomeFileUtil/core/GenbankToGenome.py b/lib/GenomeFileUtil/core/GenbankToGenome.py index d1851a61..d5e42b55 100644 --- a/lib/GenomeFileUtil/core/GenbankToGenome.py +++ b/lib/GenomeFileUtil/core/GenbankToGenome.py @@ -22,7 +22,8 @@ from installed_clients.WorkspaceClient import Workspace from GenomeFileUtil.core.GenomeUtils import ( is_parent, propagate_cds_props_to_gene, warnings, parse_inferences, - load_ontology_mappings, set_taxon_data, set_default_taxon_data + load_ontology_mappings, set_taxon_data, set_default_taxon_data, + set_up_single_params, validate_mass_params ) MAX_MISC_FEATURE_SIZE = 10000 @@ -113,52 +114,16 @@ def __init__(self, config): def import_genbank(self, params): print('validating parameters') - mass_params = self._set_up_single_params(params) + mass_params = set_up_single_params( + params, _WSNAME, self._validate_params, self.dfu.ws_name_to_id + ) return self._import_genbank_mass(mass_params)[0] def import_genbank_mass(self, params): print('validating parameters') - self._validate_mass_params(params) + validate_mass_params(params, self._validate_params) return 
self._import_genbank_mass(params) - def _set_up_single_params(self, params): - # avoid side effects and keep variables in params unmodfied - inputs = dict(params) - self._validate_params(inputs) - ws_id = self._get_int(inputs.pop(_WSID, None), _WSID) - ws_name = inputs.pop(_WSNAME, None) - if (bool(ws_id) == bool(ws_name)): # xnor - raise ValueError(f"Exactly one of a '{_WSID}' or a '{_WSNAME}' parameter must be provided") - if not ws_id: - print(f"Translating workspace name {ws_name} to a workspace ID. Prefer submitting " - + "a workspace ID over a mutable workspace name that may cause race conditions") - ws_id = self.dfu.ws_name_to_id(ws_name) - mass_params = {_WSID: ws_id, _INPUTS: [inputs]} - return mass_params - - def _validate_mass_params(self, params): - ws_id = self._get_int(params.get(_WSID), _WSID) - if not ws_id: - raise ValueError(f"{_WSID} is required") - inputs = params.get(_INPUTS) - if not inputs or type(inputs) is not list: - raise ValueError(f"{_INPUTS} field is required and must be a non-empty list") - for i, inp in enumerate(inputs, start=1): - if type(inp) is not dict: - raise ValueError(f"Entry #{i} in {_INPUTS} field is not a mapping as required") - try: - self._validate_params(inp) - except Exception as e: - raise ValueError(f"Entry #{i} in {_INPUTS} field has invalid params: {e}") from e - - def _get_int(self, putative_int, name, minimum=1): - if putative_int is not None: - if type(putative_int) is not int: - raise ValueError(f"{name} must be an integer, got: {putative_int}") - if putative_int < minimum: - raise ValueError(f"{name} must be an integer >= {minimum}") - return putative_int - def _import_genbank_mass(self, params): workspace_id = params[_WSID] @@ -197,6 +162,12 @@ def _import_genbank_mass(self, params): # parse genbank file self._parse_genbank(genome_obj) + # check features + self.gi.check_dna_sequence_in_features(genome_obj.genome_data) + + # validate genome + genome_obj.genome_data['warnings'] = 
self.gi.validate_genome(genome_obj.genome_data) + # gather all objects genome_objs.append(genome_obj) @@ -209,7 +180,6 @@ def _import_genbank_mass(self, params): for genome_obj in genome_objs: shutil.rmtree(genome_obj.input_directory) - # TODO make an internal mass function save_genomes results = self._save_genomes(workspace_id, genome_objs) # return the result @@ -227,17 +197,18 @@ def _import_genbank_mass(self, params): return details def _save_genomes(self, workspace_id, genome_objs): - results = [ - self.gi.save_one_genome( - { - 'workspace': workspace_id, - 'name': genome_obj.genome_name, - 'data': genome_obj.genome_data, - "meta": genome_obj.genome_meta, - } - ) for genome_obj in genome_objs - ] - + results = self.gi.save_genome_mass( + { + "workspace_id": workspace_id, + "inputs": [ + { + "name": genome_obj.genome_name, + "data": genome_obj.genome_data, + "meta": genome_obj.genome_meta, + } for genome_obj in genome_objs + ], + } + ) return results def _validate_params(self, params): diff --git a/lib/GenomeFileUtil/core/GenomeInterface.py b/lib/GenomeFileUtil/core/GenomeInterface.py index def9a499..e16b12e1 100644 --- a/lib/GenomeFileUtil/core/GenomeInterface.py +++ b/lib/GenomeFileUtil/core/GenomeInterface.py @@ -16,6 +16,10 @@ MAX_GENOME_SIZE = 2**30 +_WS = "workspace" +_WSID = "workspace_id" +_INPUTS = "inputs" + class GenomeInterface: def __init__(self, config): @@ -32,18 +36,58 @@ def __init__(self, config): self.scratch = config.raw['scratch'] self.ws_large_data = WsLargeDataIO(self.callback_url) - @staticmethod - def _validate_save_one_genome_params(params): + def save_one_genome(self, params): + """ + Saves a single genome object to the workspace. + + This method prepares and validates the necessary parameters for saving a genome. + It then executes the saving process and returns relevant information about the saved genome. + + Args: + params (dict): A dictionary containing the parameters for saving the genome. 
+ Must include workspace and genome-specific information. + + Returns: + dict: The information about the saved genome object, including metadata. """ - _validate_save_one_genome_params: - validates params passed to save_one_genome method + mass_params = GenomeUtils.set_up_single_params( + params, _WS, self._validate_genome_input_params, self.dfu.ws_name_to_id + ) + return self._save_genome_mass(mass_params)[0] + + def save_genome_mass(self, params, validate_genome=False): + """ + Saves multiple genome objects to the workspace. + + This method handles the saving of multiple genome objects in bulk. It validates + the parameters, processes each genome individually, and performs necessary + updates or validations before saving. If requested, it will also validate the + genomes before saving. + + # NOTE If there is more than 1GB of data or more than 10,000 genomes to upload, + # the workspace will fail. + + Args: + params (dict): A dictionary containing the parameters for saving the genomes. + Should include workspace ID and a list of genome inputs with their data. + validate_genome (bool, optional): A flag indicating whether to validate the + genomes before saving. Defaults to False. + + Returns: + list: A list of dictionaries, each containing information about a saved + genome object and any warnings encountered during the saving process. 
+ """ + GenomeUtils.validate_mass_params(params, self._validate_genome_input_params) + return self._save_genome_mass(params, validate_genome=validate_genome) + + def _validate_genome_input_params(self, genome_input): + """ + Check required parameters are in genome_input """ - logging.info('start validating save_one_genome params') # check for required parameters - for p in ['workspace', 'name', 'data']: - if p not in params: - raise ValueError( - '"{}" parameter is required, but missing'.format(p)) + for p in ["name", "data"]: + if p not in genome_input: + raise ValueError(f"{p} parameter is required, but missing") def _check_shock_response(self, response, errtxt): """ @@ -90,11 +134,10 @@ def _own_handle(self, genome_data, handle_property): handle_id = dfu_shock['handle']['hid'] genome_data[handle_property] = handle_id - def _check_dna_sequence_in_features(self, genome): + def check_dna_sequence_in_features(self, genome): """ - _check_dna_sequence_in_features: check dna sequence in each feature + check_dna_sequence_in_features: check dna sequence in each feature """ - logging.info('start checking dna sequence in each feature') if 'features' in genome: features_to_work = {} @@ -128,54 +171,70 @@ def get_one_genome(self, params): return data, res['info'] # return self.dfu.get_objects(params)['data'][0] - def save_one_genome(self, params): - logging.info('start saving genome object') - self._validate_save_one_genome_params(params) - workspace = params['workspace'] - name = params['name'] - data = params['data'] - # XXX there is no `workspace_datatype` param in the spec - ws_datatype = params.get('workspace_datatype', "KBaseGenomes.Genome") - # XXX there is no `meta` param in the spec - meta = params.get('meta', {}) - if "AnnotatedMetagenomeAssembly" in ws_datatype: - if params.get('upgrade') or 'feature_counts' not in data: - data = self._update_metagenome(data) - else: - if params.get('upgrade') or 'feature_counts' not in data: - data = self._update_genome(data) - - 
# check all handles point to shock nodes owned by calling user - self._own_handle(data, 'genbank_handle_ref') - self._own_handle(data, 'gff_handle_ref') - if "AnnotatedMetagenomeAssembly" not in ws_datatype: - self._check_dna_sequence_in_features(data) - data['warnings'] = self.validate_genome(data) - - # sort data - data = GenomeUtils.sort_dict(data) - # dump genome to scratch for upload - data_path = os.path.join(self.scratch, name + ".json") - json.dump(data, open(data_path, 'w')) - if 'hidden' in params and str(params['hidden']).lower() in ('yes', 'true', 't', '1'): - hidden = 1 - else: - hidden = 0 + def _save_genome_mass(self, params, validate_genome=True): - if isinstance(workspace, int) or workspace.isdigit(): - workspace_id = workspace - else: - workspace_id = self.dfu.ws_name_to_id(workspace) - - save_params = {'id': workspace_id, - 'objects': [{'type': ws_datatype, - 'data_json_file': data_path, - 'name': name, - 'meta': meta, - 'hidden': hidden}]} - dfu_oi = self.ws_large_data.save_objects(save_params)[0] - returnVal = {'info': dfu_oi, 'warnings': data.get('warnings', [])} - return returnVal + workspace_id = params[_WSID] + inputs = params[_INPUTS] + + objects = [] + warnings = [] + + for input_params in inputs: + + obj = {} + + # retrieve required params + name = input_params['name'] + data = dict(input_params['data']) + + # XXX there is no `workspace_datatype` param in the spec + # NOTE: This allows a user to specify any arbitrary workspace type which could cause, + # in the worst case, data corruption. It should be removed from the API + # (note it is not currently documented) so users cannot access it. 
+ ws_datatype = input_params.get('workspace_datatype', "KBaseGenomes.Genome") + # XXX there is no `meta` param in the spec + meta = input_params.get('meta', {}) + + obj["type"] = ws_datatype + obj["name"] = name + obj["meta"] = meta + + if "AnnotatedMetagenomeAssembly" not in ws_datatype: + if input_params.get('upgrade') or 'feature_counts' not in data: + data = self._update_genome(data) + + # check all handles point to shock nodes owned by calling user + self._own_handle(data, 'genbank_handle_ref') + self._own_handle(data, 'gff_handle_ref') + if "AnnotatedMetagenomeAssembly" not in ws_datatype and validate_genome: + self.check_dna_sequence_in_features(data) + data['warnings'] = self.validate_genome(data) + + # sort data + data = GenomeUtils.sort_dict(data) + # dump genome to scratch for upload + data_path = os.path.join(self.scratch, name + ".json") + json.dump(data, open(data_path, 'w')) + if 'hidden' in input_params and str(input_params['hidden']).lower() in ('yes', 'true', 't', '1'): + hidden = 1 + else: + hidden = 0 + + obj["data_json_file"] = data_path + obj["hidden"] = hidden + + objects.append(obj) + warnings.append(data.get('warnings', [])) + + dfu_infos = self.ws_large_data.save_objects( + {'id': workspace_id, 'objects': objects} + ) + + output = [ + {'info': dfu_oi, 'warnings': warning} + for dfu_oi, warning in zip(dfu_infos, warnings) + ] + return output @staticmethod def determine_tier(source): @@ -203,11 +262,6 @@ def determine_tier(source): return "Ensembl", ['Representative', 'ExternalDB'] return source, ['User'] - def _update_metagenome(self, genome): - """Checks for missing required fields and fixes breaking changes""" - if 'molecule_type' not in genome: - genome['molecule_type'] = 'Unknown' - def _update_genome(self, genome): """Checks for missing required fields and fixes breaking changes""" # do top level updates @@ -345,8 +399,6 @@ def validate_genome(g): """ allowed_tiers = {'Representative', 'Reference', 'ExternalDB', 'User'} - - 
logging.info('Validating genome object contents') warnings = g.get('warnings', []) # TODO: Determine whether these checks make any sense for Metagenome diff --git a/lib/GenomeFileUtil/core/GenomeUtils.py b/lib/GenomeFileUtil/core/GenomeUtils.py index 0ffeae29..491f6d24 100644 --- a/lib/GenomeFileUtil/core/GenomeUtils.py +++ b/lib/GenomeFileUtil/core/GenomeUtils.py @@ -3,13 +3,18 @@ import os import re import time +from typing import Callable, Dict, Any from relation_engine_client import REClient from relation_engine_client.exceptions import RENotFound +from GenomeFileUtil.core.MiscUtils import get_int # Name of the ncbi taxonomy namespace stored in "taxon_assignments" _NCBI_TAX = 'ncbi' +_WSID = 'workspace_id' +_INPUTS = 'inputs' + warnings = { "cds_excluded": "SUSPECT: CDS from {} was excluded because the associated " "CDS failed coordinates validation", @@ -482,4 +487,89 @@ def set_taxon_data(tax_id, re_api_url, genome_dict): ) # Assign the scientific name to the most specific (right-most) taxon in the lineage genome_dict['scientific_name'] = sciname - \ No newline at end of file + + +def set_up_single_params( + params: Dict[str, Any], + ws: str, + validate_params_func: Callable[[Dict[str, Any]], None], + ws_name_to_id_func: Callable[[str], int] +) -> Dict[str, Any]: + f""" + Sets up parameters by validating them and ensuring that exactly one of workspace ID or name is provided. + + Args: + params (Dict[str, Any]): A dictionary where the keys are parameter names (strings) and the values + can be of any type. + ws (str): A string representing the key for the workspace name or identifier. + validate_params_func (Callable[[Dict[str, Any]], None]): A function that takes a dictionary of parameters + and validates them. This function should raise an exception if the parameters are invalid. + ws_name_to_id_func (Callable[[str], int]): A function that takes a workspace name (string) and returns + its corresponding ID (integer). 
+ + Returns: + Dict[str, Any]: A dictionary containing the workspace ID and the processed parameters. The dictionary + has keys {_WSID} and {_INPUTS}, where {_WSID} is the workspace ID and {_INPUTS} is a list containing + the input parameters. + + Raises: + ValueError: If neither or both the workspace ID and workspace name are provided in the parameters. + + Notes: + - If a workspace ID is not provided, the function will attempt to convert the workspace name to an ID + using `ws_name_to_id_func`. + - It is preferable to provide a workspace ID directly to avoid potential race conditions with mutable + workspace names. + """ + inputs = dict(params) + validate_params_func(inputs) + ws_id = get_int(inputs.pop(_WSID, None), _WSID) + ws_name = inputs.pop(ws, None) + if bool(ws_id) == bool(ws_name): # xnor + raise ValueError(f"Exactly one of a '{_WSID}' or a '{ws}' parameter must be provided") + if not ws_id: + print(f"Translating workspace name {ws_name} to a workspace ID. Prefer submitting " + + "a workspace ID over a mutable workspace name that may cause race conditions") + ws_id = ws_name_to_id_func(ws_name) + mass_params = {_WSID: ws_id, _INPUTS: [inputs]} + return mass_params + + +def validate_mass_params( + params: Dict[str, Any], + validate_params_func: Callable[[Dict[str, Any]], None] +) -> None: + f""" + Validates the provided parameters according to specific rules. + + Args: + params (Dict[str, Any]): A dictionary containing parameters to validate. Must include: + - {_WSID}: A workspace ID, which must be present and valid. + - {_INPUTS}: A list of parameter dictionaries, each of which must be validated by `validate_params_func`. + + validate_params_func (Callable[[Dict[str, Any]], None]): A function that takes a dictionary of parameters + and validates it. The function should raise an exception if the parameters are invalid. 
+ + Raises: + ValueError: If {_WSID} is missing or invalid, if {_INPUTS} is missing or not a non-empty list, or if any + entry in {_INPUTS} is not a dictionary or fails validation. + + Notes: + - The function checks that {_WSID} is present and validates that it is an integer >= 1 using `get_int`. + - The {_INPUTS} field must be a non-empty list of dictionaries. Each dictionary in the list is validated + using `validate_params_func`. + - If any validation fails, a `ValueError` is raised with a message indicating the issue and entry index. + """ + ws_id = get_int(params.get(_WSID), _WSID) + if not ws_id: + raise ValueError(f"{_WSID} is required") + inputs = params.get(_INPUTS) + if not inputs or type(inputs) != list: + raise ValueError(f"{_INPUTS} field is required and must be a non-empty list") + for i, inp in enumerate(inputs, start=1): + if type(inp) != dict: + raise ValueError(f"Entry #{i} in {_INPUTS} field is not a mapping as required") + try: + validate_params_func(inp) + except Exception as e: + raise ValueError(f"Entry #{i} in {_INPUTS} field has invalid params: {e}") from e \ No newline at end of file diff --git a/lib/GenomeFileUtil/core/MiscUtils.py b/lib/GenomeFileUtil/core/MiscUtils.py index a34eb6f7..e99a7b71 100644 --- a/lib/GenomeFileUtil/core/MiscUtils.py +++ b/lib/GenomeFileUtil/core/MiscUtils.py @@ -12,3 +12,29 @@ def validate_lists_have_same_elements(l1, l2): diff = set(l1) ^ (set(l2)) # get the symmetric difference of the sets # check if all ids are shared return len(diff) == 0 + + +def get_int(putative_int, name, minimum=1): + """ + Validates and returns an integer value. + + This function checks whether the provided value is an integer and if it meets the specified minimum value. + If the checks are not passed, it raises a `ValueError` with a descriptive message. + + Args: + putative_int (int or None): The value to be validated and returned. If `None`, it will be returned as is. + name (str): A descriptive name for the value being checked. 
This is used in error messages. + minimum (int, optional): The minimum acceptable value for `putative_int`. Defaults to 1. + + Returns: + int: The validated integer if all checks are passed. + + Raises: + ValueError: If `putative_int` is not an integer, or if it is less than `minimum`. + """ + if putative_int is not None: + if type(putative_int) is not int: + raise ValueError(f"{name} must be an integer, got: {putative_int}") + if putative_int < minimum: + raise ValueError(f"{name} must be an integer >= {minimum}") + return putative_int diff --git a/test/problematic_tests/save_genome_test.py b/test/problematic_tests/save_genome_test.py index 7a93d816..c2e51b46 100644 --- a/test/problematic_tests/save_genome_test.py +++ b/test/problematic_tests/save_genome_test.py @@ -11,6 +11,7 @@ import urllib.request from configparser import ConfigParser from os import environ +from datetime import datetime import requests # noqa: F401 @@ -23,6 +24,13 @@ from GenomeFileUtil.core.GenomeInterface import GenomeInterface from installed_clients.WorkspaceClient import Workspace as workspaceService +_KBASE_GENOME = "KBaseGenomes.Genome" +_GENOME_FILE_WARNINGS = [ + 'For prokaryotes, CDS array should generally be the same length as the Features array.', + 'Genome molecule_type Unknown is not expected for domain Bacteria.', + 'Unable to determine organism taxonomy' +] + class SaveGenomeTest(unittest.TestCase): @@ -92,6 +100,7 @@ def setUpClass(cls): suffix = int(time.time() * 1000) cls.wsName = "test_SaveGenomeTest_" + str(suffix) cls.wsClient.create_workspace({'workspace': cls.wsName}) + cls.wsID = cls.dfu.ws_name_to_id(cls.wsName) cls.nodes_to_delete = [] cls.prepare_data() @@ -114,8 +123,7 @@ def delete_shock_node(cls, node_id): @classmethod def prepare_data(cls): - assembly_file_path = os.path.join(cls.scratch, - 'e_coli_assembly.fasta') + assembly_file_path = os.path.join(cls.scratch,'e_coli_assembly.fasta') shutil.copy('data/e_coli/e_coli_assembly.fasta', assembly_file_path) au = 
AssemblyUtil(os.environ['SDK_CALLBACK_URL']) assembly_ref = au.save_assembly_from_fasta({ @@ -142,29 +150,46 @@ def start_test(self): testname = inspect.stack()[1][3] print(('\n*** starting test: ' + testname + ' **')) - def fail_save_one_genome(self, params, error, exception=ValueError, contains=False): + def fail_save_genome(self, params, error, exception=ValueError, contains=False, mass=False): with self.assertRaises(exception) as context: - self.getImpl().save_one_genome(self.ctx, params) + if mass: + self.genome_interface.save_genome_mass(params) + else: + self.getImpl().save_one_genome(self.ctx, params) if contains: self.assertIn(error, str(context.exception)) else: self.assertEqual(error, str(context.exception)) - def check_save_one_genome_output(self, ret, genome_name): + def check_save_one_genome_output( + self, + ret, + genome_name, + data_type=_KBASE_GENOME, + warnings=_GENOME_FILE_WARNINGS + ): self.assertTrue('info' in ret) + self.assertTrue('warnings' in ret) + # Check info genome_info = ret['info'] self.assertEqual(genome_info[1], genome_name) - self.assertEqual(genome_info[2].split('-')[0], 'KBaseGenomes.Genome') + self.assertEqual(genome_info[2].split('-')[0], data_type) + self.assertTrue(datetime.strptime(genome_info[3], '%Y-%m-%dT%H:%M:%S+%f')) self.assertEqual(genome_info[5], self.user_id) + self.assertEqual(genome_info[6], self.wsID) + self.assertEqual(genome_info[7], self.wsName) + + # Check warnings + self.assertEqual(ret['warnings'], warnings) def test_bad_one_genome_params(self): self.start_test() invalidate_params = {'missing_workspace': 'workspace', 'name': 'name', 'data': 'data'} - error_msg = '"workspace" parameter is required, but missing' - self.fail_save_one_genome(invalidate_params, error_msg) + error_msg = "Exactly one of a 'workspace_id' or a 'workspace' parameter must be provided" + self.fail_save_genome(invalidate_params, error_msg) def test_one_genome(self): self.start_test() @@ -192,11 +217,58 @@ def 
test_one_genome_with_hidden(self): ret = self.getImpl().save_one_genome(self.ctx, params)[0] self.check_save_one_genome_output(ret, genome_name) + def test_genomes(self): + self.start_test() + genome_name = 'test_genome' + inputs = [ + { + 'name': genome_name, + 'data': self.test_genome_data, + } + ] + params = {'workspace_id': self.wsID, 'inputs': inputs} + ret = self.genome_interface.save_genome_mass(params, validate_genome=True)[0] + self.check_save_one_genome_output(ret, genome_name) + + def test_genomes_with_hidden(self): + self.start_test() + genome_name = 'test_genome_hidden' + inputs = [ + { + 'name': genome_name, + 'data': self.test_genome_data, + 'hidden': 1, + } + ] + params = {'workspace_id': self.wsID, 'inputs': inputs} + ret = self.genome_interface.save_genome_mass(params)[0] + self.check_save_one_genome_output(ret, genome_name, warnings=[]) + + inputs = [ + { + 'name': genome_name, + 'data': self.test_genome_data, + 'hidden': True, + } + ] + params = {'workspace_id': self.wsID, 'inputs': inputs} + ret = self.genome_interface.save_genome_mass(params)[0] + self.check_save_one_genome_output(ret, genome_name, warnings=[]) + + def test_bad_genomes_params_missing_parameter(self): + self.start_test() + invalidate_params = { + 'workspace_id': self.wsID, + 'inputs': [{'data': 'data'}], + } + error_msg = "Entry #1 in inputs field has invalid params: name parameter is required, but missing" + self.fail_save_genome(invalidate_params, error_msg, mass=True) + def test_GenomeInterface_check_dna_sequence_in_features(self): # no feature in genome genome = {'missing_features': 'features'} copied_genome = genome.copy() - self.genome_interface._check_dna_sequence_in_features(copied_genome) + self.genome_interface.check_dna_sequence_in_features(copied_genome) self.assertEqual(copied_genome, genome) # with contigs @@ -204,7 +276,7 @@ def test_GenomeInterface_check_dna_sequence_in_features(self): for feat in copied_genome['features']: if 'dna_sequence' in feat: del 
feat['dna_sequence'] - self.genome_interface._check_dna_sequence_in_features(copied_genome) + self.genome_interface.check_dna_sequence_in_features(copied_genome) feature_dna_sum = 0 for feature in copied_genome['features']: