From ae4a15b1429e9f3845542474827dadd4e91fdb60 Mon Sep 17 00:00:00 2001
From: pchaumeil <uqpchaum@uq.edu.au>
Date: Fri, 13 Apr 2018 13:15:11 +1000
Subject: [PATCH] release 83 tree.debug.error handling.

---
 bin/gtdbtk                     |   4 +
 gtdbtk/VERSION                 |   4 +
 gtdbtk/classify.py             | 561 +++++++++++++++++----------------
 gtdbtk/config/config.py        | 202 ++++++------
 gtdbtk/external/pfam_search.py |  64 ++--
 gtdbtk/main.py                 |   9 +-
 6 files changed, 453 insertions(+), 391 deletions(-)

diff --git a/bin/gtdbtk b/bin/gtdbtk
index 923c44db..5c119272 100755
--- a/bin/gtdbtk
+++ b/bin/gtdbtk
@@ -142,6 +142,8 @@ if __name__ == '__main__':
                                     help='desired prefix for output files')
     optional_classify_wf.add_argument('--cpus', default=1, type=int,
                                     help='number of CPUs to use')
+    optional_classify_wf.add_argument('--debug', action="store_true",
+                                    help='create intermediate files for debugging purposes.')
     optional_classify_wf.add_argument('-h', '--help', action="help",
                                     help="show help message")
     
@@ -249,6 +251,8 @@ if __name__ == '__main__':
                                     help='desired prefix for output files')
     optional_classify.add_argument('--cpus', default=1, type=int,
                                     help='number of CPUs to use')
+    optional_classify.add_argument('--debug', action="store_true",
+                                    help='create intermediate files for debugging purposes.')
     optional_classify.add_argument('-h', '--help', action="help",
                                     help="show help message")
                                        
diff --git a/gtdbtk/VERSION b/gtdbtk/VERSION
index 0f916e9a..73ae9d12 100644
--- a/gtdbtk/VERSION
+++ b/gtdbtk/VERSION
@@ -1,3 +1,7 @@
+0.0.6
+- Migration to R83 and integration of UBA genomes
+- add "debug" flag to classify options
+- error handling improvement
 0.0.5
 - stable version for pip 
 0.0.4b3
diff --git a/gtdbtk/classify.py b/gtdbtk/classify.py
index b6a35a34..b8bac16d 100644
--- a/gtdbtk/classify.py
+++ b/gtdbtk/classify.py
@@ -106,281 +106,295 @@ def run(self,
             genomes,
             align_dir,
             out_dir, 
-            prefix):
-        """Classify genomes based on position in reference tree."""
-        
-        for marker_set_id in ('bac120', 'ar122'):
-            user_msa_file = os.path.join(align_dir, prefix+'.%s.user_msa.fasta' % marker_set_id)
-            if not os.path.exists(user_msa_file):
-                # file will not exist if there are no User genomes from a given domain
-                continue 
-            
-            classify_tree = self.place_genomes(user_msa_file,
-                                                marker_set_id,
-                                                out_dir,
-                                                prefix)
-
-            # get taxonomic classification of each user genome
-            tree = dendropy.Tree.get_from_path(classify_tree, 
-                                                schema='newick', 
-                                                rooting='force-rooted', 
-                                                preserve_underscores=True)
-            
-            gtdb_taxonomy = Taxonomy().read(self.taxonomy_file)
-            
-            
-            fout = open(os.path.join(out_dir, prefix + '.%s.classification.tsv' % marker_set_id), 'w')
-            fastaniout = open(os.path.join(out_dir, prefix + '.%s.fastani_results.tsv' % marker_set_id), 'w')
-            redfout = open(os.path.join(out_dir, prefix + '.%s.summary.tsv' % marker_set_id), 'w')
-            #parchiinfo = open(os.path.join(out_dir, prefix + '.%s.parentinfo.tsv' % marker_set_id), 'w')
-
-            reddictfile = open(os.path.join(out_dir, prefix + '.%s.red_dictionary.tsv' % marker_set_id), 'w')
-            
-            marker_dict = {}
-            if marker_set_id == 'bac120':
-                marker_dict = Config.RED_DIST_BAC_DICT
-            elif marker_set_id == 'ar122':
-                marker_dict = Config.RED_DIST_ARC_DICT
-            reddictfile.write('Phylum\t{0}\n'.format(marker_dict.get('p__')))
-            reddictfile.write('Class\t{0}\n'.format(marker_dict.get('c__')))
-            reddictfile.write('Order\t{0}\n'.format(marker_dict.get('o__')))
-            reddictfile.write('Family\t{0}\n'.format(marker_dict.get('f__')))
-            reddictfile.write('Genus\t{0}\n'.format(marker_dict.get('g__')))
-            reddictfile.close()
-            
-            fastaniout.write("User genome\tReference genome\tANI\n")
-            #redfout.write("User genome\tRed value\tHigher rank\tHigher value\tLower rank\tLower value\tcase\tclosest_rank\tTool\n")
-            redfout.write("user_genome\tclassification_method\tred_value\n")
-            #parchiinfo.write("User genome\tHigher rank\tHigher value\tLower rank\tLower value\tcase\tclosest_rank\n")
-            
+            prefix,
+            debugopt = False ):
+        try:
+            """Classify genomes based on position in reference tree."""
             
-            # Genomes can be classified by using Mash or RED values
-            # We go through all leaves of the tree. if the leaf is a user genome we take it's parent node and look at all the leaves for this node.
-            # If the parent node has only one Reference genome ( GB or RS ) we calculate the mash distance between the user genome and the reference genome
-            analysed_nodes = []
-            fastani_dict = {}
-            all_fastani_dict = {}
-                          
-            fastani_list = []
-            # some genomes of Case C are handled here, if Mash distance is close enough 
-            self.logger.info('Calculating Average Nucleotide Identity using FastANI.')            
-             
-            for nd in tree.preorder_node_iter():
-                #We store the prefixes of each leaves to check if one starts with GB_ or RS_
-                list_subnode_initials = [subnd.taxon.label.replace("'",'')[0:3] for subnd in nd.leaf_iter()]
-                list_subnode = [subnd.taxon.label.replace("'",'') for subnd in nd.leaf_iter()]
-                #if only one genome is a reference genome
-                if (list_subnode_initials.count('RS_') + list_subnode_initials.count('GB_')) == 1 and len(list_subnode_initials) > 1 and list_subnode[0] not in analysed_nodes:
-                    fastani_list.append(list_subnode)
-                    analysed_nodes.extend(list_subnode)            
-             
-            manager = multiprocessing.Manager()
-            out_q = manager.dict()
-            procs = []
-            nprocs = self.cpus
-            if len(fastani_list) > 0:
-                for item in splitchunks_list(fastani_list, nprocs):
-                    p = multiprocessing.Process(
-                        target=self._fastaniWorker,
-                        args=(item, genomes, out_q))
-                    procs.append(p)
-                    p.start()
-                      
-                # Collect all results into a single result dict. We know how many dicts
-                # with results to expect.
-                #while out_q.empty():
-                #    time.sleep(1)
-              
-                # Wait for all worker processes to finish
-                for p in procs:
-                    p.join()
+            for marker_set_id in ('bac120', 'ar122'):
+                user_msa_file = os.path.join(align_dir, prefix+'.%s.user_msa.fasta' % marker_set_id)
+                if not os.path.exists(user_msa_file):
+                    # file will not exist if there are no User genomes from a given domain
+                    continue 
+                
+                classify_tree = self.place_genomes(user_msa_file,
+                                                    marker_set_id,
+                                                    out_dir,
+                                                    prefix)
+    
+                # get taxonomic classification of each user genome
+                tree = dendropy.Tree.get_from_path(classify_tree, 
+                                                    schema='newick', 
+                                                    rooting='force-rooted', 
+                                                    preserve_underscores=True)
+                
+                gtdb_taxonomy = Taxonomy().read(self.taxonomy_file)
+                
+                
+                fout = open(os.path.join(out_dir, prefix + '.%s.classification.tsv' % marker_set_id), 'w')
+                fastaniout = open(os.path.join(out_dir, prefix + '.%s.fastani_results.tsv' % marker_set_id), 'w')
+                redfout = open(os.path.join(out_dir, prefix + '.%s.summary.tsv' % marker_set_id), 'w')
+                if debugopt:
+                    parchiinfo = open(os.path.join(out_dir, prefix + '.%s.debug_file.tsv' % marker_set_id), 'w')
+    
+                reddictfile = open(os.path.join(out_dir, prefix + '.%s.red_dictionary.tsv' % marker_set_id), 'w')
+                
+                marker_dict = {}
+                if marker_set_id == 'bac120':
+                    marker_dict = Config.RED_DIST_BAC_DICT
+                elif marker_set_id == 'ar122':
+                    marker_dict = Config.RED_DIST_ARC_DICT
+                reddictfile.write('Phylum\t{0}\n'.format(marker_dict.get('p__')))
+                reddictfile.write('Class\t{0}\n'.format(marker_dict.get('c__')))
+                reddictfile.write('Order\t{0}\n'.format(marker_dict.get('o__')))
+                reddictfile.write('Family\t{0}\n'.format(marker_dict.get('f__')))
+                reddictfile.write('Genus\t{0}\n'.format(marker_dict.get('g__')))
+                reddictfile.close()
+                
+                fastaniout.write("User genome\tReference genome\tANI\n")
+                redfout.write("user_genome\tclassification_method\tred_value\n")
+                if debugopt:
+                    parchiinfo.write("User genome\tHigher rank\tHigher value\tLower rank\tLower value\tcase\tclosest_rank\n")
+                
+                
+                # Genomes can be classified by using Mash or RED values
+                # We go through all leaves of the tree. if the leaf is a user genome we take it's parent node and look at all the leaves for this node.
+                # If the parent node has only one Reference genome ( GB or RS ) we calculate the mash distance between the user genome and the reference genome
+                analysed_nodes = []
+                fastani_dict = {}
+                all_fastani_dict = {}
+                              
+                fastani_list = []
+                # some genomes of Case C are handled here, if Mash distance is close enough 
+                self.logger.info('Calculating Average Nucleotide Identity using FastANI.')            
+                 
+                for nd in tree.preorder_node_iter():
+                    #We store the prefixes of each leaves to check if one starts with GB_ or RS_
+                    list_subnode_initials = [subnd.taxon.label.replace("'",'')[0:3] for subnd in nd.leaf_iter()]
+                    list_subnode = [subnd.taxon.label.replace("'",'') for subnd in nd.leaf_iter()]
+                    #if only one genome is a reference genome
+                    if (list_subnode_initials.count('RS_') + list_subnode_initials.count('GB_')+ list_subnode_initials.count('UBA')) == 1 and len(list_subnode_initials) > 1 and list_subnode[0] not in analysed_nodes:
+                        fastani_list.append(list_subnode)
+                        analysed_nodes.extend(list_subnode)            
+                 
+                manager = multiprocessing.Manager()
+                out_q = manager.dict()
+                procs = []
+                nprocs = self.cpus
+                if len(fastani_list) > 0:
+                    for item in splitchunks_list(fastani_list, nprocs):
+                        p = multiprocessing.Process(
+                            target=self._fastaniWorker,
+                            args=(item, genomes, out_q))
+                        procs.append(p)
+                        p.start()
                           
-                all_fastani_dict = dict(out_q)
-            
+                    # Collect all results into a single result dict. We know how many dicts
+                    # with results to expect.
+                    #while out_q.empty():
+                    #    time.sleep(1)
+                  
+                    # Wait for all worker processes to finish
+                    for p in procs:
+                        p.join()
+                        if p.exitcode == 1:
+                            raise ValueError("Stop!!")
+                              
+                    all_fastani_dict = dict(out_q)
                 
-   
-            for k,v in all_fastani_dict.iteritems():
-                fastaniout.write("{0}\t{1}\t{2}\n".format(k,v.get("ref_genome"),v.get("ani")))
-                if Config.FASTANI_SPECIES_THRESHOLD < v.get("ani"):
-                    suffixed_name = add_ncbi_prefix(v.get("ref_genome"))
-                    taxa_str = ";".join(gtdb_taxonomy.get(suffixed_name))
-                    if taxa_str.endswith("s__"):
-                        taxa_str = taxa_str+v.get("ref_genome")
-                    fout.write('%s\t%s\n' % (k, taxa_str))
-                    fastani_dict[k]=v
-                    redfout.write("{0}\tani\tNone\n".format(k))
-            fastaniout.close()
-            
-            self.logger.info('{0} genomes have been classify with FastANI.'.format(len(fastani_dict)))
-            
-            
-
-            scaled_tree = self._calculate_red_distances(classify_tree, out_dir)
-            
-            
-            user_genome_ids = set(read_fasta(user_msa_file).keys())
-            user_genome_ids = user_genome_ids.difference(set(fastani_dict.keys()))
-            # for all other cases we measure the RED distance between a leaf and a parent node ( RED = 1-edge_length). This RED value will tell us
-            # the rank level that can be associated with a User genome. 
-            # As an example if the RED value is close to the order level, the user genome will take the order level of the Reference genome under the same parent node.
-            # Is there are multiple orders under the parent node. The user genome is considered as a new order
-            for leaf in scaled_tree.leaf_node_iter(): 
-                if leaf.taxon.label in user_genome_ids:
-                    taxa = []
-                    # In some cases , pplacer can associate 2 user genomes on the same parent node so we need to go up the tree to find a node with a reference genome as leaf.
-                    cur_node = leaf.parent_node
-                    list_subnode_initials = [subnd.taxon.label.replace("'",'')[0:3] for subnd in cur_node.leaf_iter()]
-                    while 'RS_' not in list_subnode_initials and 'GB_' not in list_subnode_initials:
-                        cur_node = cur_node.parent_node
+                for k,v in all_fastani_dict.iteritems():
+                    fastaniout.write("{0}\t{1}\t{2}\n".format(k,v.get("ref_genome"),v.get("ani")))
+                    if Config.FASTANI_SPECIES_THRESHOLD <= v.get("ani"):
+                        suffixed_name = add_ncbi_prefix(v.get("ref_genome"))
+                        taxa_str = ";".join(gtdb_taxonomy.get(suffixed_name))
+                        if taxa_str.endswith("s__"):
+                            taxa_str = taxa_str+v.get("ref_genome")
+                        fout.write('%s\t%s\n' % (k, taxa_str))
+                        fastani_dict[k]=v
+                        redfout.write("{0}\tani\tNone\n".format(k))
+                fastaniout.close()
+                
+                self.logger.info('{0} genomes have been classify with FastANI.'.format(len(fastani_dict)))
+                
+                
+    
+                scaled_tree = self._calculate_red_distances(classify_tree, out_dir)
+                
+                
+                user_genome_ids = set(read_fasta(user_msa_file).keys())
+                user_genome_ids = user_genome_ids.difference(set(fastani_dict.keys()))
+                # for all other cases we measure the RED distance between a leaf and a parent node ( RED = 1-edge_length). This RED value will tell us
+                # the rank level that can be associated with a User genome. 
+                # As an example if the RED value is close to the order level, the user genome will take the order level of the Reference genome under the same parent node.
+                # Is there are multiple orders under the parent node. The user genome is considered as a new order
+                for leaf in scaled_tree.leaf_node_iter(): 
+                    if leaf.taxon.label in user_genome_ids:
+                        taxa = []
+                        # In some cases , pplacer can associate 2 user genomes on the same parent node so we need to go up the tree to find a node with a reference genome as leaf.
+                        cur_node = leaf.parent_node
                         list_subnode_initials = [subnd.taxon.label.replace("'",'')[0:3] for subnd in cur_node.leaf_iter()]
-                    
-                    current_rel_list = cur_node.rel_dist
-                    
-                    parent_taxon_node  = cur_node.parent_node
-                    _support, parent_taxon, _aux_info = parse_label(parent_taxon_node.label)
-
-                    
-                    while parent_taxon_node is not None and not parent_taxon:
-                            parent_taxon_node =parent_taxon_node.parent_node
-                            _support, parent_taxon, _aux_info = parse_label(parent_taxon_node.label)
-
-                    parent_rank = parent_taxon.split(";")[-1][0:3]
-                    parent_rel_dist = parent_taxon_node.rel_dist
-                    
-                    
-                    genome_parent_child = [leaf.taxon.label,parent_rank,parent_rel_dist,'','','','']
-                    
-                    child_taxons = []
-                    closest_rank = None;
-                    detection = "RED"
-                    # if the genome is placed between the genus and specie ranks , it will be associated with the genus when _get_closest_red_rank is called
-                    if parent_rank != 'g__':
-                        child_rk = self.order_rank[self.order_rank.index(parent_rank)+1]
-                        list_subnode = [childnd.taxon.label.replace("'",'') for childnd in cur_node.leaf_iter() if (childnd.taxon.label.startswith('RS_') or childnd.taxon.label.startswith('GB_'))]
-                        list_ranks = [gtdb_taxonomy.get(name)[self.order_rank.index(child_rk)] for name in list_subnode]
-                        if len(set(list_ranks)) == 1:
-                            for subranknd in cur_node.preorder_iter():
-                                _support, subranknd_taxon, _aux_info = parse_label(subranknd.label)
-                                if subranknd.is_internal() and subranknd_taxon is not None and subranknd_taxon.startswith(child_rk):
-                                    child_taxons = subranknd_taxon.split(";")
-                                    child_taxon_node = subranknd
-                                    child_rel_dist = child_taxon_node.rel_dist
-                                    break
+                        while 'RS_' not in list_subnode_initials and 'GB_' not in list_subnode_initials and 'UBA' not in list_subnode_initials:
+                            cur_node = cur_node.parent_node
+                            list_subnode_initials = [subnd.taxon.label.replace("'",'')[0:3] for subnd in cur_node.leaf_iter()]
+                        
+                        current_rel_list = cur_node.rel_dist
+                        
+                        parent_taxon_node  = cur_node.parent_node
+                        _support, parent_taxon, _aux_info = parse_label(parent_taxon_node.label)
+    
+                        
+                        while parent_taxon_node is not None and not parent_taxon:
+                                parent_taxon_node =parent_taxon_node.parent_node
+                                _support, parent_taxon, _aux_info = parse_label(parent_taxon_node.label)
+    
+                        parent_rank = parent_taxon.split(";")[-1][0:3]
+                        parent_rel_dist = parent_taxon_node.rel_dist
+                        
+                        
+                        genome_parent_child = [leaf.taxon.label,parent_rank,parent_rel_dist,'','','','']
+                        
+                        child_taxons = []
+                        closest_rank = None;
+                        detection = "RED"
+                        # if the genome is placed between the genus and specie ranks , it will be associated with the genus when _get_closest_red_rank is called
+                        if parent_rank != 'g__':
+                            child_rk = self.order_rank[self.order_rank.index(parent_rank)+1]
+                            list_subnode = [childnd.taxon.label.replace("'",'') for childnd in cur_node.leaf_iter() if (childnd.taxon.label.startswith('RS_') or childnd.taxon.label.startswith('GB_'))]
+                            list_ranks = [gtdb_taxonomy.get(name)[self.order_rank.index(child_rk)] for name in list_subnode]
+                            if len(set(list_ranks)) == 1:
+                                for subranknd in cur_node.preorder_iter():
+                                    _support, subranknd_taxon, _aux_info = parse_label(subranknd.label)
+                                    if subranknd.is_internal() and subranknd_taxon is not None and subranknd_taxon.startswith(child_rk):
+                                        child_taxons = subranknd_taxon.split(";")
+                                        child_taxon_node = subranknd
+                                        child_rel_dist = child_taxon_node.rel_dist
+                                        break
+                            else:
+                                #case 2a and 2b                        
+                                closest_rank = parent_rank;
+                                detection = "Topology"
                         else:
-                            #case 2a and 2b                        
+                            #case 1a
                             closest_rank = parent_rank;
                             detection = "Topology"
-                    else:
-                        #case 1a
-                        closest_rank = parent_rank;
-                        detection = "Topology"
-                    
-                    #case 1b    
-                    if len(child_taxons) == 0 and closest_rank is None:
-                        list_leaves = [childnd.taxon.label.replace("'",'') for childnd in cur_node.leaf_iter() if (childnd.taxon.label.startswith('RS_') or childnd.taxon.label.startswith('GB_'))]
-                        if len(list_leaves) != 1 :
-                            self.logger.error('There should be only one leaf.')
-                            sys.exit(-1)
-                        list_leaf_ranks = gtdb_taxonomy.get(list_leaves[0])[self.order_rank.index(child_rk):-1]
-                        for leaf_taxon in reversed(list_leaf_ranks):
-                            if leaf_taxon == list_leaf_ranks[0]:
-                                if  abs(current_rel_list - marker_dict.get(leaf_taxon[:3])) < abs((current_rel_list) - marker_dict.get(parent_rank)):
-                                    #and current_rel_list - marker_dict.get(leaf_taxon[:3]) > 0 ):
-                                    closest_rank = leaf_taxon[:3]
-                                    genome_parent_child[3]=leaf_taxon
-                                    genome_parent_child[5]='case 1b - III'
-                                    break
+                        
+                        #case 1b    
+                        if len(child_taxons) == 0 and closest_rank is None:
+                            list_leaves = [childnd.taxon.label.replace("'",'') for childnd in cur_node.leaf_iter() if (childnd.taxon.label.startswith('RS_') or childnd.taxon.label.startswith('GB_'))]
+                            if len(list_leaves) != 1 :
+                                self.logger.error('There should be only one leaf.')
+                                sys.exit(-1)
+                            list_leaf_ranks = gtdb_taxonomy.get(list_leaves[0])[self.order_rank.index(child_rk):-1]
+                            for leaf_taxon in reversed(list_leaf_ranks):
+                                if leaf_taxon == list_leaf_ranks[0]:
+                                    if  abs(current_rel_list - marker_dict.get(leaf_taxon[:3])) < abs((current_rel_list) - marker_dict.get(parent_rank)):
+                                        #and current_rel_list - marker_dict.get(leaf_taxon[:3]) > 0 ):
+                                        closest_rank = leaf_taxon[:3]
+                                        genome_parent_child[3]=leaf_taxon
+                                        genome_parent_child[5]='case 1b - III'
+                                        break
+                                else:
+                                    pchildrank = list_leaf_ranks[list_leaf_ranks.index(leaf_taxon)-1]
+                                    if abs(current_rel_list - marker_dict.get(leaf_taxon[:3])) < abs(current_rel_list-marker_dict.get(pchildrank[:3])):
+                                        #and current_rel_list - marker_dict.get(leaf_taxon[:3]) > 0 ) :
+                                        closest_rank = leaf_taxon[:3]
+                                        genome_parent_child[1]=pchildrank
+                                        genome_parent_child[2]=1.0
+                                        genome_parent_child[3]= leaf_taxon
+                                        genome_parent_child[5]='case 1b - II'                                 
+                                        break
+                            if closest_rank is None:
+                                closest_rank = parent_rank
+                                genome_parent_child[3]=list_leaf_ranks[0]
+                                genome_parent_child[5]='case 1b - IV'
+                                
+                                                
+                        #if there is multiple ranks on the child node (i.e genome between p__Nitrospirae and c__Nitrospiria;o__Nitrospirales;f__Nitropiraceae)
+                        #we loop through the list of rank from f_ to c_ rank
+                        for child_taxon in reversed(child_taxons):
+                            # if lower rank is c__Nitropiria
+                            if child_taxon == child_taxons[0]:                                                                             
+                                if ( abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(child_rel_dist -marker_dict.get(child_taxon[:3]))
+                                     and abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(current_rel_list - marker_dict.get(parent_rank))) :
+                                    genome_parent_child[3]=';'.join(child_taxons)
+                                    genome_parent_child[4]=child_rel_dist
+                                    genome_parent_child[5]='case 3b - II'
+                                    closest_rank = child_taxon[:3]
+                                elif closest_rank is None:
+                                    closest_rank = parent_rank
+                                    genome_parent_child[3]=';'.join(child_taxons)
+                                    genome_parent_child[4]=child_rel_dist
+                                    genome_parent_child[5]='case 3b - III'          
                             else:
-                                pchildrank = list_leaf_ranks[list_leaf_ranks.index(leaf_taxon)-1]
-                                if abs(current_rel_list - marker_dict.get(leaf_taxon[:3])) < abs(current_rel_list-marker_dict.get(pchildrank[:3])):
-                                    #and current_rel_list - marker_dict.get(leaf_taxon[:3]) > 0 ) :
-                                    closest_rank = leaf_taxon[:3]
-                                    genome_parent_child[1]=pchildrank
-                                    genome_parent_child[2]=1.0
-                                    genome_parent_child[3]= leaf_taxon
-                                    genome_parent_child[5]='case 1b - II'                                 
+                                pchildrank = child_taxons[child_taxons.index(child_taxon)-1]
+                                if (abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(current_rel_list - marker_dict.get(pchildrank[:3]))
+                                    and abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(child_rel_dist -marker_dict.get(child_taxon[:3]))) :
+                                    closest_rank = child_taxon
+                                    genome_parent_child[3]=';'.join(child_taxons)
+                                    genome_parent_child[4]=child_rel_dist
+                                    genome_parent_child[5]='case 3b - I'
                                     break
+    
+                        
+                        # case 1b       
                         if closest_rank is None:
-                            closest_rank = parent_rank
-                            genome_parent_child[3]=list_leaf_ranks[0]
-                            genome_parent_child[5]='case 1b - IV'
+                            print "IT SHOULDN'T HAPPEN!!!"
                             
-                                            
-                    #if there is multiple ranks on the child node (i.e genome between p__Nitrospirae and c__Nitrospiria;o__Nitrospirales;f__Nitropiraceae)
-                    #we loop through the list of rank from f_ to c_ rank
-                    for child_taxon in reversed(child_taxons):
-                        # if lower rank is c__Nitropiria
-                        if child_taxon == child_taxons[0]:                                                                             
-                            if ( abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(child_rel_dist -marker_dict.get(child_taxon[:3]))
-                                 and abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(current_rel_list - marker_dict.get(parent_rank))) :
-                                genome_parent_child[3]=';'.join(child_taxons)
-                                genome_parent_child[4]=child_rel_dist
-                                genome_parent_child[5]='case 3b - II'
-                                closest_rank = child_taxon[:3]
-                            elif closest_rank is None:
-                                closest_rank = parent_rank
-                                genome_parent_child[3]=';'.join(child_taxons)
-                                genome_parent_child[4]=child_rel_dist
-                                genome_parent_child[5]='case 3b - III'          
-                        else:
-                            pchildrank = child_taxons[child_taxons.index(child_taxon)-1]
-                            if (abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(current_rel_list - marker_dict.get(pchildrank[:3]))
-                                and abs(current_rel_list - marker_dict.get(child_taxon[:3])) < abs(child_rel_dist -marker_dict.get(child_taxon[:3]))) :
-                                closest_rank = child_taxon
-                                genome_parent_child[3]=';'.join(child_taxons)
-                                genome_parent_child[4]=child_rel_dist
-                                genome_parent_child[5]='case 3b - I'
-                                break
-
-                    
-                    # case 1b       
-                    if closest_rank is None:
-                        print "IT SHOULDN'T HAPPEN!!!"
+                            
+                        genome_parent_child[6]=closest_rank                   
                         
+                                            
+                        list_subnode = [subnd.taxon.label.replace("'",'') for subnd in cur_node.leaf_iter()]
+                        red_taxonomy = self._get_redtax(list_subnode,closest_rank,gtdb_taxonomy)
                         
-                    genome_parent_child[6]=closest_rank                   
-                    
-                                        
-                    list_subnode = [subnd.taxon.label.replace("'",'') for subnd in cur_node.leaf_iter()]
-                    red_taxonomy = self._get_redtax(list_subnode,closest_rank,gtdb_taxonomy)
-                    
-                    fout.write('{0}\t{1}\n'.format(leaf.taxon.label, red_taxonomy))
-                    del genome_parent_child[0]
-                    redfout.write("{0}\t{1}\t{2}\n".format(leaf.taxon.label,detection,current_rel_list))
-                    #redfout.write('{0}\t{1}\t{2}\t{3}\n'.format(leaf.taxon.label,current_rel_list,'\t'.join(str(x) for x in genome_parent_child),detection))
-
-            redfout.close()
-            fout.close()
-
-            pplaceout = open(os.path.join(out_dir, prefix + '.%s.classification_pplacer.tsv' % marker_set_id), 'w')        
-            
-            # We get the pplacer taxonomy for comparison
-            user_genome_ids = set(read_fasta(user_msa_file).keys())
-            for leaf in tree.leaf_node_iter():
-                if leaf.taxon.label in user_genome_ids:
-                    taxa = []
-                    cur_node = leaf
-                    while cur_node.parent_node:
-                        _support, taxon, _aux_info = parse_label(cur_node.label)      
-                        if taxon:
-                            for t in taxon.split(';')[::-1]:
-                                taxa.append(t.strip())                           
-                        cur_node = cur_node.parent_node
-                    taxa_str = ';'.join(taxa[::-1])
-                    pplaceout.write('%s\t%s\n' % (leaf.taxon.label, taxa_str))
-            pplaceout.close()
+                        fout.write('{0}\t{1}\n'.format(leaf.taxon.label, red_taxonomy))
+                        del genome_parent_child[0]
+                        redfout.write("{0}\t{1}\t{2}\n".format(leaf.taxon.label,detection,current_rel_list))
+                        if debugopt:
+                            parchiinfo.write('{0}\t{1}\t{2}\t{3}\n'.format(leaf.taxon.label,current_rel_list,'\t'.join(str(x) for x in genome_parent_child),detection))
+    
+                redfout.close()
+                fout.close()
+                if debugopt:
+                    parchiinfo.close()
+    
+                pplaceout = open(os.path.join(out_dir, prefix + '.%s.classification_pplacer.tsv' % marker_set_id), 'w')        
+                
+                # We get the pplacer taxonomy for comparison
+                user_genome_ids = set(read_fasta(user_msa_file).keys())
+                for leaf in tree.leaf_node_iter():
+                    if leaf.taxon.label in user_genome_ids:
+                        taxa = []
+                        cur_node = leaf
+                        while cur_node.parent_node:
+                            _support, taxon, _aux_info = parse_label(cur_node.label)      
+                            if taxon:
+                                for t in taxon.split(';')[::-1]:
+                                    taxa.append(t.strip())                           
+                            cur_node = cur_node.parent_node
+                        taxa_str = ';'.join(taxa[::-1])
+                        pplaceout.write('%s\t%s\n' % (leaf.taxon.label, taxa_str))
+                pplaceout.close()
+        except ValueError as error:
+            print "GTDB-Tk has stopped before finishing"
+            sys.exit(-1)
+        except Exception as error:
+            print "GTDB-Tk has stopped before finishing"
+            sys.exit(-1)
             
 
     def _fastaniWorker(self, sublist_genomes, genomes, out_q):
-
-
-        for items in sublist_genomes:
-            dict_parser_distance = self._calculate_fastani_distance(items, genomes)
-            for k,v in dict_parser_distance.iteritems():
-                if k in out_q:
-                    print "It should not happen (if k in out_q)"
-                out_q[k]=v
-        return True
+        try:
+            for items in sublist_genomes:
+                dict_parser_distance = self._calculate_fastani_distance(items, genomes)
+                for k,v in dict_parser_distance.iteritems():
+                    if k in out_q:
+                        print "It should not happen (if k in out_q)"
+                    out_q[k]=v
+            return True
+        except Exception as error:
+            print error
+            raise
 
             
     def _parse_subtree(self,cur_node,dict_subrank,lower_rank):
@@ -455,7 +469,7 @@ def _parse_subnodes(self,list_subnode,closest_rank,gtdb_taxonomy):
         initial_loop = True
         for item in list_subnode:
             # We get the taxonomy of all reference genomes
-            if item.startswith('RS_') or item.startswith('GB_'):
+            if item.startswith('RS_') or item.startswith('GB_') or item.startswith('UBA'):
                 taxonomy_from_file = gtdb_taxonomy.get(item)
                 # we store the selected rank (i.e. order) for each reference genome 
                 for rank in taxonomy_from_file:
@@ -572,10 +586,12 @@ def _calculate_fastani_distance(self,list_leaf,genomes):
             ref_list_file = open(os.path.join(self.tmp_output_dir, 'ref_list.txt'),'w')
             make_sure_path_exists(self.tmp_output_dir)
             for leaf in list_leaf:
-                if not leaf.startswith('GB_') and not leaf.startswith('RS_'):
+                if not leaf.startswith('GB_') and not leaf.startswith('RS_') and not leaf.startswith('UBA'):
                     query_list_file.write('{0}\n'.format(genomes.get(leaf)))
                 else:
-                    shortleaf = leaf[3:]
+                    shortleaf = leaf
+                    if leaf.startswith('GB_') or leaf.startswith('RS_'):
+                        shortleaf = leaf[3:]
                     ref_list_file.write('{0}{1}{2}\n'.format(Config.FASTANI_GENOMES,shortleaf,Config.FASTANI_GENOMES_EXT))
                     
             query_list_file.close()
@@ -584,19 +600,30 @@ def _calculate_fastani_distance(self,list_leaf,genomes):
             if not os.path.isfile(os.path.join(self.tmp_output_dir, 'query_list.txt')) or not os.path.isfile(os.path.join(self.tmp_output_dir, 'ref_list.txt')):
                 raise
               
-            cmd = 'fastANI --ql {0} --rl {1} -o {2} > /dev/null 2>&1'.format(os.path.join(self.tmp_output_dir, 'query_list.txt'), 
+            cmd = 'fastANI --ql {0} --rl {1} -o {2} > /dev/null 2>{3}'.format(os.path.join(self.tmp_output_dir, 'query_list.txt'), 
                                                                            os.path.join(self.tmp_output_dir, 'ref_list.txt'), 
-                                                                           os.path.join(self.tmp_output_dir, 'results.tab'))
+                                                                           os.path.join(self.tmp_output_dir, 'results.tab'),
+                                                                           os.path.join(self.tmp_output_dir, 'error.log'))
             os.system(cmd)
-
+            
             if not os.path.isfile(os.path.join(self.tmp_output_dir,'results.tab')):
-                raise
+                errstr = 'FastANI has stopped:\n'
+                if os.path.isfile(os.path.join(self.tmp_output_dir, 'error.log')):
+                    with open(os.path.join(self.tmp_output_dir, 'error.log')) as debug:
+                        for line in debug:
+                            finalline =line
+                        errstr +=finalline
+                raise ValueError(errstr)
             
             dict_parser_distance = self._parse_fastani_results(os.path.join(self.tmp_output_dir,'results.tab'),list_leaf)
             shutil.rmtree(self.tmp_output_dir)
             return dict_parser_distance
               
-        except:
+        except ValueError as error:
+            if os.path.exists(self.tmp_output_dir):
+                shutil.rmtree(self.tmp_output_dir)
+            raise 
+        except Exception as error:
             if os.path.exists(self.tmp_output_dir):
                 shutil.rmtree(self.tmp_output_dir)
             raise
diff --git a/gtdbtk/config/config.py b/gtdbtk/config/config.py
index 8dbd02da..98e97e55 100644
--- a/gtdbtk/config/config.py
+++ b/gtdbtk/config/config.py
@@ -1,92 +1,110 @@
-# Annotation folder
-MARKER_GENE_DIR = "marker_genes"
-
-GENOME_FILE_SUFFIX = "_genomic.fna"
-
-BAC120_MARKERS = {"PFAM": ["PF00380.14.hmm", "PF00410.14.hmm", "PF00466.15.hmm", "PF01025.14.hmm", "PF02576.12.hmm", "PF03726.9.hmm"],
-                     "TIGRFAM": ["TIGR00006.HMM", "TIGR00019.HMM", "TIGR00020.HMM", "TIGR00029.HMM", "TIGR00043.HMM", "TIGR00054.HMM", "TIGR00059.HMM", "TIGR00061.HMM", "TIGR00064.HMM",
-                                 "TIGR00065.HMM", "TIGR00082.HMM", "TIGR00083.HMM", "TIGR00084.HMM", "TIGR00086.HMM", "TIGR00088.HMM", "TIGR00090.HMM", "TIGR00092.HMM", "TIGR00095.HMM",
-                                 "TIGR00115.HMM", "TIGR00116.HMM", "TIGR00138.HMM", "TIGR00158.HMM", "TIGR00166.HMM", "TIGR00168.HMM", "TIGR00186.HMM", "TIGR00194.HMM", "TIGR00250.HMM",
-                                 "TIGR00337.HMM", "TIGR00344.HMM", "TIGR00362.HMM", "TIGR00382.HMM", "TIGR00392.HMM", "TIGR00396.HMM", "TIGR00398.HMM", "TIGR00414.HMM", "TIGR00416.HMM",
-                                 "TIGR00420.HMM", "TIGR00431.HMM", "TIGR00435.HMM", "TIGR00436.HMM", "TIGR00442.HMM", "TIGR00445.HMM", "TIGR00456.HMM", "TIGR00459.HMM", "TIGR00460.HMM",
-                                 "TIGR00468.HMM", "TIGR00472.HMM", "TIGR00487.HMM", "TIGR00496.HMM", "TIGR00539.HMM", "TIGR00580.HMM", "TIGR00593.HMM", "TIGR00615.HMM", "TIGR00631.HMM",
-                                 "TIGR00634.HMM", "TIGR00635.HMM", "TIGR00643.HMM", "TIGR00663.HMM", "TIGR00717.HMM", "TIGR00755.HMM", "TIGR00810.HMM", "TIGR00922.HMM", "TIGR00928.HMM",
-                                 "TIGR00959.HMM", "TIGR00963.HMM", "TIGR00964.HMM", "TIGR00967.HMM", "TIGR01009.HMM", "TIGR01011.HMM", "TIGR01017.HMM", "TIGR01021.HMM", "TIGR01029.HMM",
-                                 "TIGR01032.HMM", "TIGR01039.HMM", "TIGR01044.HMM", "TIGR01059.HMM", "TIGR01063.HMM", "TIGR01066.HMM", "TIGR01071.HMM", "TIGR01079.HMM", "TIGR01082.HMM",
-                                 "TIGR01087.HMM", "TIGR01128.HMM", "TIGR01146.HMM", "TIGR01164.HMM", "TIGR01169.HMM", "TIGR01171.HMM", "TIGR01302.HMM", "TIGR01391.HMM", "TIGR01393.HMM",
-                                 "TIGR01394.HMM", "TIGR01510.HMM", "TIGR01632.HMM", "TIGR01951.HMM", "TIGR01953.HMM", "TIGR02012.HMM", "TIGR02013.HMM", "TIGR02027.HMM", "TIGR02075.HMM",
-                                 "TIGR02191.HMM", "TIGR02273.HMM", "TIGR02350.HMM", "TIGR02386.HMM", "TIGR02397.HMM", "TIGR02432.HMM", "TIGR02729.HMM", "TIGR03263.HMM", "TIGR03594.HMM",
-                                 "TIGR03625.HMM", "TIGR03632.HMM", "TIGR03654.HMM", "TIGR03723.HMM", "TIGR03725.HMM", "TIGR03953.HMM"]}
-
-AR122_MARKERS = {"PFAM": ["PF01868.11.hmm", "PF01282.14.hmm", "PF01655.13.hmm", "PF01092.14.hmm", "PF01000.21.hmm", "PF00368.13.hmm", "PF00827.12.hmm", "PF01269.12.hmm", "PF00466.15.hmm",
-                             "PF01015.13.hmm", "PF13685.1.hmm", "PF02978.14.hmm", "PF04919.7.hmm", "PF01984.15.hmm", "PF04104.9.hmm", "PF00410.14.hmm", "PF01798.13.hmm", "PF01864.12.hmm",
-                             "PF01990.12.hmm", "PF07541.7.hmm", "PF04019.7.hmm", "PF00900.15.hmm", "PF01090.14.hmm", "PF02006.11.hmm", "PF01157.13.hmm", "PF01191.14.hmm", "PF01866.12.hmm",
-                             "PF01198.14.hmm", "PF01496.14.hmm", "PF00687.16.hmm", "PF03874.11.hmm", "PF01194.12.hmm", "PF01200.13.hmm", "PF13656.1.hmm", "PF01280.15.hmm"],
-                    "TIGRFAM": ["TIGR00468.HMM", "TIGR01060.HMM", "TIGR03627.HMM", "TIGR01020.HMM", "TIGR02258.HMM", "TIGR00293.HMM", "TIGR00389.HMM", "TIGR01012.HMM", "TIGR00490.HMM", "TIGR03677.HMM",
-                                "TIGR03636.HMM", "TIGR03722.HMM", "TIGR00458.HMM", "TIGR00291.HMM", "TIGR00670.HMM", "TIGR00064.HMM", "TIGR03629.HMM", "TIGR00021.HMM", "TIGR03672.HMM", "TIGR00111.HMM",
-                                "TIGR03684.HMM", "TIGR01077.HMM", "TIGR01213.HMM", "TIGR01080.HMM", "TIGR00501.HMM", "TIGR00729.HMM", "TIGR01038.HMM", "TIGR00270.HMM", "TIGR03628.HMM", "TIGR01028.HMM",
-                                "TIGR00521.HMM", "TIGR03671.HMM", "TIGR00240.HMM", "TIGR02390.HMM", "TIGR02338.HMM", "TIGR00037.HMM", "TIGR02076.HMM", "TIGR00335.HMM", "TIGR01025.HMM", "TIGR00471.HMM",
-                                "TIGR00336.HMM", "TIGR00522.HMM", "TIGR02153.HMM", "TIGR02651.HMM", "TIGR03674.HMM", "TIGR00323.HMM", "TIGR00134.HMM", "TIGR02236.HMM", "TIGR03683.HMM", "TIGR00491.HMM",
-                                "TIGR00658.HMM", "TIGR03680.HMM", "TIGR00392.HMM", "TIGR00422.HMM", "TIGR00279.HMM", "TIGR01052.HMM", "TIGR00442.HMM", "TIGR00308.HMM", "TIGR00398.HMM", "TIGR00456.HMM",
-                                "TIGR00549.HMM", "TIGR00408.HMM", "TIGR00432.HMM", "TIGR00264.HMM", "TIGR00982.HMM", "TIGR00324.HMM", "TIGR01952.HMM", "TIGR03626.HMM", "TIGR03670.HMM", "TIGR00337.HMM",
-                                "TIGR01046.HMM", "TIGR01018.HMM", "TIGR00936.HMM", "TIGR00463.HMM", "TIGR01309.HMM", "TIGR03653.HMM", "TIGR00042.HMM", "TIGR02389.HMM", "TIGR00307.HMM", "TIGR03673.HMM",
-                                "TIGR00373.HMM", "TIGR01008.HMM", "TIGR00283.HMM", "TIGR00425.HMM", "TIGR00405.HMM", "TIGR03665.HMM", "TIGR00448.HMM"]}
-
-RPS23_MARKERS = {"PFAM": ["PF00687.16.hmm","PF00466.15.hmm","PF00298.14.hmm","PF03946.9.hmm","PF00238.14.hmm",
-                            "PF00252.13.hmm","PF00861.17.hmm","PF00181.18.hmm","PF00237.14.hmm",
-                            "PF03947.13.hmm","PF00297.17.hmm","PF00573.17.hmm","PF00281.14.hmm",
-                            "PF00673.16.hmm","PF00338.17.hmm","PF00411.14.hmm","PF00416.17.hmm",
-                            "PF00312.17.hmm","PF00366.15.hmm","PF00203.16.hmm","PF00189.15.hmm",
-                            "PF00333.15.hmm","PF03719.10.hmm","PF00177.16.hmm","PF00410.14.hmm",
-                            "PF00380.14.hmm","PF00164.20.hmm"],
-                    "TIGRFAM": []}
-
-GENERIC_PATH = '/srv/db/gtdbtk/'
-
-TIGRFAM_HMMS = '/srv/db/gtdbtk/markers//tigrfam/tigrfam.hmm'
-PFAM_HMM_DIR = '/srv/db/gtdbtk/markers/pfam/'
-
-
-# Information for aligning genomes
-DEFAULT_DOMAIN_THRESHOLD = 10.0
-AR_MARKER_COUNT = 122
-BAC_MARKER_COUNT = 120
-
-# Path to MSA and Taxonomy
-MSA_FOLDER = GENERIC_PATH + "msa/"
-CONCAT_BAC120 = MSA_FOLDER + "gtdb_r80_bac120.faa"
-CONCAT_AR122 = MSA_FOLDER + "gtdb_r80_ar122.faa"
-#CONCAT_RPS23 = MSA_FOLDER + "gtdb_r80_rps23.faa"
-
-TAX_FOLDER = GENERIC_PATH + "taxonomy/"
-TAXONOMY_FILE = TAX_FOLDER + "gtdb_r80_taxonomy.tsv"
-
-# Path to canonical masks
-MASK_DIR = GENERIC_PATH + "masks/"
-MASK_BAC120 = "gtdb_r80_bac120.mask"
-#MASK_BAC120 = "gtdb_r80_bac120_untrimmed.mask"
-MASK_AR122 = "gtdb_r80_ar122.mask"
-MASK_RPS23 = "gtdb_r80_rps23.mask"
-
-# Path to pplacer data
-PPLACER_DIR = GENERIC_PATH + "pplacer/"
-#PPLACER_DIR = GENERIC_PATH + "pplacer/notrimmed/"
-
-PPLACER_BAC120_REF_PKG = "gtdb_r80_bac120.refpkg"
-PPLACER_AR122_REF_PKG = "gtdb_r80_ar122.refpkg"
-PPLACER_RPS23_REF_PKG = "gtdb_r80_rps23.refpkg"
-
-FASTANI_DIR = GENERIC_PATH + "fastani/"
-FASTANI_SPECIES_THRESHOLD = 95.0
-FASTANI_GENOMES = FASTANI_DIR + "database/"
-FASTANI_GENOMES_EXT = GENOME_FILE_SUFFIX
-
-#Relative Evolution Distance
-RED_MIN_SUPPORT = 0.0
-RED_MIN_CHILDREN = 2
-RED_DIST_BAC_DICT = {"d__":0.00,"p__":0.344460726186,"c__":0.492670538599,"o__":0.648322059106,"f__":0.78784418792,"g__":0.940152273956}
-RED_DIST_ARC_DICT = {"d__":0.00,"p__":0.35531101963,"c__":0.508396357807,"o__":0.660914720355,"f__":0.728629307816,"g__":0.880665740097}
-
-
-
-
+############################
+#EDIT FROM HERE
+############################
+
+# Path to MSA and Taxonomy
+GENERIC_PATH = "/srv/db/gtdbtk/official/release83/"
+
+############################
+#If all downloaded data is in the same folder 
+# There is no need of editing variable below this point 
+############################
+
+
+TIGRFAM_HMMS = GENERIC_PATH + 'markers/tigrfam/tigrfam.hmm'
+PFAM_HMM_DIR = GENERIC_PATH + 'markers/pfam/'
+MSA_FOLDER = GENERIC_PATH + "msa/"
+MASK_DIR = GENERIC_PATH + "masks/"
+PPLACER_DIR = GENERIC_PATH + "pplacer/"
+FASTANI_DIR = GENERIC_PATH + "fastani/"
+TAX_FOLDER = GENERIC_PATH + "taxonomy/"
+
+
+
+############################
+#STOP EDIT
+############################
+
+
+
+
+
+BAC120_MARKERS = {"PFAM": ["PF00380.14.hmm", "PF00410.14.hmm", "PF00466.15.hmm", "PF01025.14.hmm", "PF02576.12.hmm", "PF03726.9.hmm"],
+                     "TIGRFAM": ["TIGR00006.HMM", "TIGR00019.HMM", "TIGR00020.HMM", "TIGR00029.HMM", "TIGR00043.HMM", "TIGR00054.HMM", "TIGR00059.HMM", "TIGR00061.HMM", "TIGR00064.HMM",
+                                 "TIGR00065.HMM", "TIGR00082.HMM", "TIGR00083.HMM", "TIGR00084.HMM", "TIGR00086.HMM", "TIGR00088.HMM", "TIGR00090.HMM", "TIGR00092.HMM", "TIGR00095.HMM",
+                                 "TIGR00115.HMM", "TIGR00116.HMM", "TIGR00138.HMM", "TIGR00158.HMM", "TIGR00166.HMM", "TIGR00168.HMM", "TIGR00186.HMM", "TIGR00194.HMM", "TIGR00250.HMM",
+                                 "TIGR00337.HMM", "TIGR00344.HMM", "TIGR00362.HMM", "TIGR00382.HMM", "TIGR00392.HMM", "TIGR00396.HMM", "TIGR00398.HMM", "TIGR00414.HMM", "TIGR00416.HMM",
+                                 "TIGR00420.HMM", "TIGR00431.HMM", "TIGR00435.HMM", "TIGR00436.HMM", "TIGR00442.HMM", "TIGR00445.HMM", "TIGR00456.HMM", "TIGR00459.HMM", "TIGR00460.HMM",
+                                 "TIGR00468.HMM", "TIGR00472.HMM", "TIGR00487.HMM", "TIGR00496.HMM", "TIGR00539.HMM", "TIGR00580.HMM", "TIGR00593.HMM", "TIGR00615.HMM", "TIGR00631.HMM",
+                                 "TIGR00634.HMM", "TIGR00635.HMM", "TIGR00643.HMM", "TIGR00663.HMM", "TIGR00717.HMM", "TIGR00755.HMM", "TIGR00810.HMM", "TIGR00922.HMM", "TIGR00928.HMM",
+                                 "TIGR00959.HMM", "TIGR00963.HMM", "TIGR00964.HMM", "TIGR00967.HMM", "TIGR01009.HMM", "TIGR01011.HMM", "TIGR01017.HMM", "TIGR01021.HMM", "TIGR01029.HMM",
+                                 "TIGR01032.HMM", "TIGR01039.HMM", "TIGR01044.HMM", "TIGR01059.HMM", "TIGR01063.HMM", "TIGR01066.HMM", "TIGR01071.HMM", "TIGR01079.HMM", "TIGR01082.HMM",
+                                 "TIGR01087.HMM", "TIGR01128.HMM", "TIGR01146.HMM", "TIGR01164.HMM", "TIGR01169.HMM", "TIGR01171.HMM", "TIGR01302.HMM", "TIGR01391.HMM", "TIGR01393.HMM",
+                                 "TIGR01394.HMM", "TIGR01510.HMM", "TIGR01632.HMM", "TIGR01951.HMM", "TIGR01953.HMM", "TIGR02012.HMM", "TIGR02013.HMM", "TIGR02027.HMM", "TIGR02075.HMM",
+                                 "TIGR02191.HMM", "TIGR02273.HMM", "TIGR02350.HMM", "TIGR02386.HMM", "TIGR02397.HMM", "TIGR02432.HMM", "TIGR02729.HMM", "TIGR03263.HMM", "TIGR03594.HMM",
+                                 "TIGR03625.HMM", "TIGR03632.HMM", "TIGR03654.HMM", "TIGR03723.HMM", "TIGR03725.HMM", "TIGR03953.HMM"]}
+
+AR122_MARKERS = {"PFAM": ["PF01868.11.hmm", "PF01282.14.hmm", "PF01655.13.hmm", "PF01092.14.hmm", "PF01000.21.hmm", "PF00368.13.hmm", "PF00827.12.hmm", "PF01269.12.hmm", "PF00466.15.hmm",
+                             "PF01015.13.hmm", "PF13685.1.hmm", "PF02978.14.hmm", "PF04919.7.hmm", "PF01984.15.hmm", "PF04104.9.hmm", "PF00410.14.hmm", "PF01798.13.hmm", "PF01864.12.hmm",
+                             "PF01990.12.hmm", "PF07541.7.hmm", "PF04019.7.hmm", "PF00900.15.hmm", "PF01090.14.hmm", "PF02006.11.hmm", "PF01157.13.hmm", "PF01191.14.hmm", "PF01866.12.hmm",
+                             "PF01198.14.hmm", "PF01496.14.hmm", "PF00687.16.hmm", "PF03874.11.hmm", "PF01194.12.hmm", "PF01200.13.hmm", "PF13656.1.hmm", "PF01280.15.hmm"],
+                    "TIGRFAM": ["TIGR00468.HMM", "TIGR01060.HMM", "TIGR03627.HMM", "TIGR01020.HMM", "TIGR02258.HMM", "TIGR00293.HMM", "TIGR00389.HMM", "TIGR01012.HMM", "TIGR00490.HMM", "TIGR03677.HMM",
+                                "TIGR03636.HMM", "TIGR03722.HMM", "TIGR00458.HMM", "TIGR00291.HMM", "TIGR00670.HMM", "TIGR00064.HMM", "TIGR03629.HMM", "TIGR00021.HMM", "TIGR03672.HMM", "TIGR00111.HMM",
+                                "TIGR03684.HMM", "TIGR01077.HMM", "TIGR01213.HMM", "TIGR01080.HMM", "TIGR00501.HMM", "TIGR00729.HMM", "TIGR01038.HMM", "TIGR00270.HMM", "TIGR03628.HMM", "TIGR01028.HMM",
+                                "TIGR00521.HMM", "TIGR03671.HMM", "TIGR00240.HMM", "TIGR02390.HMM", "TIGR02338.HMM", "TIGR00037.HMM", "TIGR02076.HMM", "TIGR00335.HMM", "TIGR01025.HMM", "TIGR00471.HMM",
+                                "TIGR00336.HMM", "TIGR00522.HMM", "TIGR02153.HMM", "TIGR02651.HMM", "TIGR03674.HMM", "TIGR00323.HMM", "TIGR00134.HMM", "TIGR02236.HMM", "TIGR03683.HMM", "TIGR00491.HMM",
+                                "TIGR00658.HMM", "TIGR03680.HMM", "TIGR00392.HMM", "TIGR00422.HMM", "TIGR00279.HMM", "TIGR01052.HMM", "TIGR00442.HMM", "TIGR00308.HMM", "TIGR00398.HMM", "TIGR00456.HMM",
+                                "TIGR00549.HMM", "TIGR00408.HMM", "TIGR00432.HMM", "TIGR00264.HMM", "TIGR00982.HMM", "TIGR00324.HMM", "TIGR01952.HMM", "TIGR03626.HMM", "TIGR03670.HMM", "TIGR00337.HMM",
+                                "TIGR01046.HMM", "TIGR01018.HMM", "TIGR00936.HMM", "TIGR00463.HMM", "TIGR01309.HMM", "TIGR03653.HMM", "TIGR00042.HMM", "TIGR02389.HMM", "TIGR00307.HMM", "TIGR03673.HMM",
+                                "TIGR00373.HMM", "TIGR01008.HMM", "TIGR00283.HMM", "TIGR00425.HMM", "TIGR00405.HMM", "TIGR03665.HMM", "TIGR00448.HMM"]}
+
+RPS23_MARKERS = {"PFAM": ["PF00687.16.hmm","PF00466.15.hmm","PF00298.14.hmm","PF03946.9.hmm","PF00238.14.hmm",
+                            "PF00252.13.hmm","PF00861.17.hmm","PF00181.18.hmm","PF00237.14.hmm",
+                            "PF03947.13.hmm","PF00297.17.hmm","PF00573.17.hmm","PF00281.14.hmm",
+                            "PF00673.16.hmm","PF00338.17.hmm","PF00411.14.hmm","PF00416.17.hmm",
+                            "PF00312.17.hmm","PF00366.15.hmm","PF00203.16.hmm","PF00189.15.hmm",
+                            "PF00333.15.hmm","PF03719.10.hmm","PF00177.16.hmm","PF00410.14.hmm",
+                            "PF00380.14.hmm","PF00164.20.hmm"],
+                    "TIGRFAM": []}
+
+# Information for aligning genomes
+DEFAULT_DOMAIN_THRESHOLD = 10.0
+AR_MARKER_COUNT = 122
+BAC_MARKER_COUNT = 120
+
+# Annotation folder
+MARKER_GENE_DIR = "marker_genes"
+
+
+#MSA file names
+CONCAT_BAC120 = MSA_FOLDER + "gtdb_r83_bac120.faa"
+CONCAT_AR122 = MSA_FOLDER + "gtdb_r83_ar122.faa"
+
+#Taxonomy file name
+TAXONOMY_FILE = TAX_FOLDER + "gtdb_taxonomy.tsv"
+
+#Mask file names
+MASK_BAC120 = "gtdb_r83_bac120.mask"
+MASK_AR122 = "gtdb_r83_ar122.mask"
+MASK_RPS23 = "gtdb_r83_rps23.mask"
+
+
+#Pplacer configuration
+PPLACER_OUT = "pplacer.out"
+PPLACER_JSON_OUT = "pplacer.json"
+PPLACER_BAC120_REF_PKG = "gtdb_r83_bac120.refpkg"
+PPLACER_AR122_REF_PKG = "gtdb_r83_ar122.refpkg"
+PPLACER_RPS23_REF_PKG = "gtdb_r83_rps23.refpkg"
+
+#Fastani configuration
+FASTANI_SPECIES_THRESHOLD = 95.0
+FASTANI_GENOMES = FASTANI_DIR + "database/"
+FASTANI_GENOMES_EXT = "_genomic.fna"
+
+#Relative Evolution Distance
+RED_MIN_SUPPORT = 0.0
+RED_MIN_CHILDREN = 2
+RED_DIST_BAC_DICT = {"d__":0.00,"p__":0.325108639371,"c__":0.469996864411,"o__":0.634217579236,"f__":0.777062142455,"g__":0.93671712357}
+RED_DIST_ARC_DICT = {"d__":0.00,"p__":0.227534847987,"c__":0.353206001760,"o__":0.553905413445,"f__":0.732652342427,"g__":0.910973866402}
+
+
diff --git a/gtdbtk/external/pfam_search.py b/gtdbtk/external/pfam_search.py
index b3a829c3..03d6ba57 100644
--- a/gtdbtk/external/pfam_search.py
+++ b/gtdbtk/external/pfam_search.py
@@ -101,34 +101,39 @@ def _topHit(self, pfam_file):
 
     def _workerThread(self, queueIn, queueOut):
         """Process each data item in parallel."""
-        while True:
-            gene_file = queueIn.get(block=True, timeout=None)
-            if gene_file is None:
-                break
-                
-            genome_dir, filename = os.path.split(gene_file)
-            genome_id = filename.replace(self.protein_file_suffix, '')
-            output_hit_file = os.path.join(self.output_dir, genome_id, filename.replace(self.protein_file_suffix,
-                                                                                        self.pfam_suffix))
-            dir_path = os.path.dirname(os.path.realpath(__file__))
-            pfam_search_script = os.path.join(dir_path, 'pfam_search.pl')
-            cmd = '%s -outfile %s -cpu %d -fasta %s -dir %s' % (pfam_search_script,
-                                                                    output_hit_file,
-                                                                    self.cpus_per_genome,
-                                                                    gene_file,
-                                                                    self.pfam_hmm_dir)
-            os.system(cmd)
-
-            # calculate checksum
-            checksum = sha256(output_hit_file)
-            fout = open(output_hit_file + self.checksum_suffix, 'w')
-            fout.write(checksum)
-            fout.close()
-
-            # identify top hit for each gene
-            self._topHit(output_hit_file)
-
-            queueOut.put(gene_file)
+        try:
+            while True:
+                gene_file = queueIn.get(block=True, timeout=None)
+                if gene_file is None:
+                    break
+                    
+                genome_dir, filename = os.path.split(gene_file)
+                genome_id = filename.replace(self.protein_file_suffix, '')
+                output_hit_file = os.path.join(self.output_dir, genome_id, filename.replace(self.protein_file_suffix,
+                                                                                            self.pfam_suffix))
+                dir_path = os.path.dirname(os.path.realpath(__file__))
+                pfam_search_script = os.path.join(dir_path, 'pfam_search.pl')
+                cmd = '%s -outfile %s -cpu %d -fasta %s -dir %s' % (pfam_search_script,
+                                                                        output_hit_file,
+                                                                        self.cpus_per_genome,
+                                                                        gene_file,
+                                                                        self.pfam_hmm_dir)
+                osexitcode = os.system(cmd)
+                if osexitcode == 1:
+                    raise RuntimeError("Pfam_search has crashed")
+    
+                # calculate checksum
+                checksum = sha256(output_hit_file)
+                fout = open(output_hit_file + self.checksum_suffix, 'w')
+                fout.write(checksum)
+                fout.close()
+    
+                # identify top hit for each gene
+                self._topHit(output_hit_file)
+    
+                queueOut.put(gene_file)
+        except Exception as error:
+            raise error
 
     def _writerThread(self, numDataItems, writerQueue):
         """Store or write results of worker threads in a single thread."""
@@ -179,6 +184,9 @@ def run(self, gene_files):
 
             for p in workerProc:
                 p.join()
+                if p.exitcode == 1:
+                    raise ValueError("Pfam Error")
+                
 
             writerQueue.put(None)
             writeProc.join()
diff --git a/gtdbtk/main.py b/gtdbtk/main.py
index 266b30a5..80f80443 100644
--- a/gtdbtk/main.py
+++ b/gtdbtk/main.py
@@ -105,8 +105,8 @@ def _genomes_to_process(self, genome_dir, batchfile, extension):
                 genomic_files[genome_id] = genome_file
 
         for genome_key in genomic_files.iterkeys():
-            if genome_key.startswith("RS_") or genome_key.startswith("GB_"):
-                self.logger.error("Submitted genomes start with the same prefix (RS_,GB_) as reference genomes in GTDB-Tk. This will cause issues for downstream analysis.") 
+            if genome_key.startswith("RS_") or genome_key.startswith("GB_") or genome_key.startswith("UBA"):
+                self.logger.error("Submitted genomes start with the same prefix (RS_,GB_,UBA) as reference genomes in GTDB-Tk. This will cause issues for downstream analysis.") 
                 sys.exit(-1)
                 
         if len(genomic_files) == 0:
@@ -190,7 +190,7 @@ def infer(self, options):
         self.logger.info('Inferring tree with FastTree using %s+GAMMA.' % options.prot_model)
         fasttree = FastTree(multithreaded=(options.cpus > 1))
 
-        tree_unrooted_output = os.path.join(options.out_dir, options.prefix +options.suffix+ '.unrooted.tree')
+        tree_unrooted_output = os.path.join(options.out_dir, options.prefix +'.unrooted.tree')
         tree_log = os.path.join(options.out_dir, options.prefix + '.tree.log')
         tree_output_log = os.path.join(options.out_dir, 'fasttree.log')
         fasttree.run(options.msa_file, 
@@ -214,7 +214,8 @@ def classify(self, options):
         classify.run(genomes,
                         options.align_dir,
                         options.out_dir,
-                        options.prefix)
+                        options.prefix,
+                        options.debug)
         
         self.logger.info('Done.')