diff --git a/bin/gtdb_migration_tk b/bin/gtdb_migration_tk
index f745064..e5e67a2 100755
--- a/bin/gtdb_migration_tk
+++ b/bin/gtdb_migration_tk
@@ -55,12 +55,18 @@ def print_help():
           version() + ' :::...''')
     print('''\
 
-    
+    NCBI folder to GTDB folder:
+      update_refseq  -> Update Refseq genomes.  
+      update_genbank -> Update Genbank genomes.
+      
     Information from Nomenclatural resources:
       lpsn     -> Process steps for LPSN.
       bacdive  -> Process steps for BacDive. [In Dev]
       strains  -> Set of tools to combined information from LPSN,DSMZ and Straininfo.
       
+    Miscellaneous commands:
+      list_genomes -> Produce file indicating the directory of each genome.
+      
     Test suite for data validation:
       overview      -> Compare the Metadata file from the previous version with the new one.
       compare_field -> Compare a specific metadata field between to metadata files. 
@@ -79,6 +85,56 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser(add_help=False)
     subparsers = parser.add_subparsers(help="--", dest='subparser_name')
 
+    # Misc commands
+    genome_dir_parser = subparsers.add_parser(
+        'list_genomes', formatter_class=CustomHelpFormatter, description='Produce file indicating the directory of each genome.')
+    genome_dir_parser.add_argument(
+        'genome_dir', help='base directory leading to NCBI archaeal and bacterial genome assemblies')
+    genome_dir_parser.add_argument('output_file', help='output metadata file')
+    genome_dir_parser.add_argument(
+        '--silent', help="suppress output", action='store_true')
+
+    # Steps to move NCBI genomes to GTDB folders
+    update_rfq_parser = subparsers.add_parser(
+        'update_refseq', formatter_class=CustomHelpFormatter, description='Update Refseq genomes.')
+    update_rfq_parser.add_argument('--ftp_refseq_directory', dest="ftp_refseq", required=True,
+                                   help='base directory leading the the FTP repository for refseq')
+    update_rfq_parser.add_argument('--new_refseq_directory', dest="output_dir",
+                                   required=True, help='base directory leading the new repository for refseq')
+    update_rfq_parser.add_argument('--ftp_genome_dirs_file', dest="ftp_genome_dirs", required=True,
+                                   help='metadata file listing all directories for the FTP folder (generated by genome_dirs.py)')
+    update_rfq_parser.add_argument('--old_genome_dirs_file', dest="old_genome_dirs", required=True,
+                                   help='metadata file listing all directories from the previous NCBI update date  (generated by genome_dirs.py)')
+    update_rfq_parser.add_argument('--arc_assembly_summary', required=True,
+                                   help='metadata file downloaded from ncbi.')
+    update_rfq_parser.add_argument('--bac_assembly_summary', required=True,
+                                   help='metadata file downloaded from ncbi.')
+    update_rfq_parser.add_argument('--cpus', type=int, default=1,
+                                   help='Number of cpus')
+    update_rfq_parser.add_argument(
+        '--silent', help="suppress output", action='store_true')
+
+    update_gbk_parser = subparsers.add_parser(
+        'update_genbank', formatter_class=CustomHelpFormatter, description='Update Genbank genomes.')
+    update_gbk_parser.add_argument('--ftp_genbank_directory', dest="ftp_genbank", required=True,
+                                   help='base directory leading the the FTP repository for genbank')
+    update_gbk_parser.add_argument('--new_genbank_directory', dest="output_dir",
+                                   required=True, help='base directory leading the new repository for genbank')
+    update_gbk_parser.add_argument('--ftp_genbank_genome_dirs_file', dest="ftp_genbank_genome_dirs", required=True,
+                                   help='metadata file listing all directories for the FTP folder (generated by ncbi_genome_dirs.py)')
+    update_gbk_parser.add_argument('--old_genbank_genome_dirs_file', dest="old_genbank_genome_dirs", required=True,
+                                   help='metadata file listing all directories from the previous NCBI update date  (generated by genome_dirs.py)')
+    update_gbk_parser.add_argument('--new_refseq_genome_dirs_file', dest="new_refseq_genome_dirs", required=True,
+                                   help='metadata file listing all directories from the previous NCBI update date  (generated by genome_dirs.py)')
+    update_gbk_parser.add_argument('--arc_assembly_summary', required=True,
+                                   help='Genbank metadata file downloaded from ncbi.')
+    update_gbk_parser.add_argument('--bac_assembly_summary', required=True,
+                                   help='Genbank metadata file downloaded from ncbi.')
+    update_gbk_parser.add_argument('--cpus', type=int, default=1,
+                                   help='Number of cpus')
+    update_gbk_parser.add_argument(
+        '--silent', help="suppress output", action='store_true')
+
     # Steps to update LPSN Metadata
     lpsn_parser = subparsers.add_parser('lpsn',
                                         formatter_class=CustomHelpFormatter,
@@ -192,25 +248,25 @@ if __name__ == '__main__':
                                             formatter_class=CustomHelpFormatter,
                                             help='Compare the Metadata file from the previous version with the new one.')
     overview_parser.add_argument(
-        '--previous_metadata_file', help='file indicating metadata of each genome in previous GTDB version.',required=True)
+        '--previous_metadata_file', help='file indicating metadata of each genome in previous GTDB version.', required=True)
     overview_parser.add_argument(
-        '--new_metadata_file', help='file indicating metadata of each genome in latest GTDB version.',required=True)
+        '--new_metadata_file', help='file indicating metadata of each genome in latest GTDB version.', required=True)
     overview_parser.add_argument(
         '--only_ncbi', help='Output file.', action='store_true')
     overview_parser.add_argument(
         '--silent', help="suppress output", action='store_true')
-        
+
     metafield_parser = subparsers.add_parser('compare_field',
-                                            formatter_class=CustomHelpFormatter,
-                                            help='Compare a specific metadata field between to metadata files.')
+                                             formatter_class=CustomHelpFormatter,
+                                             help='Compare a specific metadata field between to metadata files.')
     metafield_parser.add_argument(
-        '--previous_metadata_file', help='file indicating metadata of each genome in previous GTDB version.',required=True)
+        '--previous_metadata_file', help='file indicating metadata of each genome in previous GTDB version.', required=True)
     metafield_parser.add_argument(
-        '--new_metadata_file', help='file indicating metadata of each genome in latest GTDB version.',required=True)
+        '--new_metadata_file', help='file indicating metadata of each genome in latest GTDB version.', required=True)
     metafield_parser.add_argument(
-        '--field_of_interest', help='common field to compare between files.',required=True)
+        '--field_of_interest', help='common field to compare between files.', required=True)
     metafield_parser.add_argument(
-        '--output_file', help='Output file.',required=True)
+        '--output_file', help='Output file.', required=True)
     metafield_parser.add_argument(
         '--only_ncbi', help='Output file.', action='store_true')
     metafield_parser.add_argument(
diff --git a/gtdb_migration_tk/ftp_manager.py b/gtdb_migration_tk/ftp_manager.py
new file mode 100644
index 0000000..7232ee0
--- /dev/null
+++ b/gtdb_migration_tk/ftp_manager.py
@@ -0,0 +1,307 @@
+###############################################################################
+#                                                                             #
+#    This program is free software: you can redistribute it and/or modify     #
+#    it under the terms of the GNU General Public License as published by     #
+#    the Free Software Foundation, either version 3 of the License, or        #
+#    (at your option) any later version.                                      #
+#                                                                             #
+#    This program is distributed in the hope that it will be useful,          #
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
+#    GNU General Public License for more details.                             #
+#                                                                             #
+#    You should have received a copy of the GNU General Public License        #
+#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
+#                                                                             #
+###############################################################################
+
+
+import os
+import shutil
+import hashlib
+import glob
+import gzip
+import sys
+import argparse
+import tempfile
+from datetime import datetime
+import multiprocessing as mp
+import logging
+from gtdb_migration_tk.ftp_manager_tools import FTPTools
+
+
+class RefSeqManager(object):
+
+    def __init__(self, new_refseq_folder, cpus):
+
+        self.domains = ["archaea", "bacteria"]
+        self.genome_domain_dict = {}
+
+        self.threads = cpus
+
+        self.genomic_ext = "_genomic.fna"
+        self.protein_ext = "_protein.faa"
+        self.cds_ext = "_cds_from_genomic.fna"
+        self.rna_ext = "_rna_from_genomic.fna"
+
+        self.fastaExts = (self.genomic_ext, self.protein_ext)
+        self.extrafastaExts = (self.cds_ext, self.rna_ext)
+
+        self.extensions = ("_feature_table.txt", "_genomic.gbff",
+                           "_genomic.gff", "_protein.gpff", "_wgsmaster.gbff")
+        self.reports = ("_assembly_report.txt",
+                        "_assembly_stats.txt", "_hashes.txt")
+        self.allExts = self.fastaExts + self.extensions + self.reports
+        self.allbutFasta = self.extensions + self.reports
+
+        self.report_gcf = open(os.path.join(
+            new_refseq_folder, "report_gcf.log"), "w", 1)
+        self.genomes_to_review = open(os.path.join(
+            new_refseq_folder, "gid_to_review.log"), "w", 1)
+
+    def runComparison(self, ftp_refseq, new_refseq, ftp_genome_dirs, old_genome_dirs, archaea_assembly_summary, bacteria_assembly_summary):
+        '''
+        runComparison function is walking across all directories recursively
+        only folder containing latest_assembly_versions but not containing _assembly_structure
+        are of interest
+        '''
+
+        # old_dict lists all records from the previous gtdb update
+        with open(old_genome_dirs, 'r') as old_file:
+            old_dict = {old_line.split("\t")[0]: old_line.split("\t")[1].strip()
+                        for old_line in old_file}
+        print("{}:old_dict loaded".format(
+            str(datetime.now())))
+
+        # This is a one of for the transtion from the old directory structure
+        # to the new one
+        with open(old_genome_dirs, 'r') as old_file:
+            old_dict_domain = {old_line.split("\t")[0]: old_line.split("\t")[1].strip().split("/")[8]
+                               for old_line in old_file}
+        print("{}:old_dict_domain loaded ({} records)".format(
+            str(datetime.now()), len(old_dict_domain)))
+
+        # new list list all records from the ftp folder and considered as
+        # latest
+        new_list = []
+        with open(archaea_assembly_summary, 'r') as ftp_assembly_summary_file:
+            ftp_assembly_summary_file.readline()
+            new_list = [new_line.split(
+                "\t")[0] for new_line in ftp_assembly_summary_file if new_line.split("\t")[10] == "latest"]
+        self.genome_domain_dict = {arcid: "Archaea" for arcid in new_list}
+        with open(bacteria_assembly_summary, 'r') as ftp_assembly_summary_file:
+            ftp_assembly_summary_file.readline()
+            bacterial_new_list = [new_line.split(
+                "\t")[0] for new_line in ftp_assembly_summary_file if new_line.split("\t")[10] == "latest"]
+        for bacid in bacterial_new_list:
+            self.genome_domain_dict[bacid] = "Bacteria"
+        new_list.extend(bacterial_new_list)
+        print("{}:new_list loaded ({} records).".format(
+            str(datetime.now()), len(new_list)))
+
+        # new dict lists all records from FTP which are in new_list
+        new_dict = {}
+        cter = 0
+        print("loading new_dict.....")
+        with open(ftp_genome_dirs, 'r') as new_genome_dirs_file:
+            for new_line in new_genome_dirs_file:
+                cter += 1
+                # print('{}'.format(cter), end='\r')
+                new_line_split = new_line.split("\t")
+                if new_line_split[0].startswith("GCF") and new_line_split[0] in new_list:
+                    new_dict[new_line_split[0]] = new_line_split[1].strip()
+
+        print("{}:new_dict loaded ({} records).".format(
+            str(datetime.now()), len(new_dict)))
+
+        ftptools = FTPTools(
+            self.report, self.genomes_to_review, self.genome_domain_dict)
+
+        # delete genomes from the Database
+        removed_dict = {removed_key: old_dict[removed_key] for removed_key in list(
+            set(old_dict.keys()) - set(new_dict.keys()))}
+        ftptools.removeGenomes(removed_dict, old_dict_domain)
+
+        # new genomes in FTP
+        added_dict = {added_key: new_dict[added_key] for added_key in list(
+            set(new_dict.keys()) - set(old_dict.keys()))}
+        ftptools.addGenomes(added_dict, ftp_refseq, new_refseq,
+                            self.report_gcf, self.genome_domain_dict)
+
+        # delete genomes from the Database
+        removed_dict = {removed_key: old_dict[removed_key] for removed_key in list(
+            set(old_dict.keys()) - set(new_dict.keys()))}
+        ftptools.removeGenomes(removed_dict, old_dict_domain)
+
+        print("{}:Generating intersection list.....".format(str(datetime.now())))
+        intersect_list = list(
+            set(old_dict.keys()).intersection(set(new_dict.keys())))
+        print("{}:Intersection list:{} genomes".format(
+            str(datetime.now()), len(intersect_list)))
+        self.compareGenomes(
+            intersect_list, old_dict, new_dict, ftp_refseq, new_refseq, self.threads)
+        self.report_gcf.close()
+        self.genomes_to_review.close()
+
+
+class GenBankManager(object):
+
+    def __init__(self, new_genbank_folder, cpus):
+        self.domains = ["archaea", "bacteria"]
+        self.genome_domain_dict = {}
+
+        self.genomic_ext = "_genomic.fna.gz"
+        self.protein_ext = "_protein.faa.gz"
+        self.cds_ext = "_cds_from_genomic.fna.gz"
+        self.rna_ext = "_rna_from_genomic.fna.gz"
+
+        self.fastaExts = (self.genomic_ext, self.protein_ext)
+        self.extrafastaExts = (self.cds_ext, self.rna_ext)
+
+        self.extensions = ("_feature_table.txt.gz", "_genomic.gbff.gz",
+                           "_genomic.gff.gz", "_protein.gpff.gz", "_wgsmaster.gbff.gz")
+        self.reports = ("_assembly_report.txt",
+                        "_assembly_stats.txt", "_hashes.txt")
+        self.allExts = self.fastaExts + self.extensions + self.reports
+        self.allbutFasta = self.extensions + self.reports
+
+        self.threads = cpus
+
+        self.logger = logging.getLogger('timestamp')
+
+        self.report = open(os.path.join(new_genbank_folder,
+                                        "extra_gbk_report_gcf.log"), "w")
+        self.genomes_to_review = open(os.path.join(
+            new_genbank_folder, "gcaid_to_review.log"), "w", 1)
+        self.select_gca = open(os.path.join(
+            new_genbank_folder, "gca_selection.log"), "w")
+
+    def runComparison(self, ftp_genbank, new_genbank, ftp_genbank_genome_dirs, old_genbank_genome_dirs, new_refseq_genome_dirs, gbk_arc_assembly, gbk_bac_assembly):
+        '''
+        runComparison function is walking across all directories recursively
+        only folder containing latest_assembly_versions but not containing _assembly_structure
+        are of interest
+        '''
+
+        # old_dict lists all records from the previous gtdb update
+        with open(old_genbank_genome_dirs, 'r') as old_file:
+            old_dict = {old_line.split("\t")[0]: old_line.split("\t")[1].strip()
+                        for old_line in old_file}
+        self.logger.info("{}:old_dict loaded".format(str(datetime.now())))
+
+        # This is a one of for the transition from the old directory structure
+        # to the new one
+        with open(old_genbank_genome_dirs, 'r') as old_file:
+            old_dict_domain = {old_line.split("\t")[0]: old_line.split("\t")[1].strip().split("/")[8]
+                               for old_line in old_file}
+        self.logger.info(
+            "{}:old_dict_domain loaded".format(str(datetime.now())))
+
+        listGCA = self.parseAssemblySummary(
+            gbk_arc_assembly, gbk_bac_assembly, new_refseq_genome_dirs)
+        # new dict lists all records from FTP which are in new_list
+        new_dict = {}
+        num_lines = sum(1 for line in open(ftp_genbank_genome_dirs))
+        processedItems = 0
+        with open(ftp_genbank_genome_dirs, 'r') as new_genome_dirs_file:
+            for new_line in new_genome_dirs_file:
+                processedItems += 1
+                statusStr = 'New Dictionary = Finished processing %d of %d (%.2f%%) gca records.' % (
+                    processedItems, num_lines, float(processedItems) * 100 / num_lines)
+                sys.stdout.write('%s\r' % statusStr)
+                sys.stdout.flush()
+                new_line_split = new_line.split("\t")
+                if new_line_split[0].startswith("GCA") and new_line_split[0] in listGCA:
+                    new_dict[new_line_split[0]] = new_line_split[1].strip()
+        sys.stdout.write('\n')
+        self.logger.info("{}:new_dict loaded".format(str(datetime.now())))
+
+        ftptools = FTPTools(
+            self.report, self.genomes_to_review, self.genome_domain_dict)
+
+        # delete genomes from the Database
+        removed_dict = {removed_key: old_dict[removed_key] for removed_key in list(
+            set(old_dict.keys()) - set(new_dict.keys()))}
+        self.logger.info("{0} genomes to remove".format(len(removed_dict)))
+        ftptools.removeGenomes(removed_dict, old_dict_domain)
+
+        # new genomes in FTP
+        added_dict = {added_key: new_dict[added_key] for added_key in list(
+            set(new_dict.keys()) - set(old_dict.keys()))}
+        self.logger.info("{0} genomes to add".format(len(added_dict)))
+        ftptools.addGenomes(added_dict, ftp_genbank, new_genbank)
+
+        self.logger.info(
+            "{}:Generating intersection list.....".format(str(datetime.now())))
+        intersect_list = list(
+            set(old_dict.keys()).intersection(set(new_dict.keys())))
+        self.logger.info("{}:Intersection list:{} genomes".format(
+            str(datetime.now()), len(intersect_list)))
+        ftptools.compareGenomes(
+            intersect_list, old_dict, new_dict, ftp_genbank, new_genbank, self.threads)
+        self.select_gca.close()
+        self.report.close()
+        self.genomes_to_review.close()
+
+
+# Tools
+
+    def parseAssemblySummary(self, gbk_arc_assembly, gbk_bac_assembly, new_refseq_genome_dirs):
+        listGCA = []
+        dictGCF = self._populateGenomesDict(new_refseq_genome_dirs)
+        self.logger.info("parsing of dictionary is done.")
+
+        for domain, assemblyfile in [('archaea', gbk_arc_assembly), ('bacteria', gbk_bac_assembly)]:
+            num_lines = sum(1 for line in open(assemblyfile))
+            processedItems = 0
+            with open(assemblyfile, "r") as sumf:
+
+                # we discard the first line
+                sumf.readline()
+                for line in sumf:
+                    processedItems += 1
+                    statusStr = 'Finished processing %d of %d (%.2f%%) gca records.' % (
+                        processedItems, num_lines, float(processedItems) * 100 / num_lines)
+                    sys.stdout.write('%s\r' % statusStr)
+                    sys.stdout.flush()
+                    split_line = line.split("\t")
+                    gcf_access = 'G' + split_line[17][4:13]
+                    full_gca_access = split_line[0]
+                    latest = split_line[10]
+                    surveillance_info = split_line[20]
+                    if 'surveillance' in surveillance_info:
+                        continue
+                    elif latest == "latest":
+                        if not gcf_access.startswith("GCF"):
+                            formatted_gcaid = 'G' + full_gca_access[4:13]
+                            if formatted_gcaid in dictGCF:
+                                self.select_gca.write("{0} skipped because {1} in Refseq (although {0} has no GCF)\n".format(
+                                    full_gca_access, gcf_access))
+                                continue
+                            else:
+                                listGCA.append(full_gca_access)
+                        else:
+                            # if the Refseq folder is empty, we copy the genbank
+                            # folder
+                            if gcf_access in dictGCF:
+                                protein_files = glob.glob(
+                                    os.path.join(dictGCF.get(gcf_access), "*_protein.faa"))
+                                if len(protein_files) == 0:
+                                    self.select_gca.write(
+                                        "{0} associated with {1} : {1} missed files in FTP folder\n".format(full_gca_access, gcf_access))
+                                    listGCA.append(full_gca_access)
+                            else:
+                                self.select_gca.write(
+                                    "{0} associated with {1} : {1} not present in FTP folder\n".format(full_gca_access, gcf_access))
+                                listGCA.append(full_gca_access)
+                    self.genome_domain_dict[full_gca_access] = domain
+        sys.stdout.write('\n')
+        return listGCA
+
+    def _populateGenomesDict(self, genome_dirs_file):
+        temp_dict = {}
+        with open(genome_dirs_file, "r") as list_dirs:
+            for line in list_dirs:
+                temp_dict['G' + line.split("\t")[0][4:13]] = line.split(
+                    "\t")[1].rstrip()
+        return temp_dict
diff --git a/gtdb_migration_tk/ftp_manager_tools.py b/gtdb_migration_tk/ftp_manager_tools.py
new file mode 100644
index 0000000..1aad180
--- /dev/null
+++ b/gtdb_migration_tk/ftp_manager_tools.py
@@ -0,0 +1,378 @@
+###############################################################################
+#                                                                             #
+#    This program is free software: you can redistribute it and/or modify     #
+#    it under the terms of the GNU General Public License as published by     #
+#    the Free Software Foundation, either version 3 of the License, or        #
+#    (at your option) any later version.                                      #
+#                                                                             #
+#    This program is distributed in the hope that it will be useful,          #
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
+#    GNU General Public License for more details.                             #
+#                                                                             #
+#    You should have received a copy of the GNU General Public License        #
+#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
+#                                                                             #
+###############################################################################
+
+
+import os
+import shutil
+import hashlib
+import glob
+import gzip
+import sys
+import argparse
+import tempfile
+from datetime import datetime
+import multiprocessing as mp
+import logging
+
+
+class FTPTools(object):
+    def __init__(self, report, genomes_to_review, genome_domain_dict):
+        self.reports = ("_assembly_report.txt",
+                        "_assembly_stats.txt", "_hashes.txt")
+
+        self.genomic_ext = "_genomic.fna.gz"
+        self.protein_ext = "_protein.faa.gz"
+        self.cds_ext = "_cds_from_genomic.fna.gz"
+        self.rna_ext = "_rna_from_genomic.fna.gz"
+
+        self.extensions = ("_feature_table.txt.gz", "_genomic.gbff.gz",
+                           "_genomic.gff.gz", "_protein.gpff.gz", "_wgsmaster.gbff.gz")
+
+        self.fastaExts = (self.genomic_ext, self.protein_ext)
+        self.extrafastaExts = (self.cds_ext, self.rna_ext)
+        self.allExts = self.fastaExts + self.extensions + self.reports
+        self.allbutFasta = self.extensions + self.reports
+        self.report = report
+        self.genomes_to_review = genomes_to_review
+        self.genome_domain_dict = genome_domain_dict
+
+    def rreplace(self, s, old, new, occurrence):
+        '''
+         Instead of using the normal replace function, we need to implement our own.
+         Some folder are named with a .gz in the middle so we only need to replace the last .gz in the string name
+        :param s:
+        :param old:
+        :param new:
+        :param occurrence:
+        '''
+        li = s.rsplit(old, occurrence)
+        return new.join(li)
+
+    def compareGenomes(self, intersect_list, old_dict, new_dict, ftp_directory, new_directory, threads):
+        '''
+        compare the genomes existing in both folders ( FTP folder and previous gtdb update).
+
+        :param intersect_list:
+        :param old_dict:
+        :param new_dict:
+        '''
+
+        # populate worker queue with data to process
+        workerQueue = mp.Queue()
+        writerQueue = mp.Queue()
+
+        for gca_record in intersect_list:
+            gtdb_dir = old_dict.get(gca_record)
+            ftp_dir = new_dict.get(gca_record)
+            target_dir = os.path.join(
+                new_directory, ftp_dir.replace(ftp_directory, ''))
+            workerQueue.put((gtdb_dir, ftp_dir, target_dir,
+                             gca_record))
+
+        for _ in range(threads):
+            workerQueue.put((None, None, None, None))
+
+        try:
+            workerProc = [mp.Process(target=self.__workerThread, args=(
+                workerQueue, writerQueue)) for _ in range(threads)]
+            writeProc = mp.Process(target=self.__writerThread, args=(
+                len(intersect_list), writerQueue))
+
+            writeProc.start()
+
+            for p in workerProc:
+                p.start()
+
+            for p in workerProc:
+                p.join()
+
+            writerQueue.put(None)
+            writeProc.join()
+        except:
+            for p in workerProc:
+                p.terminate()
+
+            writeProc.terminate()
+
+    def __workerThread(self, queueIn, queueOut):
+        """Process each data item in parallel."""
+        while True:
+            gtdb_dir, ftp_dir, target_dir, gca_record = queueIn.get(
+                block=True, timeout=None)
+            if gca_record is None:
+                break
+
+            status_gca = self.readmd5Checksum(
+                gtdb_dir, ftp_dir, target_dir, gca_record)
+            queueOut.put(status_gca)
+
+    def __writerThread(self, numgenometoprocess, writerQueue):
+        """Store or write results of worker threads in a single thread."""
+        processedItems = 0
+        while True:
+            a = writerQueue.get(block=True, timeout=None)
+            if a is None:
+                break
+            self.report.write(a)
+
+            processedItems += 1
+            statusStr = 'Finished processing %d of %d (%.2f%%) genome pairs.' % (
+                processedItems, numgenometoprocess, float(processedItems) * 100 / numgenometoprocess)
+            sys.stdout.write('%s\r' % statusStr)
+            sys.stdout.flush()
+
+        sys.stdout.write('\n')
+
+    def addGenomes(self, added_dict, ftp_dir, new_directory, genome_domain_dict):
+        '''
+        addGenomes function insert new genomes in the GTDB database. New genomes are present in the FTP folder
+        but not in the previous version of GTDB.
+
+        :TODO: Check if the new genome is a new version of an existing genome. in that case we overwrite the previous one
+        and keep the same database id
+        This will cause a conflict with the removeGenomes function.
+
+
+        :param added_dict: dictionary of genomes to be added (genome_id:path to genome)
+        :param ftp_dir: base directory leading the the FTP repository for refseq
+        :param new_directory:base directory leading the new repository for refseq
+        '''
+
+        processedItems = 0
+        for gcf_record in added_dict:
+            processedItems += 1
+            statusStr = 'Finished processing %d of %d (%.2f%%) genome to add.' % (
+                processedItems, len(added_dict), float(processedItems) * 100 / len(added_dict))
+            sys.stdout.write('%s\r' % statusStr)
+            sys.stdout.flush()
+
+            target_dir = os.path.join(
+                new_directory, added_dict[gcf_record].replace(ftp_dir, ''))
+            shutil.copytree(added_dict[
+                            gcf_record], target_dir, ignore=shutil.ignore_patterns("*_assembly_structure"))
+            self.report.write(
+                "{0}\t{1}\tnew\n".format(genome_domain_dict.get(gcf_record).upper(), gcf_record))
+            for compressed_file in glob.glob(target_dir + "/*.gz"):
+                if not os.path.isdir(compressed_file):
+                    inF = gzip.open(compressed_file, 'rb')
+                    outF = open(
+                        self.rreplace(compressed_file, ".gz", "", 1), 'wb')
+                    outF.write(inF.read())
+                    inF.close()
+                    outF.close()
+                    os.remove(compressed_file)
+        sys.stdout.write('\n')
+
+    def removeGenomes(self, removed_dict, old_dict_domain):
+        '''
+        removeGenomes function removes all outdated genomes from the gtdb database
+        In addition it tracks the lists(name and owner) that have been modified while deleting those genomes
+
+        :param removed_dict: dictionary of genomes to delete
+        '''
+
+        for gca_record in removed_dict:
+            self.report.write(
+                "{0}\t{1}\tremoved\n".format(old_dict_domain.get(gca_record).upper(), gca_record))
+
+    def readmd5Checksum(self, gtdb_dir, ftp_dir, target_dir, genome_record):
+        '''
+        Compare the checksum of the file listed in the checksums.txt
+        '''
+        pathftpmd5 = os.path.join(ftp_dir, "md5checksums.txt")
+        pathgtdbmd5 = os.path.join(gtdb_dir, "md5checksums.txt")
+        target_pathnewmd5 = os.path.join(target_dir, "md5checksums.txt")
+        status = []
+
+        tmp_ftp_dir = tempfile.mkdtemp()
+        tmp_target = os.path.join(tmp_ftp_dir, os.path.basename(target_dir))
+        shutil.copytree(ftp_dir, tmp_target, symlinks=True,
+                        ignore=shutil.ignore_patterns("*_assembly_structure"))
+        for compressed_file in glob.glob(tmp_target + "/*.gz"):
+            if os.path.isdir(compressed_file) is False:
+                inF = gzip.open(compressed_file, 'rb')
+                try:
+                    outF = open(
+                        self.rreplace(compressed_file, ".gz", "", 1), 'wb')
+                except IOError:
+                    os.chmod(
+                        self.rreplace(compressed_file, ".gz", "", 1), 0o775)
+                    outF = open(
+                        self.rreplace(compressed_file, ".gz", "", 1), 'wb')
+                outF.write(inF.read())
+                inF.close()
+                outF.close()
+                os.remove(compressed_file)
+
+        ftpdict, ftpdict_fasta, ftpdict_faa, ftpdict_extra_fasta = self.parse_checksum(
+            tmp_target)
+        gtdbdict, gtdbdict_fasta, gtdbdict_faa, gtdbdict_extra_fasta = self.parse_checksum(
+            gtdb_dir)
+
+        # if the genomic.fna.gz or the protein.faa.gz are missing, we set this
+        # record as incomplete
+        if len(list(set(ftpdict_fasta.keys()).symmetric_difference(set(gtdbdict_fasta.keys())))) > 0:
+            self.genomes_to_review.write("tmp_target:{}\nftpdict_fasta.keys():{}\ngtdb_dir:{}\ngtdbdict_fasta.keys():{}\n\n".format(tmp_target,
+                                                                                                                                    ftpdict_fasta.keys(),
+                                                                                                                                    gtdb_dir,
+                                                                                                                                    gtdbdict_fasta.keys()))
+            status.append("incomplete")
+            shutil.copytree(tmp_target, target_dir, symlinks=True,
+                            ignore=shutil.ignore_patterns("*_assembly_structure"))
+            # we unzip of gz file
+
+        else:
+            ftp_folder = False
+            # check if genomic.fna.gz and protein.faa.gz are similar between
+            # previous gtdb and ftp
+            for key, value in ftpdict_fasta.items():
+                if value != gtdbdict_fasta.get(key):
+                    ftp_folder = True
+
+            # if one of the 2 files is different than the previous version , we
+            # use the ftp record over the previous gtdb one , we then need to
+            # re run the metadata generation
+            if ftp_folder:
+                shutil.copytree(
+                    tmp_target, target_dir, symlinks=True,
+                    ignore=shutil.ignore_patterns("*_assembly_structure"))
+                status.append("modified")
+
+            else:
+                # The 2 main fasta files haven't changed so we can copy the old
+                # gtdb folder over
+                shutil.copytree(
+                    gtdb_dir, target_dir, symlinks=True,
+                    ignore=shutil.ignore_patterns("*_assembly_structure"))
+                status.append("unmodified")
+
+                # We check if all other file of this folder are the same.
+                checksum_changed = False
+
+                for key, value in ftpdict_faa.items():
+                    if value != gtdbdict_faa.get(key):
+                        checksum_changed = True
+                        shutil.copy2(
+                            os.path.join(tmp_target, key), os.path.join(target_dir, key))
+                        status.append("new_protein")
+
+                for key, value in ftpdict.items():
+                    if value != gtdbdict.get(key):
+                        checksum_changed = True
+                        shutil.copy2(
+                            os.path.join(tmp_target, key), os.path.join(target_dir, key))
+                        status.append("new_metadata")
+
+                for key, value in ftpdict_extra_fasta.items():
+                    if value != gtdbdict_extra_fasta.get(key):
+                        checksum_changed = True
+                        shutil.copy2(
+                            os.path.join(tmp_target, key), os.path.join(target_dir, key))
+                        status.append("new_cds_rna")
+                # we copy the new checksum
+                if checksum_changed:
+                    try:
+                        shutil.copy2(pathgtdbmd5, target_pathnewmd5)
+                    except IOError:
+                        os.chmod(target_pathnewmd5, 0o664)
+                        shutil.copy2(pathftpmd5, target_pathnewmd5)
+                for report in self.reports:
+                    target_files = glob.glob(
+                        os.path.join(target_dir, "*" + report))
+                    ftp_files = glob.glob(os.path.join(ftp_dir, "*" + report))
+                    if len(target_files) == 1 and len(ftp_files) == 1:
+                        status = self.comparesha256(
+                            ftp_files[0], target_files[0], status)
+                    elif len(target_files) == 0 and len(ftp_files) == 0 and report == '_hashes.txt':
+                        status.append("old_folder_dir")
+                    elif len(target_files) == 0 and len(ftp_files) == 1 and report == '_hashes.txt':
+                        shutil.copy2(ftp_dir[0], ftp_dir[0].replace(
+                            ftp_dir.target_dir))
+                        status.append("new_hashes")
+                    else:
+                        print("########")
+                        print(target_dir)
+                        print(target_files)
+                        print(ftp_dir)
+                        print(ftp_files)
+                        print("IT SHOULDN'T HAPPEN")
+                        print("########")
+                        sys.exit()
+        status_record = "{0}\t{1}\t{2}\n".format(self.genome_domain_dict.get(
+            genome_record).upper(), genome_record, ';'.join([x for x in set(status)]))
+        shutil.rmtree(tmp_ftp_dir)
+        return status_record
+
+    def comparesha256(self, ftp_file, target_file, status):
+        '''
+        comparesha256 compares the report file
+        :param ftp_file:
+        :param target_file:
+        :param status:
+
+        '''
+        original_checksum = hashlib.md5(
+            open(ftp_file, 'rb').read()).hexdigest()
+        gtdb_checksum = hashlib.md5(open(target_file, 'rb').read()).hexdigest()
+        if original_checksum != gtdb_checksum:
+            try:
+                shutil.copy2(ftp_file, target_file)
+            except IOError:
+                os.chmod(target_file, 0o664)
+                shutil.copy2(ftp_file, target_file)
+            status.append("new_metadata")
+        return status
+
+    def parse_checksum(self, pathtodir):
+        '''
+        parse_checksum function parses the md5 checksum file.
+        It returns 2 dictionaries {file:size} : one for the fna and faa files, one for the genbank files
+        :param md5File:
+        '''
+
+        out_dict, out_dict_fasta, out_dict_faa, out_dict_extra_fasta = {}, {}, {}, {}
+
+        for name in glob.glob(os.path.join(pathtodir, '*')):
+            if name.endswith(self.extrafastaExts):
+                out_dict_extra_fasta[os.path.basename(
+                    name)] = self.sha256Calculator(name)
+                os.chmod(name, 0o664)
+            elif name.endswith(self.genomic_ext):
+                out_dict_fasta[os.path.basename(
+                    name)] = self.sha256Calculator(name)
+                os.chmod(name, 0o664)
+            elif name.endswith(self.protein_ext):
+                out_dict_faa[os.path.basename(
+                    name)] = self.sha256Calculator(name)
+                os.chmod(name, 0o664)
+            elif name.endswith(self.allbutFasta):
+                out_dict[os.path.basename(name)] = self.sha256Calculator(name)
+                os.chmod(name, 0o664)
+        return (out_dict, out_dict_fasta, out_dict_faa, out_dict_extra_fasta)
+
+    def sha256Calculator(self, file_path):
+
+        try:
+            filereader = open(file_path, "rb")
+        except:
+            raise Exception("Cannot open Fasta file: " + file_path)
+        m = hashlib.sha256()
+        for line in filereader:
+            m.update(line)
+        sha256_checksum = m.hexdigest()
+        filereader.close()
+        return sha256_checksum
diff --git a/gtdb_migration_tk/genome_manager.py b/gtdb_migration_tk/genome_manager.py
new file mode 100644
index 0000000..5c18f2f
--- /dev/null
+++ b/gtdb_migration_tk/genome_manager.py
@@ -0,0 +1,72 @@
+###############################################################################
+#                                                                             #
+#    This program is free software: you can redistribute it and/or modify     #
+#    it under the terms of the GNU General Public License as published by     #
+#    the Free Software Foundation, either version 3 of the License, or        #
+#    (at your option) any later version.                                      #
+#                                                                             #
+#    This program is distributed in the hope that it will be useful,          #
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
+#    GNU General Public License for more details.                             #
+#                                                                             #
+#    You should have received a copy of the GNU General Public License        #
+#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
+#                                                                             #
+###############################################################################
+
+import os
+import sys
+import argparse
+
+
+class DirectoryManager(object):
+    """Create file indicating directory of each genome."""
+
+    def __init__(self):
+        pass
+
+    def normaliseID(self, accession):
+        normaccession = "G" + accession[4:accession.find('.', 0)]
+        return normaccession
+
+    def run(self, database_dir, output_file):
+        """Create file indicating directory of each genome."""
+
+        fout = open(output_file, 'w')
+
+        for code in ['GCA', 'GCF']:
+            code_dir = os.path.join(database_dir, code)
+            if not os.path.exists(code_dir) or not os.path.isdir(code_dir):
+                print('we skip {}'.format(code))
+                continue
+            for first_three in os.listdir(code_dir):
+                onethird_species_dir = os.path.join(code_dir, first_three)
+                # print onethird_species_dir
+                if os.path.isfile(onethird_species_dir):
+                    continue
+                for second_three in os.listdir(onethird_species_dir):
+                    twothird_species_dir = os.path.join(
+                        onethird_species_dir, second_three)
+                    # print twothird_species_dir
+                    if os.path.isfile(twothird_species_dir):
+                        continue
+                    for third_three in os.listdir(twothird_species_dir):
+                        threethird_species_dir = os.path.join(
+                            twothird_species_dir, third_three)
+                        # print threethird_species_dir
+                        if os.path.isfile(threethird_species_dir):
+                            continue
+                        for complete_name in os.listdir(threethird_species_dir):
+                            full_path = os.path.join(
+                                threethird_species_dir, complete_name)
+                            if os.path.isfile(full_path):
+                                continue
+
+                            accession = complete_name[0:complete_name.find(
+                                '_', 4)]
+
+                            fout.write('{}\t{}\t{}\n'.format(accession, os.path.abspath(
+                                full_path), self.normaliseID(accession)))
+
+        fout.close()
diff --git a/gtdb_migration_tk/main.py b/gtdb_migration_tk/main.py
index 0a97ef8..b09d736 100644
--- a/gtdb_migration_tk/main.py
+++ b/gtdb_migration_tk/main.py
@@ -28,6 +28,8 @@
 from gtdb_migration_tk.bacdive import BacDive
 from gtdb_migration_tk.strains import Strains
 from gtdb_migration_tk.tools import Tools
+from gtdb_migration_tk.genome_manager import DirectoryManager
+from gtdb_migration_tk.ftp_manager import RefSeqManager, GenBankManager
 
 
 class OptionsParser():
@@ -90,9 +92,32 @@ def compare_selected_data(self, options):
                                 options.field_of_interest,
                                 options.output_file, options.only_ncbi)
 
+    def parse_genome_directory(self, options):
+        p = DirectoryManager()
+        p.run(options.genome_dir, options.output_file)
+
+    def update_refseq_from_ftp_files(self, options):
+        p = RefSeqManager(options.output_dir, options.cpus)
+        p.runComparison(
+            options.ftp_refseq, options.output_dir, options.ftp_genome_dirs, options.old_genome_dirs, options.arc_assembly_summary, options.bac_assembly_summary)
+
+    def update_genbank_from_ftp_files(self, options):
+
+        p = GenBankManager(options.output_dir, options.cpus)
+        p.runComparison(
+            options.ftp_genbank, options.output_dir, options.ftp_genbank_genome_dirs,
+            options.old_genbank_genome_dirs, options.new_refseq_genome_dirs,
+            options.arc_assembly_summary, options.bac_assembly_summary)
+
     def parse_options(self, options):
         """Parse user options and call the correct pipeline(s)"""
-        if options.subparser_name == 'lpsn':
+        if options.subparser_name == 'list_genomes':
+            self.parse_genome_directory(options)
+        elif options.subparser_name == 'update_refseq':
+            self.update_refseq_from_ftp_files(options)
+        elif options.subparser_name == 'update_genbank':
+            self.update_genbank_from_ftp_files(options)
+        elif options.subparser_name == 'lpsn':
             if options.lpsn_subparser_name == 'lpsn_wf':
                 self.full_lpsn_wf(options)
             elif options.lpsn_subparser_name == 'parse_html':