diff --git a/docs/source/scripts.rst b/docs/source/scripts.rst index 458504b..a07d388 100644 --- a/docs/source/scripts.rst +++ b/docs/source/scripts.rst @@ -14,31 +14,36 @@ Output files from `nanopolish eventalign`. Please refer to :ref:`Data preparatio * Usage example -====================== ========== =================== ============================================================================================================ -Argument name(s) Required Default value Description -====================== ========== =================== ============================================================================================================ ---eventalign=FILE Yes NA Eventalign filepath, the output from nanopolish. ---summary=FILE Yes NA Eventalign summary filepath, the output from nanopolish. ---out_dir=DIR Yes NA Output directory. ---ensembl=NUM No 91 Ensembl version for gene-transcript mapping. ---species=STR No homo_sapiens Species for ensembl gene-transcript mapping. ---genome No False To run on Genomic coordinates. Without this argument, the program will run on transcriptomic coordinates. ---n_processes=NUM No 1 Number of processes to run. ---readcount_max=NUM No 1000 Maximum read counts per gene. ---resume No False With this argument, the program will resume from the previous run. -====================== ========== =================== ============================================================================================================ +================================= ========== =================== ============================================================================================================ +Argument name(s) Required Default value Description +================================= ========== =================== ============================================================================================================ +--eventalign=FILE Yes NA Eventalign filepath, the output from nanopolish. 
+--summary=FILE Yes NA Eventalign summary filepath, the output from nanopolish. +--out_dir=DIR Yes NA Output directory. +--ensembl=NUM No 91 Ensembl version for gene-transcript mapping. +--species=STR No homo_sapiens Species for ensembl gene-transcript mapping. +--customised_genome No False Set this flag if a customised genome is provided. +--reference_name No NA FASTA reference name. +--annotation_name No NA GTF annotation name. +--gtf_path_or_url No NA GTF file path or URL. +--transcript_fasta_paths_or_urls No NA Transcript FASTA paths or URLs. +--skip_eventalign_indexing No False To skip indexing the eventalign nanopolish output. +--genome No False To run on Genomic coordinates. Without this argument, the program will run on transcriptomic coordinates. +--n_processes=NUM No 1 Number of processes to run. +--readcount_max=NUM No 1000 Maximum read counts per gene. +--resume No False With this argument, the program will resume from the previous run. +================================= ========== =================== ============================================================================================================ * Output ====================== ============== =============================================================================================================================================================== File name File type Description ====================== ============== =============================================================================================================================================================== -`eventalign.combined` csv Read segmentation information where multiple segments from `nanopolish eventalign` are aggregated per position. -`eventalign.index` csv File index indicating the position in the `eventalign.combin` file where the segmentation information of each read index is stored, allowing a random access. -`eventalign.log` txt Read indexes being processed. -`data.json` json Intensity level mean for each position. 
-`data.index` csv File index indicating the position in the `data.json` file where the intensity level means across positions of each gene is stored, allowing a random access. -`data.log` txt Gene ids being processed. +eventalign.index csv File index indicating the position in the `eventalign.txt` file (the output of nanopolish eventalign) where the segmentation information of each read index is stored, allowing random access. +data.json json Intensity level mean for each position. +data.index csv File index indicating the position in the `data.json` file where the intensity level means across positions of each gene is stored, allowing random access. +data.log txt Gene ids being processed. +data.readcount csv Summary of readcounts per gene. ====================== ============== =============================================================================================================================================================== ``xpore-diffmod`` @@ -65,7 +70,7 @@ Argument name(s) Required Default value Description ====================== =============== ================================================================================================================================================= File name File type Description ====================== =============== ================================================================================================================================================= -`diffmod.table` csv Output table information of differential modification rates. Please refer to :ref:`Output table description ` for the full description. -`diffmod.log` txt Gene/Transcript ids being processed. +diffmod.table csv Output table information of differential modification rates. Please refer to :ref:`Output table description ` for the full description. +diffmod.log txt Gene/Transcript ids being processed. 
====================== =============== ================================================================================================================================================= diff --git a/xpore/scripts/dataprep.py b/xpore/scripts/dataprep.py index 8c303ae..4cf9c28 100644 --- a/xpore/scripts/dataprep.py +++ b/xpore/scripts/dataprep.py @@ -34,12 +34,12 @@ def get_args(): # Use customised db # These arguments will be passed to Genome from pyensembl optional.add_argument('--customised_genome', dest='customised_genome', help='if customised genome provided.',default=False,action='store_true') - optional.add_argument('--reference_name', dest='reference_name', help='reference name.',type=str) - optional.add_argument('--annotation_name', dest='annotation_name', help='annotation name.',type=str) + optional.add_argument('--reference_name', dest='reference_name', help='fasta reference name.',type=str) + optional.add_argument('--annotation_name', dest='annotation_name', help='gtf annotation name.',type=str) optional.add_argument('--gtf_path_or_url', dest='gtf_path_or_url', help='gtf file path or url.',type=str) optional.add_argument('--transcript_fasta_paths_or_urls', dest='transcript_fasta_paths_or_urls', help='transcript fasta paths or urls.',type=str) - optional.add_argument('--skip_eventalign_index', dest='skip_eventalign_index', help='skip indexing the eventalign nanopolish output.',default=False,action='store_true') + optional.add_argument('--skip_eventalign_indexing', dest='skip_eventalign_indexing', help='skip indexing the eventalign nanopolish output.',default=False,action='store_true') # parser.add_argument('--features', dest='features', help='Signal features to extract.',type=list,default=['norm_mean']) optional.add_argument('--genome', dest='genome', help='to run on Genomic coordinates. 
Without this argument, the program will run on transcriptomic coordinates',default=False,action='store_true') @@ -642,7 +642,7 @@ def main(): misc.makedirs(out_dir) #todo: check every level. # (1) For each read, combine multiple events aligned to the same positions, the results from nanopolish eventalign, into a single event per position. - if not args.skip_eventalign_index: + if not args.skip_eventalign_indexing: parallel_index(eventalign_filepath,summary_filepath,chunk_size,out_dir,n_processes,resume) # (2) Create a .json file, where the info of all reads are stored per position, for modelling.