xpore v1.0

GoekeLab · Mar 7, 2021 · 0ce76b0 · 0ce76b0
1 parent d527fc7
commit 0ce76b0
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 25 deletions.
diff --git a/docs/source/scripts.rst b/docs/source/scripts.rst
@@ -14,31 +14,36 @@ Output files from `nanopolish eventalign`. Please refer to :ref:`Data preparatio
 
 * Usage example
 
-======================  ==========  ===================  ============================================================================================================
-Argument name(s)         Required    Default value         Description
-======================  ==========  ===================  ============================================================================================================
---eventalign=FILE        Yes         NA                    Eventalign filepath, the output from nanopolish.         
---summary=FILE           Yes         NA                    Eventalign summary filepath, the output from nanopolish.
---out_dir=DIR            Yes         NA                    Output directory.
---ensembl=NUM            No          91                    Ensembl version for gene-transcript mapping.
---species=STR            No          homo_sapiens          Species for ensembl gene-transcript mapping.
---genome                 No          False                 To run on Genomic coordinates. Without this argument, the program will run on transcriptomic coordinates.
---n_processes=NUM        No          1                     Number of processes to run.
---readcount_max=NUM      No          1000                  Maximum read counts per gene.
---resume                 No          False                 With this argument, the program will resume from the previous run.
-======================  ==========  ===================  ============================================================================================================
+=================================   ==========  ===================  ============================================================================================================
+Argument name(s)                    Required    Default value         Description
+=================================   ==========  ===================  ============================================================================================================
+--eventalign=FILE                   Yes         NA                    Eventalign filepath, the output from nanopolish.         
+--summary=FILE                      Yes         NA                    Eventalign summary filepath, the output from nanopolish.
+--out_dir=DIR                       Yes         NA                    Output directory.
+--ensembl=NUM                       No          91                    Ensembl version for gene-transcript mapping.
+--species=STR                       No          homo_sapiens          Species for ensembl gene-transcript mapping.
+--customised_genome                 No          False                 Set this flag if a customised genome is provided.
+--reference_name                    No          NA                    FASTA reference name.
+--annotation_name                   No          NA                    GTF annotation name.
+--gtf_path_or_url                   No          NA                    GTF file path or URL.
+--transcript_fasta_paths_or_urls    No          NA                    Transcript FASTA paths or URLs.
+--skip_eventalign_indexing          No          False                 Skip indexing the nanopolish eventalign output.
+--genome                            No          False                 Run on genomic coordinates. Without this argument, the program will run on transcriptomic coordinates.
+--n_processes=NUM                   No          1                     Number of processes to run.
+--readcount_max=NUM                 No          1000                  Maximum read counts per gene.
+--resume                            No          False                 With this argument, the program will resume from the previous run.
+=================================   ==========  ===================  ============================================================================================================
 
 * Output
 
 ======================  ==============  ===============================================================================================================================================================
 File name               File type       Description
 ======================  ==============  ===============================================================================================================================================================
-`eventalign.combined`   csv             Read segmentation information where multiple segments from `nanopolish eventalign` are aggregated per position.
-`eventalign.index`      csv             File index indicating the position in the `eventalign.combin` file where the segmentation information of each read index is stored, allowing a random access.
-`eventalign.log`        txt             Read indexes being processed.
-`data.json`             json            Intensity level mean for each position.
-`data.index`            csv             File index indicating the position in the `data.json` file where the intensity level means across positions of each gene is stored, allowing a random access.
-`data.log`              txt             Gene ids being processed.
+eventalign.index        csv             File index indicating the position in the `eventalign.txt` file (the output of nanopolish eventalign) where the segmentation information of each read index is stored, allowing random access.
+data.json               json            Intensity level mean for each position.
+data.index              csv             File index indicating the position in the `data.json` file where the intensity level means across positions of each gene are stored, allowing random access.
+data.log                txt             Gene ids being processed.
+data.readcount          csv             Summary of read counts per gene.
 ======================  ==============  ===============================================================================================================================================================
 
 ``xpore-diffmod``
@@ -65,7 +70,7 @@ Argument name(s)      Required    Default value       Description
 ======================  ===============     =================================================================================================================================================
 File name                File type           Description
 ======================  ===============     =================================================================================================================================================
-`diffmod.table`          csv                 Output table information of differential modification rates. Please refer to :ref:`Output table description <outputtable>` for the full description.   
-`diffmod.log`            txt                 Gene/Transcript ids being processed.
+diffmod.table            csv                 Output table information of differential modification rates. Please refer to :ref:`Output table description <outputtable>` for the full description.   
+diffmod.log              txt                 Gene/Transcript ids being processed.
 ======================  ===============     =================================================================================================================================================
 
diff --git a/xpore/scripts/dataprep.py b/xpore/scripts/dataprep.py
@@ -34,12 +34,12 @@ def get_args():
     # Use customised db
     # These arguments will be passed to Genome from pyensembl
     optional.add_argument('--customised_genome', dest='customised_genome', help='if customised genome provided.',default=False,action='store_true')
-    optional.add_argument('--reference_name', dest='reference_name', help='reference name.',type=str)
-    optional.add_argument('--annotation_name', dest='annotation_name', help='annotation name.',type=str)
+    optional.add_argument('--reference_name', dest='reference_name', help='fasta reference name.',type=str)
+    optional.add_argument('--annotation_name', dest='annotation_name', help='gtf annotation name.',type=str)
     optional.add_argument('--gtf_path_or_url', dest='gtf_path_or_url', help='gtf file path or url.',type=str)
     optional.add_argument('--transcript_fasta_paths_or_urls', dest='transcript_fasta_paths_or_urls', help='transcript fasta paths or urls.',type=str)
 
-    optional.add_argument('--skip_eventalign_index', dest='skip_eventalign_index', help='skip indexing the eventalign nanopolish output.',default=False,action='store_true')
+    optional.add_argument('--skip_eventalign_indexing', dest='skip_eventalign_indexing', help='skip indexing the eventalign nanopolish output.',default=False,action='store_true')
 
     # parser.add_argument('--features', dest='features', help='Signal features to extract.',type=list,default=['norm_mean'])
     optional.add_argument('--genome', dest='genome', help='to run on Genomic coordinates. Without this argument, the program will run on transcriptomic coordinates',default=False,action='store_true') 
@@ -642,7 +642,7 @@ def main():
     misc.makedirs(out_dir) #todo: check every level.
 
     # (1) For each read, combine multiple events aligned to the same positions, the results from nanopolish eventalign, into a single event per position.
-    if not args.skip_eventalign_index:
+    if not args.skip_eventalign_indexing:
         parallel_index(eventalign_filepath,summary_filepath,chunk_size,out_dir,n_processes,resume)
 
     # (2) Create a .json file, where the info of all reads are stored per position, for modelling.