Skip to content

Commit

Permalink
Staging new (#5)
Browse files Browse the repository at this point in the history
* Point to GTF etc provided in new way

* marker cluster renumbering for newer scanpy-scripts

* Improved marker renumbering fix

* Speed up cluster renumbering

* Correct output typo

* Attempt to solve mystery missing file issue

* Final fix to Jon idiocy

* Don't publish clusters as symlink

* Localise trivial processes

* Marker renumbering fix

* Fix to the last fix

* Add marker resolution fix
  • Loading branch information
pinin4fjords authored May 21, 2020
1 parent 0639221 commit c2aaf2a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 22 deletions.
16 changes: 16 additions & 0 deletions bin/renumberClusters.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env Rscript

library(data.table)

# Shift zero-based cluster numbering to one-based.
#
# Usage: renumberClusters.R <infile> <outfile>
#
#   infile   Delimited table read with data.table::fread(). The first two
#            columns are passed through unchanged; every remaining column is
#            treated as a column of numeric cluster assignments.
#   outfile  Path of the tab-separated table to write.
#
# If the smallest value found across the cluster columns is 0, every cluster
# column is incremented by 1. Otherwise the table is written back unchanged.

cl <- commandArgs(trailingOnly = TRUE)

# Fail early with a usage message instead of passing NA paths downstream.
if (length(cl) < 2) {
  stop("Usage: renumberClusters.R <infile> <outfile>", call. = FALSE)
}

infile <- cl[1]
outfile <- cl[2]

clusters <- fread(infile, check.names = FALSE)

# Only inspect cluster columns when there are any beyond the two leading ones.
if (ncol(clusters) > 2) {
  assignments <- clusters[, c(-1, -2)]

  # na.rm = TRUE so that missing assignments do not turn the comparison into
  # NA, which would make `if` abort.
  if (min(assignments, na.rm = TRUE) == 0) {
    clusters <- cbind(clusters[, c(1, 2)], assignments + 1)
  }
}

fwrite(clusters, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
3 changes: 3 additions & 0 deletions envs/r-data.table.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Conda environment providing the R data.table package. It is referenced by
# the `conda` directive of the renumber_clusters process in main.nf, which
# runs bin/renumberClusters.R.
# NOTE(review): no channels or version pins are declared here, so package
# resolution depends on the caller's conda configuration - confirm intended.
name: r-data.table
dependencies:
- r-data.table
54 changes: 32 additions & 22 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ if (isDroplet && isSmart){
// See what other inputs are provided

RAW_MATRIX = Channel.fromPath( "$resultsRoot/${params.rawMatrix}", checkIfExists: true)
REFERENCE_FASTA = Channel.fromPath( "${params.referenceFasta}", checkIfExists: true ).first()
REFERENCE_GTF = Channel.fromPath( "${params.referenceGtf}", checkIfExists: true ).first()
REFERENCE_FASTA = Channel.fromPath( "$resultsRoot/${params.referenceFasta}", checkIfExists: true ).first()
REFERENCE_GTF = Channel.fromPath( "$resultsRoot/${params.referenceGtf}", checkIfExists: true ).first()

if ( tertiaryWorkflow == 'scanpy-workflow' || tertiaryWorkflow == 'scanpy-galaxy' ){
expressionTypes = expressionTypes + [ 'raw_filtered', 'filtered_normalised' ]
Expand Down Expand Up @@ -93,6 +93,8 @@ RAW_TPM_MATRIX.into{

process reference_manifest_lines {

executor 'local'

input:
file(referenceFasta) from REFERENCE_FASTA
file(referenceGtf) from REFERENCE_GTF
Expand Down Expand Up @@ -311,6 +313,8 @@ process finalise_software {

process mark_perplexities {

executor 'local'

input:
file tSNE from SCANPY_TSNE

Expand Down Expand Up @@ -343,6 +347,8 @@ process tsne_to_tsv {

process tsne_lines {

executor 'local'

input:
set val(perplexity), file(embeddings) from TSV_EMBEDDINGS

Expand Down Expand Up @@ -533,6 +539,8 @@ BIG_MATRICES

process matrix_lines {

executor 'local'

input:
set val(expressionType), file(matrixRows), file(matrixCols), file(matrixContent), file(tsvMatrix) from MTX_MATRIX_ROWNAMES.join(MTX_MATRIX_COLNAMES_FOR_MANIFEST_LINES).join(MTX_MATRIX_CONTENT).join(TSV_AND_NOTSV_MATRICES)

Expand All @@ -553,6 +561,10 @@ process matrix_lines {

process renumber_clusters {

publishDir "$resultsRoot/bundle", mode: 'copy', overwrite: true

conda "${workflow.projectDir}/envs/r-data.table.yml"

memory { 5.GB * task.attempt }
errorStrategy { task.exitStatus == 130 || task.exitStatus == 137 ? 'retry' : 'finish' }
maxRetries 20
Expand All @@ -564,66 +576,65 @@ process renumber_clusters {
file 'clusters_for_bundle.txt' into FINAL_CLUSTERS

"""
#!/usr/bin/env Rscript
clusters <- read.delim('possibly_misnumbered_clusters.txt', check.names=FALSE)
if (min(clusters[,c(-1,-2)]) == 0){
clusters[,c(-1,-2)] <- clusters[,c(-1,-2)]+1
}
write.table(clusters, file='clusters_for_bundle.txt', sep="\t", quote=FALSE, row.names = FALSE)
renumberClusters.R possibly_misnumbered_clusters.txt clusters_for_bundle.txt.tmp
mv clusters_for_bundle.txt.tmp clusters_for_bundle.txt
"""
}

// Find out what resolutions are represented by the marker files

process mark_marker_resolutions {

executor 'local'

input:
file markersFile from SCANPY_MARKERS

output:
set stdout, file (markersFile) into MARKERS_BY_RESOLUTION

"""
echo $markersFile | grep -o -E '[0-9]+' | tr '\\n' '\\0'
echo $markersFile | grep -o -E '[0-9]+' | tr -d \'\\n\'
"""
}

// Convert the marker files to tsv

process renumber_markers_to_tsv {
process renumber_markers {

publishDir "$resultsRoot/bundle", mode: 'move', overwrite: true

memory { 5.GB * task.attempt }
errorStrategy { task.exitStatus == 130 || task.exitStatus == 137 ? 'retry' : 'finish' }
maxRetries 20

input:
set val(resolution), file(markersFile) from MARKERS_BY_RESOLUTION
set val(resolution), file('markers.tsv') from MARKERS_BY_RESOLUTION

output:
set val(resolution), file("markers_*.tsv") into TSV_MARKERS
set val(resolution), file("markers_${resolution}.tsv") into TSV_MARKERS

"""
#!/usr/bin/env Rscript
markers <- read.csv('${markersFile}', check.names = FALSE)
markers <- read.delim('markers.tsv', check.names = FALSE)
if (min(markers\$groups) == 0){
if ('groups' %in% names(markers) && min(markers\$groups) == 0){
markers\$groups <- markers\$groups + 1
}else if ('cluster' %in% names(markers) && min(markers\$cluster) == 0){
markers\$cluster <- markers\$cluster + 1
}
write.table(markers, file='markers_${resolution}.tsv', sep="\t", quote=FALSE, row.names=FALSE)
dir.create('out', showWarnings = FALSE)
write.table(markers, file='markers_${resolution}.tsv', sep="\\t", quote=FALSE, row.names=FALSE)
"""
}

// Combine the listing of markers files for the manifest

process markers_lines {

executor 'local'

input:
set val(resolution), file(markersFile) from TSV_MARKERS

Expand Down Expand Up @@ -693,7 +704,6 @@ if ( tertiaryWorkflow == 'scanpy-workflow' || tertiaryWorkflow == 'scanpy-galaxy

output:
file "MANIFEST"
file(clusters)

"""
cp $startingManifest MANIFEST
Expand Down

0 comments on commit c2aaf2a

Please sign in to comment.