Skip to content

Commit

Permalink
Merge branch 'messyForest' into Multiplex_Major_Patch
Browse files Browse the repository at this point in the history
  • Loading branch information
andredsim committed Oct 25, 2024
2 parents 77cae13 + 773d88c commit 657e2b5
Show file tree
Hide file tree
Showing 14 changed files with 943 additions and 280 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ Imports:
Rsamtools,
methods,
Rcpp,
xgboost
xgboost,
Matrix
VignetteBuilder:
knitr
LazyData: true
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,5 @@ import(SummarizedExperiment)
import(S4Vectors, except=c(rename, setequal, setdiff, intersect,cor))
useDynLib(bambu, .registration = TRUE)
import(xgboost)
import(Matrix)
import(BSgenome)
83 changes: 83 additions & 0 deletions R/bambu-assignDist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#' Create equivilence classes and assign to transcripts
#' @inheritParams bambu
#' @import data.table
#' @noRd
assignReadClasstoTranscripts <- function(readClassList, annotations, isoreParameters, verbose, demultiplexed, spatial) {
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, isoreParameters, verbose)
readClassList = splitReadClassFiles(readClassList)
readClassDt <- genEquiRCs(metadata(readClassList)$readClassDist, annotations, verbose)
readClassDt$eqClass.match = match(readClassDt$eqClassById,metadata(readClassList)$eqClassById)
readClassDt <- simplifyNames(readClassDt)
readClassDt = readClassDt %>% group_by(eqClassId, gene_sid) %>%
mutate(multi_align = length(unique(txid))>1) %>% ungroup() %>% mutate(aval = 1) %>%
data.table()

#return non-em counts
ColData = generateColData(colnames(metadata(readClassList)$countMatrix), clusters = NULL, demultiplexed, spatial)
quantData <- SummarizedExperiment(assays = SimpleList(
counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
rowRanges = annotations,
colData = ColData)
colnames(quantData) = ColData$id
if(sum(metadata(readClassList)$incompatibleCountMatrix)==0){
metadata(quantData)$incompatibleCounts = NULL
} else{
metadata(quantData)$incompatibleCounts = generateIncompatibleCounts(metadata(readClassList)$incompatibleCountMatrix, annotations)
}
metadata(quantData)$nonuniqueCounts = generateNonUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)
metadata(quantData)$readClassDt = readClassDt
metadata(quantData)$countMatrix = metadata(readClassList)$countMatrix
metadata(quantData)$incompatibleCountMatrix = metadata(readClassList)$incompatibleCountMatrix
metadata(quantData)$sampleNames = metadata(readClassList)$sampleNames
return(quantData)

}


generateUniqueCounts <- function(readClassDt, countMatrix, annotations){
x = readClassDt %>% filter(!multi_align & !is.na(eqClass.match))
uniqueCounts = countMatrix[x$eqClass.match,]
uniqueCounts.tx = sparse.model.matrix(~ factor(x$txid) - 1)
uniqueCounts = t(uniqueCounts.tx) %*% uniqueCounts
rownames(uniqueCounts) = names(annotations)[match(as.numeric(levels(factor(x$txid))),mcols(annotations)$txid)]
counts = sparseMatrix(length(annotations), ncol(uniqueCounts), x = 0)
rownames(counts) = names(annotations)
counts[rownames(uniqueCounts),] = uniqueCounts
return(counts)

counts.total = colSums(countMatrix) + colSums(incompatibleCountMatrix)
counts.total[counts.total==0] = 1
counts.CPM = counts/counts.total * 10^6

}

generateIncompatibleCounts <- function(incompatibleCountMatrix, annotations){
genes = levels(factor(unique(mcols(annotations)$GENEID)))
rownames(incompatibleCountMatrix) = genes[as.numeric(rownames(incompatibleCountMatrix))]
geneMat = sparseMatrix(length(genes), ncol(incompatibleCountMatrix), x = 0)
rownames(geneMat) = genes
geneMat[rownames(incompatibleCountMatrix),] = incompatibleCountMatrix
return(geneMat)
}

generateNonUniqueCounts <- function(readClassDt, countMatrix, annotations){
#fuse multi align RCs by gene
x = readClassDt %>% filter(multi_align & !is.na(eqClass.match))
x = x %>% distinct(eqClassId, .keep_all = TRUE)
nonuniqueCounts = countMatrix[x$eqClass.match,, drop = FALSE]
if(nrow(x)>1){
nonuniqueCounts.gene = sparse.model.matrix(~ factor(x$gene_sid) - 1)
nonuniqueCounts = t(nonuniqueCounts.gene) %*% nonuniqueCounts
}
#covert ids into gene ids
geneids = as.numeric(levels(factor(x$gene_sid)))
geneids = x$txid[match(geneids, x$gene_sid)]
geneids = mcols(annotations)$GENEID[as.numeric(geneids)]
rownames(nonuniqueCounts) = geneids
#create matrix for all annotated genes
genes = levels(factor(unique(mcols(annotations)$GENEID)))
geneMat = sparseMatrix(length(genes), ncol(nonuniqueCounts), x = 0)
rownames(geneMat) = genes
geneMat[rownames(nonuniqueCounts),] = nonuniqueCounts
return(geneMat)
}
19 changes: 11 additions & 8 deletions R/bambu-extendAnnotations-utilityExtend.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ filterTranscripts <- function(combinedTranscripts, min.sampleNumber){
combinedTranscripts$NSampleTxScore >= min.sampleNumber) & (
combinedTranscripts$NSampleReadProp >= min.sampleNumber)
}
combinedTranscripts = combinedTranscripts[filterSet,]
#combinedTranscripts = combinedTranscripts[filterSet,]
combinedTranscripts$maxTxScore[!filterSet] = 0
return(combinedTranscripts)
}

Expand Down Expand Up @@ -95,8 +96,8 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList
subsetTranscripts <- combindRowDataWithRanges(
rowDataCombined[!notCompatibleIds,],
exonRangesCombined[!notCompatibleIds])
exonRangesCombined <- exonRangesCombined[notCompatibleIds]
rowDataCombined <- rowDataCombined[notCompatibleIds,]
rowDataCombined$maxTxScore[grepl("compatible", rowDataCombined$readClassType) &
rowDataCombined$readClassType != "equal:compatible"]=0
}
#(2) remove transcripts below NDR threshold/identical junctions to annotations
rowDataCombined = calculateNDROnTranscripts(rowDataCombined,
Expand All @@ -122,7 +123,7 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList
exonRangesCombined <- exonRangesCombined[filterSet]
rowDataCombined <- rowDataCombined[filterSet,]
}
if(sum(filterSet==0) & length(annotationGrangesList)==0) stop(
if(sum(filterSet)==0 & length(annotationGrangesList)==0) stop(
"WARNING - No annotations were provided. Please increase NDR threshold to use novel transcripts")
if(sum(filterSet)==0) message("WARNING - No novel transcripts meet the given thresholds. Try a higher NDR.")
# (3) combine novel transcripts with annotations
Expand Down Expand Up @@ -218,6 +219,7 @@ calculateNDROnTranscripts <- function(combinedTranscripts, useTxScore = FALSE){
"for NDR precision stabilization.")
message("NDR will be approximated as: (1 - Transcript Model Prediction Score)")
} else combinedTranscripts$NDR = calculateNDR(combinedTranscripts$maxTxScore, equal)
combinedTranscripts$NDR[combinedTranscripts$maxTxScore==0] = 1
return(combinedTranscripts)
}

Expand Down Expand Up @@ -750,10 +752,11 @@ isore.estimateDistanceToAnnotations <- function(seReadClass,
primarySecondaryDistStartEnd = min.primarySecondaryDistStartEnd,
ignore.strand = FALSE)
distTable$readCount <- assays(seReadClass)$counts[distTable$readClassId, ]
if (additionalFiltering)
distTable <- left_join(distTable, select(readClassTable,
readClassId, confidenceType), by = "readClassId") %>%
mutate(relativeReadCount = readCount / txNumberFiltered)
# if (additionalFiltering)
# distTable <- left_join(distTable, select(readClassTable,
# readClassId, confidenceType), by = "readClassId") %>%
# mutate(relativeReadCount = readCount / txNumberFiltered)

distTable <- dplyr::select(distTable, annotationTxId, txid, readClassId,
readCount, compatible, equal,dist)
distTable <- left_join(distTable, as_tibble(mcols(annotationGrangesList)[, c("txid", "GENEID")]),
Expand Down
Loading

0 comments on commit 657e2b5

Please sign in to comment.