From cf63b42ee3f8501aafeabde7a005a93a9a2d687b Mon Sep 17 00:00:00 2001
From: Christoph Hafemeister <christoph.hafemeister@ccri.at>
Date: Thu, 13 Oct 2022 13:44:45 +0200
Subject: [PATCH] Add documentation

---
 R/demultiplexing.R  | 19 +++++++++++++++++++
 man/cb_demux_gmm.Rd | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/R/demultiplexing.R b/R/demultiplexing.R
index 1630a8d..bc30137 100644
--- a/R/demultiplexing.R
+++ b/R/demultiplexing.R
@@ -11,6 +11,25 @@
 #' input column names, 'neg', or 'multi'
 #'
 #' @section Details:
+#' Barcode-based demultiplexing using a Gaussian finite mixture model (GMM) that
+#' identifies four groups of cells per pair of barcodes:
+#' G1) positive for barcode 1,
+#' G2) positive for barcode 2,
+#' G3) negative for both barcodes, and
+#' G4) positive for both barcodes.
+#' For each cell and barcode pair, we calculate the mean and relative differences
+#' (difference over the mean) of log10-transformed counts (pseudo-count added).
+#' In a first iteration, we use a 1-dimensional mixture model
+#' (mclust package; MclustSSC method; parameters: modelNames = "E", G = 3) with
+#' relative differences as input and the following training data: the 50 cells
+#' with highest/lowest relative difference as positives for G1 and G2, 50 cells
+#' closest to the mean of G1 and G2 as G3. Based on the resulting classification
+#' we generate synthetic G4 training data by sampling cells assigned to
+#' G1 and G2 and combining their barcode counts (barcode 1 from G1 cells,
+#' barcode 2 from G2 cells). These synthetic doublet cells are added to the
+#' training data and a final 2-dimensional mixture model (parameters:
+#' modelNames = "VVV", G = 4) with relative difference and mean as input is used
+#' for classification.
 #'
 #' @export
 #'
diff --git a/man/cb_demux_gmm.Rd b/man/cb_demux_gmm.Rd
index 3d7af84..2fd71f9 100644
--- a/man/cb_demux_gmm.Rd
+++ b/man/cb_demux_gmm.Rd
@@ -24,6 +24,25 @@ De-multiplex
 }
 \section{Details}{
 
+Barcode-based demultiplexing using a Gaussian finite mixture model (GMM) that
+identifies four groups of cells per pair of barcodes:
+G1) positive for barcode 1,
+G2) positive for barcode 2,
+G3) negative for both barcodes, and
+G4) positive for both barcodes.
+For each cell and barcode pair, we calculate the mean and relative differences
+(difference over the mean) of log10-transformed counts (pseudo-count added).
+In a first iteration, we use a 1-dimensional mixture model
+(mclust package; MclustSSC method; parameters: modelNames = "E", G = 3) with
+relative differences as input and the following training data: the 50 cells
+with highest/lowest relative difference as positives for G1 and G2, 50 cells
+closest to the mean of G1 and G2 as G3. Based on the resulting classification
+we generate synthetic G4 training data by sampling cells assigned to
+G1 and G2 and combining their barcode counts (barcode 1 from G1 cells,
+barcode 2 from G2 cells). These synthetic doublet cells are added to the
+training data and a final 2-dimensional mixture model (parameters:
+modelNames = "VVV", G = 4) with relative difference and mean as input is used
+for classification.
 }
 
 \examples{