Skip to content

Commit

Permalink
update xml comments for normalization module
Browse files Browse the repository at this point in the history
module Fitting.quantilenormalization is obsolete
#281
  • Loading branch information
bvenn committed Jul 14, 2023
1 parent 054e62d commit 9f97bf5
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 32 deletions.
9 changes: 3 additions & 6 deletions src/FSharp.Stats/Fitting/QuantileNormalization.fs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
namespace FSharp.Stats.Fitting

(*
Module for quantile normalization
*)

[<System.Obsolete("Use Signal.Normalization.quantile matrix.Transpose instead!")>]
module QuantileNormalization =

open FSharp.Stats
Expand All @@ -11,6 +10,7 @@ module QuantileNormalization =
// http://en.wikipedia.org/wiki/Quantile_normalization
// technique for making two distributions or more identical in statistical properties.
// to normalize two or more distributions to each other, rank the original values and group them by rank, then set to the average of the original values.
[<System.Obsolete("use Signal.Normalization.quantile matrix.Transpose instead!")>]
let quantileNorm (colSeq : seq<array<float>>) =

// Helper function to group RankedValue by rank and calculate the average of the original values
Expand Down Expand Up @@ -39,6 +39,3 @@ module QuantileNormalization =
rawRanks
|> Seq.map (fun col ->
col |> Array.map (fun (r,v) -> if rankValueMap.ContainsKey r then rankValueMap.[r] else nan ))



122 changes: 96 additions & 26 deletions src/FSharp.Stats/Signal/Normalization.fs
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,62 @@ open FSharp.Stats

module Normalization =

/// z normalization using the population standard deviation
//Bortz J., Schuster C., Statistik für Human- und Sozialwissenschaftler, 7 (2010), p. 35
/// <summary>
/// z score normalization/transformation using the population standard deviation.
/// </summary>
/// <param name="yVal">collection of values to be transformed</param>
/// <returns>transformed yVal in unchanged order</returns>
/// <example>
/// <code>
/// // transform data, such that data has zero mean and population standard deviation of 1
/// Normalization.zScoreTransformPopulation (vector [|1.1;5.3;-9.0;13.2;17.3;-2.3|])
/// </code>
/// </example>
/// <remarks>Bortz J., Schuster C., Statistik für Human- und Sozialwissenschaftler, 7 (2010), p. 35</remarks>
let zScoreTransformPopulation (yVal:Vector<float>) =
    // Centre every value on the mean, then scale by the population standard deviation.
    let center = Seq.mean yVal
    let spread = Seq.stDevPopulation yVal
    Vector.map (fun value -> (value - center) / spread) yVal

/// z normalization using the sample standard deviation
//Bortz J., Schuster C., Statistik für Human- und Sozialwissenschaftler, 7 (2010), p. 35
/// <summary>
/// z score normalization/transformation using the sample standard deviation. Rarely used since the population variance of the result is not equal to 1.
/// </summary>
/// <param name="yVal">collection of values to be transformed</param>
/// <returns>transformed yVal in unchanged order</returns>
/// <example>
/// <code>
/// // transform data, such that data has zero mean and sample standard deviation of 1
/// Normalization.zScoreTransform (vector [|1.1;5.3;-9.0;13.2;17.3;-2.3|])
/// </code>
/// </example>
/// <remarks>Bortz J., Schuster C., Statistik für Human- und Sozialwissenschaftler, 7 (2010), p. 35</remarks>
let zScoreTransform (yVal:Vector<float>) =
    // Centre every value on the mean, then scale by the sample standard deviation.
    let center = Seq.mean yVal
    let spread = Seq.stDev yVal
    Vector.map (fun value -> (value - center) / spread) yVal

/// Summary of the median of ratios (mor) normalization with normed data and determined correctionfactors.
/// Summary of the median of ratios (mor) normalization with normed data, determined correctionfactors, and transformation function.
type MorResult =
    {
        // Correction factors determined during the normalization.
        CorrFactors : seq<float>
        // Data matrix after the normalization has been applied.
        NormedData : Matrix<float>
        // Function that applies the same normalization to a matrix.
        NormFunction : matrix -> matrix
    }
    // Convenience constructor: correction factors, normed data, normalization function.
    static member Create cf nd f =
        { CorrFactors = cf; NormedData = nd; NormFunction = f }

/// As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md
///
/// Rows are genes, columns are samples
///
/// The additional function is applied on all values of the matrix when calculating the normalization factors. By this, a zero in the original dataset will still remain zero.
let medianOfRatiosBy (f: float -> float) (data:Matrix<float>) =

/// <summary>
/// Median of ratios normalization as used by DESeq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
/// Rows are genes, columns are samples
/// </summary>
/// <param name="f">The transformation function is applied on all values of the matrix before calculating the normalization factors.</param>
/// <param name="data">data matrix with columns as features (samples,time points) and rows as measured entities (genes,proteins).</param>
/// <returns>Normalized data matrix with correction factors and normalization function.</returns>
/// <example>
/// <code>
/// // raw data with proteins as rows and samples as columns
/// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
/// let normedData = Normalization.medianOfRatiosBy (fun x -> log (x + 1.0)) myData
/// </code>
/// </example>
let medianOfRatiosBy (f: float -> float) (data: Matrix<float>) =
let sampleWiseCorrectionFactors =
data
|> Matrix.mapiRows (fun _ v ->
Expand All @@ -47,17 +76,37 @@ module Normalization =
)
MorResult.Create sampleWiseCorrectionFactors (normData data) normData

/// As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md
///
/// Rows are genes, columns are samples

/// <summary>
/// Median of ratios normalization as used by DESeq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
/// Rows are genes, columns are samples
/// </summary>
/// <param name="data">data matrix with columns as features (samples,time points) and rows as measured entities (genes,proteins).</param>
/// <returns>Normalized data matrix with correction factors and normalization function.</returns>
/// <example>
/// <code>
/// // raw data with proteins as rows and samples as columns
/// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
/// let normedData = Normalization.medianOfRatios myData
/// </code>
/// </example>
let medianOfRatios (data:Matrix<float>) =
    // Delegate with the identity function, i.e. no transformation of the values.
    data |> medianOfRatiosBy id

/// As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md
///
/// Columns are genes, rows are samples
///
/// The additional function is applied on all values of the matrix when calculating the normalization factors. By this, a zero in the original dataset will still remain zero.
/// <summary>
/// Median of ratios normalization as used by DESeq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
/// Columns are genes, rows are samples
/// </summary>
/// <param name="f">The transformation function is applied on all values of the matrix before calculating the normalization factors.</param>
/// <param name="data">data matrix with columns as measured entities (genes,proteins) and rows as features (samples,time points).</param>
/// <returns>Normalized data matrix with correction factors and normalization function.</returns>
/// <example>
/// <code>
/// // raw data with proteins as columns and samples as rows
/// let myData = Matrix.init 5 500 (fun _ _ -> rnd.NextDouble())
/// let normedData = Normalization.medianOfRatiosWideBy (fun x -> log (x + 1.0)) myData
/// </code>
/// </example>
let medianOfRatiosWideBy (f: float -> float) (data:Matrix<float>) =
let sampleWiseCorrectionFactors =
data
Expand All @@ -75,15 +124,36 @@ module Normalization =
)
MorResult.Create sampleWiseCorrectionFactors (normData data) normData

/// As used by Deseq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md
///
/// Columns are genes, rows are samples
/// <summary>
/// Median of ratios normalization as used by DESeq2, see: https://github.com/hbctraining/DGE_workshop/blob/master/lessons/02_DGE_count_normalization.md .
/// Columns are genes, rows are samples
/// </summary>
/// <param name="data">data matrix with columns as measured entities (genes,proteins) and rows as features (samples,time points).</param>
/// <returns>Normalized data matrix with correction factors and normalization function.</returns>
/// <example>
/// <code>
/// // raw data with proteins as columns and samples as rows
/// let myData = Matrix.init 5 500 (fun _ _ -> rnd.NextDouble())
/// let normedData = Normalization.medianOfRatiosWide myData
/// </code>
/// </example>
let medianOfRatiosWide (data:Matrix<float>) =
    // Delegate with the identity function, i.e. no transformation of the values.
    data |> medianOfRatiosWideBy id

/// Quantile normalization with equal number of elements for each sample.
///
/// Rows are genes, columns are samples
/// <summary>
/// Quantile normalization with equal number of elements (rows) for each sample (column).
/// After normalization, all columns share the same mean and standard deviation.
/// Rows are genes, columns are samples.
/// </summary>
/// <param name="data">data matrix with columns as features (samples,time points) and rows as measured entities (genes,proteins).</param>
/// <returns>Normalized data matrix.</returns>
/// <example>
/// <code>
/// // raw data with proteins as rows and samples as columns
/// let myData = Matrix.init 500 5 (fun _ _ -> rnd.NextDouble())
/// let normedData = Normalization.quantile myData
/// </code>
/// </example>
let quantile (data:Matrix<float>) =
data
|> Matrix.mapCols (Seq.indexed >> Seq.sortBy snd)
Expand Down

0 comments on commit 9f97bf5

Please sign in to comment.