From c82a4ab66a2d4a0eb1820f81ca58c157647c3679 Mon Sep 17 00:00:00 2001
From: mrazomej <manuel.razo.m@gmail.com>
Date: Wed, 15 Jan 2025 07:20:28 -0800
Subject: [PATCH] almost working again

---
 src/utils.jl | 1419 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 915 insertions(+), 504 deletions(-)

diff --git a/src/utils.jl b/src/utils.jl
index 7d555a1..f28f2ec 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,8 +1,5 @@
 ##
 
-# Import basic libraries
-import Logging
-
 # Import package to handle dataframes
 import DataFrames as DF
 import CSV
@@ -437,8 +434,8 @@ function _extract_R(
     return DataArrays(
         R̲̲,                    # bc_count
         n̲ₜ,                   # bc_total
-        size(R̲̲⁽ⁿ⁾, 2),       # n_neutral
-        size(R̲̲⁽ᵐ⁾, 2),       # n_bc
+        length(neutral_ids),   # n_neutral
+        length(bc_ids),       # n_bc
         bc_ids,               # bc_ids
         neutral_ids,          # neutral_ids
         "env1",              # envs
@@ -450,68 +447,6 @@ function _extract_R(
     )
 end
 
-# ============================================================================
-# Case: Single condition, no neutrals
-# ============================================================================
-
-@doc raw"""
-    _extract_R(
-        data::DF.AbstractDataFrame, id_col::Symbol, time_col::Symbol,
-        count_col::Symbol, neutral_col::Nothing, rep_col::Nothing, 
-        env_col::Nothing, genotype_col::Nothing
-    )
-
-Extractor for single condition experiment without neutral lineages.
-This handles the case where:
-- There is a single experimental condition
-- No neutral lineages (neutral_col is Nothing)
-- No replicates (rep_col is Nothing)
-- No environment variations (env_col is Nothing)
-- No genotype groupings (genotype_col is Nothing)
-
-# Returns
-- `DataArrays`: Struct containing processed barcode data including:
-    - Single matrix for counts
-    - Single vector for totals
-    - Basic metadata (no neutrals, replicates, environments, or genotypes)
-"""
-function _extract_R(
-    data::DF.AbstractDataFrame,
-    id_col::Symbol,
-    time_col::Symbol,
-    count_col::Symbol,
-    neutral_col::Nothing,
-    rep_col::Nothing,
-    env_col::Nothing,
-    genotype_col::Nothing
-)::DataArrays
-    # Extract unique time points
-    timepoints = _extract_timepoints(data, time_col)
-
-    # Process all barcodes as mutants
-    R̲̲⁽ᵐ⁾, bc_ids = _process_mutant_barcodes_single(
-        data, nothing, id_col, time_col, count_col, timepoints
-    )
-
-    # Calculate totals
-    n̲ₜ = vec(sum(R̲̲⁽ᵐ⁾, dims=2))
-
-    return DataArrays(
-        R̲̲⁽ᵐ⁾,                # bc_count
-        n̲ₜ,                   # bc_total
-        0,                    # n_neutral
-        size(R̲̲⁽ᵐ⁾, 2),       # n_bc
-        bc_ids,               # bc_ids
-        Vector{Any}(),        # neutral_ids
-        "env1",              # envs
-        1,                   # n_env
-        1,                   # n_rep
-        length(timepoints),  # n_time
-        "N/A",              # genotypes
-        0                    # n_geno
-    )
-end
-
 # ============================================================================
 # Case: Multiple replicates
 # ============================================================================
@@ -585,8 +520,8 @@ function _extract_R(
     return DataArrays(
         R̲̲,                    # bc_count
         n̲ₜ,                   # bc_total
-        size(R̲̲⁽ⁿ⁾[1], 2),    # n_neutral
-        size(R̲̲⁽ᵐ⁾[1], 2),    # n_bc
+        length(neutral_ids),    # n_neutral
+        length(bc_ids),    # n_bc
         bc_ids,               # bc_ids
         neutral_ids,          # neutral_ids
         "env1",              # envs
@@ -1097,8 +1032,349 @@ function data_to_arrays(
     )
 end # function
 
+# ------------------------------------------------------------------------------
+# Convert ADVI output to dataframe
 # ------------------------------------------------------------------------------
 
+@doc raw"""
+    ADVIOutput
+
+Struct to store processed output from Automatic Differentiation Variational
+Inference (ADVI).
+
+# Fields
+- `df::DataFrame`: DataFrame containing parameter estimates and metadata
+- `var_groups::Vector{String}`: Names of variable groups in the model
+- `var_range::Vector{UnitRange{Int}}`: Index ranges for each variable group
+- `n_time::Union{Int,Vector{Int}}`: Number of time points
+- `n_rep::Int`: Number of experimental replicates
+- `n_env::Int`: Number of unique environments
+- `n_neutral::Int`: Number of neutral barcodes
+- `n_bc::Int`: Number of non-neutral barcodes
+- `n_geno::Int`: Number of genotypes
+- `neutral_ids::Vector`: Identifiers for neutral barcodes
+- `bc_ids::Vector`: Identifiers for non-neutral barcodes
+- `envs::Union{String,Vector{<:Any},Vector{Vector{<:Any}}}`: Environment
+  information, can be:
+    - `String`: When no environment information is given ("N/A")
+    - `Vector{<:Any}`: Vector of environment identifiers
+    - `Vector{Vector{<:Any}}`: Nested vector for multiple environment conditions
+"""
+struct ADVIOutput
+    df::DF.DataFrame                    # Parameter dataframe
+    var_groups::Vector{String}          # Variable groups
+    var_range::Vector{UnitRange{Int}}   # Ranges for each variable
+    n_time::Union{Int,Vector{Int}}      # Number of time points
+    n_rep::Int                          # Number of replicates
+    n_env::Int                          # Number of environments
+    n_neutral::Int                      # Number of neutral barcodes
+    n_bc::Int                           # Number of non-neutral barcodes
+    n_geno::Int                         # Number of genotypes
+    neutral_ids::Vector                 # Neutral barcode IDs
+    bc_ids::Vector                      # Non-neutral barcode IDs
+    envs::Union{String,Vector{<:Any},Vector{Vector{<:Any}}}  # Environment info
+end
+
+"""
+Extract variable groups and ranges from ADVI output.
+"""
+function extract_variable_info(
+    dist::Distributions.Sampleable, vars::Vector{<:Any}
+)
+    # Locate variable groups by identifying the first variable of each group
+    var_groups = replace.(vars[occursin.("[1]", string.(vars))], "[1]" => "")
+
+    # Define ranges for each variable
+    var_range = dist.transform.ranges_out
+
+    return var_groups, var_range
+end
+
+"""
+Create base parameter dataframe from ADVI distribution.
+"""
+function create_parameter_df(
+    dist::Distributions.Sampleable, vars::Vector{<:Any}
+)::DF.DataFrame
+    df = DF.DataFrame(hcat(dist.dist.m, dist.dist.σ), ["mean", "std"])
+    df[!, :varname] = deepcopy(vars)
+    df[!, :vartype] .= "tmp"  # placeholder to be updated
+    return df
+end
+
+"""
+Map variable names to their types.
+"""
+const varname_to_vartype = Dict(
+    "s̲ₜ" => "pop_mean_fitness",
+    "logσ̲ₜ" => "pop_std",
+    "s̲⁽ᵐ⁾" => "bc_fitness",
+    "logσ̲⁽ᵐ⁾" => "bc_std",
+    "θ̲⁽ᵐ⁾" => "bc_hyperfitness",
+    "θ̲̃⁽ᵐ⁾" => "bc_noncenter",
+    "logτ̲⁽ᵐ⁾" => "bc_deviations",
+    "logΛ̲̲" => "log_poisson",
+)
+
+"""
+Update variable types in dataframe.
+"""
+function update_variable_types!(
+    df::DF.DataFrame,
+    var_groups::Vector{String},
+    var_range::Vector{UnitRange{Int}}
+)
+    # Loop over variable groups and update vartype
+    for (i, var) in enumerate(var_groups)
+        # Update vartype
+        df[var_range[i], :vartype] .= varname_to_vartype[var]
+    end
+end
+
+"""
+Add replicate information to dataframe.
+Takes into account that n_time can be either Int (same timepoints across replicates)
+or Vector{Int} (different timepoints per replicate).
+"""
+function add_replicate_info!(
+    df::DF.DataFrame,
+    var_groups::Vector{String},
+    var_range::Vector{UnitRange{Int}},
+    output::ADVIOutput,
+    rep_col::Symbol,
+)
+    # Initialize replicate column
+    df[!, rep_col] = Vector{Any}(undef, size(df, 1))
+
+    # Get list of replicate IDs
+    reps = 1:output.n_rep
+
+    # Loop over variable groups and add replicate information
+    for (i, var_name) in enumerate(var_groups)
+        # Add replicate information for population mean fitness variables
+        if occursin("̲ₜ", var_name)
+            # For one replicate, we only have one replicate
+            if output.n_rep == 1
+                df[var_range[i], rep_col] = "R1"
+            else
+                # We need n_time-1 because we're dealing with frequency
+                # ratios
+                df[var_range[i], rep_col] = reduce(
+                    vcat,
+                    [repeat(["R$r"], output.n_time[r] - 1) for r in reps]
+                )
+            end
+        elseif var_name == "θ̲⁽ᵐ⁾"
+            # Hyperparameters don't have replicate information
+            df[var_range[i], rep_col] .= "N/A"
+        elseif var_name == "logΛ̲̲"
+            # Add replicate information for frequency variables
+            if output.n_rep == 1
+                df[var_range[i], rep_col] = "R1"
+            else
+                # Different timepoints per replicate
+                df[var_range[i], rep_col] = reduce(
+                    vcat,
+                    [repeat(
+                        ["R$r"],
+                        (output.n_bc + output.n_neutral) * output.n_time[r]
+                    )
+                     for r in reps]
+                )
+            end
+        else
+            # Other variables (like logτ̲⁽ᵐ⁾, θ̲̃⁽ᵐ⁾, logσ̲⁽ᵐ⁾)
+            if output.n_rep == 1
+                df[var_range[i], rep_col] = "R1"
+            else
+                # Multiple environments
+                df[var_range[i], rep_col] = repeat(
+                    ["R$r" for r in reps],
+                    inner=output.n_bc * output.n_env
+                )
+            end
+        end
+    end
+end
+
+"""
+Add environment information to dataframe.
+"""
+function add_environment_info!(
+    df::DF.DataFrame,
+    var_groups::Vector{String},
+    var_range::Vector{UnitRange{Int}},
+    output::ADVIOutput,
+    env_col::Symbol
+)
+    df[!, env_col] = Vector{Any}(undef, size(df, 1))
+
+    for (i, var) in enumerate(var_groups)
+        if output.n_env == 1
+            df[var_range[i], env_col] .= "env1"
+        else
+            # Add environment-specific logic here
+            if occursin("̲ₜ", var)
+                df[var_range[i], env_col] = output.envs[2:end]
+            elseif var == "θ̲⁽ᵐ⁾"
+                df[var_range[i], env_col] = repeat(unique(output.envs), output.n_bc)
+            else
+                # Add logic for other variable types
+            end
+        end
+    end
+end
+
+"""
+Add barcode ID information to dataframe.
+"""
+function add_barcode_info!(
+    df::DF.DataFrame,
+    var_groups::Vector{String},
+    var_range::Vector{UnitRange{Int}},
+    output::ADVIOutput,
+)
+    df[!, :id] = Vector{Any}(undef, size(df, 1))
+
+    for (i, var) in enumerate(var_groups)
+        if occursin("̲ₜ", var)
+            # Population mean fitness variables not associated with any barcode
+            df[var_range[i], :id] .= "N/A"
+        elseif var == "θ̲⁽ᵐ⁾"
+            if output.n_env == 1
+                # Single environment case
+                df[var_range[i], :id] = output.bc_ids
+            else
+                # Multiple environments - repeat each barcode for each
+                # environment
+                df[var_range[i], :id] = reduce(
+                    vcat,
+                    [repeat([bc], output.n_env) for bc in output.bc_ids]
+                )
+            end # if
+        elseif var == "logΛ̲̲"
+            # Get all barcode IDs
+            all_ids = [output.neutral_ids; output.bc_ids]
+
+            # Single replicate case
+            if output.n_rep == 1
+                # Repeat each barcode for each timepoint
+                df[var_range[i], :id] = reduce(
+                    vcat,
+                    [repeat([bc], typeof(output.n_time) <: Int ? output.n_time : output.n_time[1])
+                     for bc in all_ids]
+                )
+            else
+                # Multiple replicates case
+                if typeof(output.n_time) <: Int
+                    # Same timepoints across replicates
+                    df[var_range[i], :id] = reduce(
+                        vcat,
+                        [repeat([bc], output.n_time)
+                         for rep = 1:output.n_rep
+                         for bc in all_ids]
+                    )
+                else
+                    # Different timepoints per replicate
+                    df[var_range[i], :id] = reduce(
+                        vcat,
+                        [repeat([bc], output.n_time[rep])
+                         for rep = 1:output.n_rep
+                         for bc in all_ids]
+                    )
+                end
+            end
+        else
+            # Other barcode-related variables
+            if (output.n_rep == 1) && (output.n_env == 1)
+                # Single replicate and environment case
+                df[var_range[i], :id] = output.bc_ids
+            elseif (output.n_rep == 1) && (output.n_env > 1)
+                # Single replicate and multiple environments case
+                df[var_range[i], :id] = reduce(
+                    vcat,
+                    [repeat([bc], output.n_env) for bc in output.bc_ids]
+                )
+            elseif (output.n_rep > 1) && (output.n_env == 1)
+                # Multiple replicates and single environment case
+                df[var_range[i], :id] = repeat(output.bc_ids, output.n_rep)
+            else
+                # Multiple replicates and environments
+                df[var_range[i], :id] = reduce(
+                    vcat,
+                    [repeat([bc], output.n_env)
+                     for rep = 1:output.n_rep
+                     for bc in output.bc_ids]
+                )
+            end
+        end
+    end
+end
+
+"""
+Process hierarchical model samples for replicates.
+"""
+function process_hierarchical_samples!(
+    df::DF.DataFrame,
+    output::ADVIOutput,
+    n_samples::Int
+)
+    # Sample θ̲ variables
+    θ_mat = hcat(
+        [Random.rand(Distributions.Normal(x...), n_samples)
+         for x in eachrow(df[df.vartype.=="bc_hyperfitness", [:mean, :std]])]...
+    )
+
+    # Sample τ̲ variables
+    τ_mat = exp.(
+        hcat(
+            [Random.rand(Distributions.Normal(x...), n_samples)
+             for x in eachrow(df[df.vartype.=="bc_deviations", [:mean, :std]])]...
+        )
+    )
+
+    # Sample θ̲̃ variables
+    θ_tilde_mat = hcat(
+        [Random.rand(Distributions.Normal(x...), n_samples)
+         for x in eachrow(df[df.vartype.=="bc_noncenter", [:mean, :std]])]...
+    )
+
+    # Compute individual strains fitness values
+    s_mat = hcat(repeat([θ_mat], output.n_rep)...) .+ (τ_mat .* θ_tilde_mat)
+
+    # Create dataframe with results
+    df_s = DF.DataFrame(
+        hcat(
+            [StatsBase.median.(eachcol(s_mat)), StatsBase.std.(eachcol(s_mat))]...
+        ),
+        ["mean", "std"]
+    )
+
+    # Add metadata columns
+    τ_idx = occursin.("τ", string.(df.varname))
+    DF.insertcols!(
+        df_s,
+        :varname => replace.(df[τ_idx, :varname], "logτ" => "s"),
+        :vartype .=> "bc_fitness"
+    )
+
+    # Add replicate information
+    if :rep in propertynames(df)
+        DF.insertcols!(df_s, :rep => df[τ_idx, :rep])
+    end
+
+    # Add environment information
+    if :env in propertynames(df)
+        DF.insertcols!(df_s, :env => df[τ_idx, :env])
+    end
+
+    # Add barcode information
+    DF.insertcols!(df_s, :id => df[τ_idx, :id])
+
+    # Append to original dataframe
+    DF.append!(df, df_s)
+end
+
 @doc raw"""
     advi_to_df(
         data::DataFrames.AbstractDataFrame, dist::Distribution.Sampleable,
@@ -1175,12 +1451,8 @@ function advi_to_df(
     env_col::Union{Nothing,Symbol}=nothing,
     genotype_col::Union{Nothing,Symbol}=nothing,
     n_samples::Int=10_000
-)
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Extract information from data
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    # Convert data to arrays to obtain information
+)::DF.DataFrame
+    # Process data arrays
     data_arrays = data_to_arrays(
         data;
         id_col=id_col,
@@ -1192,442 +1464,581 @@ function advi_to_df(
         genotype_col=genotype_col
     )
 
-    # Extract number of neutral and mutant lineages
-    n_neutral = data_arrays[:n_neutral]
-    n_bc = data_arrays[:n_bc]
-
-    # Extract bc lineages IDs
-    bc_ids = data_arrays[:bc_ids]
-    # Extract neutral lineages IDs
-    neutral_ids = data_arrays[:neutral_ids]
-
-    # Extract number of replicates
-    n_rep = data_arrays[:n_rep]
-    # Extract number of time points per replicate
-    n_time = data_arrays[:n_time]
-
-    # Extract number of environments
-    n_env = data_arrays[:n_env]
-    # Extract list of environments
-    envs = data_arrays[:envs]
-    # For multi-replicate inferences replicate the list of environments when
-    # necessary
-    if (n_env > 1) & (n_rep > 1) & !(typeof(envs) <: Vector{<:Vector})
-        envs = repeat([envs], n_rep)
-    end # if
-
-    # Extract genotype information
-    genotypes = data_arrays[:genotypes]
-    n_geno = data_arrays[:n_geno]
-
-    # Extract unique replicates
-    if typeof(rep_col) <: Symbol
-        reps = sort(unique(data[:, rep_col]))
-    end # if
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Convert distribution parameters into tidy dataframe
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    # Extract parameters and convert to dataframe
-    df_par = DF.DataFrame(hcat(dist.dist.m, dist.dist.σ), ["mean", "std"])
-
-    # Add variable name. Notice that we use deepcopy to avoid the modification
-    # of the original variable
-    df_par[!, :varname] = deepcopy(vars)
-
-    # Locate variable groups by identifying the first variable of each group
-    var_groups = replace.(vars[occursin.("[1]", string.(vars))], "[1]" => "")
-
-    # Define ranges for each variable
-    var_range = dist.transform.ranges_out
-
-    # Count how many variables exist per group
-    var_count = length.(var_range)
-
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Add vartype information
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    # Define dictionary from varname to vartype
-    varname_to_vartype = Dict(
-        "s̲ₜ" => "pop_mean_fitness",
-        "logσ̲ₜ" => "pop_std",
-        "s̲⁽ᵐ⁾" => "bc_fitness",
-        "logσ̲⁽ᵐ⁾" => "bc_std",
-        "θ̲⁽ᵐ⁾" => "bc_hyperfitness",
-        "θ̲̃⁽ᵐ⁾" => "bc_noncenter",
-        "logτ̲⁽ᵐ⁾" => "bc_deviations",
-        "logΛ̲̲" => "log_poisson",
+    # Create output container
+    output = ADVIOutput(
+        DF.DataFrame(),  # Will be filled later
+        String[],       # Will be filled later
+        UnitRange{Int}[],  # Will be filled later
+        data_arrays.n_time,
+        data_arrays.n_rep,
+        data_arrays.n_env,
+        data_arrays.n_neutral,
+        data_arrays.n_bc,
+        data_arrays.neutral_ids,
+        data_arrays.bc_ids,
+        data_arrays.envs
     )
 
-    # Add column to be modified with vartype
-    df_par[!, :vartype] .= "tmp"
-    # Loop through variables
-    for (i, var) in enumerate(var_groups)
-        df_par[var_range[i], :vartype] .= varname_to_vartype[var]
-    end # for
+    # Extract variable information
+    var_groups, var_range = extract_variable_info(dist, vars)
 
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Add replicate number information
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    if (n_rep == 1) & (typeof(genotype_col) <: Nothing) &
-       (length(var_groups) > 5)
-        # Report error if no env or rep info is given when needed
-        error("The distribution seems to match a hierarchical model but no genotype or replicate information was provided")
-    elseif (n_rep == 1) & (length(var_groups) == 5) & (n_env == 1)
-        println("Single replicate non-hierarchical model")
-        # Add replicate for single-replicate model for consistency
-        df_par[!, :rep] .= "R1"
-    elseif (n_rep == 1) & (length(var_groups) == 7) &
-           (typeof(genotype_col) <: Symbol)
-        println("Single replicate genotype hierarchical model")
-        # Add replicate for single-replicate model for consistency
-        df_par[!, :rep] .= "R1"
-    elseif (n_rep > 1)
-        # Add replicate column to be modified
-        df_par[!, rep_col] = Vector{Any}(undef, size(df_par, 1))
-        # Loop through var groups
-        for (i, var) in enumerate(var_groups)
-            if occursin("̲ₜ", var)
-                # Add replicate for population mean fitness variables
-                df_par[var_range[i], rep_col] = reduce(
-                    vcat,
-                    [
-                        repeat([reps[j]], n_time[j] - 1) for j = 1:n_rep
-                    ]
-                )
-            elseif var == "θ̲⁽ᵐ⁾"
-                # Add information for hyperparameter that does not have
-                # replicate information
-                df_par[var_range[i], rep_col] .= "N/A"
-            elseif var == "logΛ̲̲"
-                df_par[var_range[i], rep_col] = reduce(
-                    vcat,
-                    [
-                        repeat(
-                            [reps[j]],
-                            (n_bc + n_neutral) * (n_time[j])
-                        ) for j = 1:n_rep
-                    ]
-                )
-            else
-                # Information on the other variables depends on environments
-                if n_env == 1
-                    # Add information for bc-related parameters when no
-                    # environment information is provided
-                    df_par[var_range[i], rep_col] = reduce(
-                        vcat,
-                        [
-                            repeat([reps[j]], n_bc) for j = 1:n_rep
-                        ]
-                    )
-                else
-                    df_par[var_range[i], rep_col] = reduce(
-                        vcat,
-                        [
-                            repeat([reps[j]], n_bc * n_env) for j = 1:n_rep
-                        ]
-                    )
-                end # if
-            end # if
-        end # for
-    end # if
+    # Create base parameter dataframe
+    df = create_parameter_df(dist, vars)
 
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Add environment information
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    if (n_env == 1)
-        # Add environment information for consistency
-        df_par[!, :env] .= "env1"
-    elseif (n_env > 1)
-        # Add replicate column to be modified
-        df_par[!, env_col] = Vector{Any}(undef, size(df_par, 1))
-        # Loop through var groups
-        for (i, var) in enumerate(var_groups)
-            if occursin("̲ₜ", var)
-                if n_rep == 1
-                    # Add environment information for single replicate
-                    df_par[var_range[i], env_col] = envs[2:end]
-                elseif n_rep > 1
-                    # Add environment information for multiple replicates
-                    df_par[var_range[i], env_col] = reduce(
-                        vcat, [env[2:end] for env in envs]
-                    )
-                end # if
-            elseif var == "θ̲⁽ᵐ⁾"
-                # Add environment information for hyperparameter
-                df_par[var_range[i], env_col] = reduce(
-                    vcat, repeat([unique(reduce(vcat, envs))], n_bc)
-                )
-            elseif var == "logΛ̲̲"
-                if n_rep == 1
-                    # Add environment informatin for each Poisson parameter for
-                    # single replicate
-                    df_par[var_range[i], env_col] = repeat(
-                        envs, (n_bc + n_neutral)
-                    )
-                elseif n_rep > 1
-                    # Add environment information for each Poisson parameter for
-                    # multiple replicates
-                    df_par[var_range[i], env_col] = reduce(
-                        vcat,
-                        [repeat(env, (n_bc + n_neutral)) for env in envs]
-                    )
-                end # if
-            else
-                if n_rep == 1
-                    # Add environment information for each bc-related
-                    # variable for single replicate
-                    df_par[var_range[i], env_col] = reduce(
-                        vcat,
-                        repeat(unique(envs), n_bc)
-                    )
-                elseif n_rep > 1
-                    # Add environment information for each bc-related
-                    # variable for multiple replicates
-                    df_par[var_range[i], env_col] = reduce(
-                        vcat,
-                        repeat(unique(reduce(vcat, envs)), n_bc * n_rep)
-                    )
-                end #if
-            end # if
-        end # for
-    end # if
-
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Add mutant id information
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    # Add mutant id column. To be modified
-    df_par[!, :id] = Vector{Any}(undef, size(df_par, 1))
-
-    # Loop through var groups
-    for (i, var) in enumerate(var_groups)
-        if occursin("̲ₜ", var)
-            # Population mean fitness variables are not associated with any
-            # particular barcode.
-            df_par[var_range[i], :id] .= "N/A"
-        elseif var == "θ̲⁽ᵐ⁾"
-            if (n_env == 1) & (typeof(genotype_col) <: Nothing)
-                # Add ID information to hyperparameter for single environment
-                # and no genotypes
-                df_par[var_range[i], :id] = bc_ids
-            elseif (n_env == 1) & (typeof(genotype_col) <: Symbol)
-                # Add ID information to hyperparameter fitness for genotypes
-                df_par[var_range[i], :id] = unique(genotypes)
-            elseif n_env > 1
-                # Add ID information to hyperparameter for multiple environments
-                df_par[var_range[i], :id] = reduce(
-                    vcat,
-                    [repeat([bc], n_env) for bc in bc_ids]
-                )
-            end # if
-        elseif var == "logΛ̲̲"
-            if n_rep == 1
-                # Add ID information to Poisson parameter for single replicate
-                df_par[var_range[i], :id] = reduce(
-                    vcat,
-                    [repeat([bc], n_time) for bc in [neutral_ids; bc_ids]]
-                )
-            elseif n_rep > 1
-                df_par[var_range[i], :id] = reduce(
-                    vcat,
-                    [
-                        repeat([bc], n_time[rep])
-                        for rep = 1:n_rep
-                        for bc in [neutral_ids; bc_ids]
-                    ]
-                )
-            end # if
-        else
-            if (n_rep == 1) & (n_env == 1)
-                df_par[var_range[i], :id] = bc_ids
-            elseif (n_rep == 1) & (n_env > 1)
-                df_par[var_range[i], :id] = reduce(
-                    vcat,
-                    [repeat([bc], n_env) for bc in bc_ids]
-                )
-            elseif (n_rep > 1) & (n_env == 1)
-                df_par[var_range[i], :id] = reduce(
-                    vcat,
-                    repeat(bc_ids, n_rep)
-                )
-            elseif (n_rep > 1) & (n_env > 1)
-                df_par[var_range[i], :id] = reduce(
-                    vcat,
-                    [
-                        repeat([bc], n_env)
-                        for rep = 1:n_rep
-                        for bc in bc_ids
-                    ]
-                )
-
-            end # if
-        end # if
-    end # for
-
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Add per-replicate fitness effect variables 
-    # (for hierarchical models on experimental replicates)
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    if (n_rep > 1) & (length(var_groups) == 7)
-        # Sample θ̲ variables
-        θ_mat = hcat(
-            [
-                Random.rand(Distributions.Normal(x...), n_samples)
-                for x in eachrow(
-                    df_par[
-                        df_par.vartype.=="bc_hyperfitness",
-                        [:mean, :std]
-                    ]
-                )
-            ]...
-        )
-
-        # Sample τ̲ variables
-        τ_mat = exp.(
-            hcat(
-                [
-                    Random.rand(Distributions.Normal(x...), n_samples)
-                    for x in eachrow(
-                        df_par[
-                            df_par.vartype.=="bc_deviations",
-                            [:mean, :std]
-                        ]
-                    )
-                ]...
-            )
-        )
-
-        # Sample θ̲̃ variables
-        θ_tilde_mat = hcat(
-            [
-                Random.rand(Distributions.Normal(x...), n_samples)
-                for x in eachrow(
-                    df_par[
-                        df_par.vartype.=="bc_noncenter",
-                        [:mean, :std]
-                    ]
-                )
-            ]...
-        )
-
-
-        # Compute individual strains fitness values
-        s_mat = hcat(repeat([θ_mat], n_rep)...) .+ (τ_mat .* θ_tilde_mat)
-
-        # Compute mean and standard deviation and turn into dataframe
-        df_s = DF.DataFrame(
-            hcat(
-                [StatsBase.median.(eachcol(s_mat)),
-                    StatsBase.std.(eachcol(s_mat))]...
-            ),
-            ["mean", "std"]
-        )
-
-        # To add the rest of the columns, we will use the τ variables that have
-        # the same length. Let's locate such variables
-        τ_idx = occursin.("τ", string.(vars))
-
-        # Add extra columns
-        DF.insertcols!(
-            df_s,
-            :varname => replace.(df_par[τ_idx, :varname], "logτ" => "s"),
-            :vartype .=> "bc_fitness",
-            rep_col => df_par[τ_idx, rep_col],
-            :env => df_par[τ_idx, :env],
-            :id => df_par[τ_idx, :id]
-        )
-
-        # Append dataframes
-        DF.append!(df_par, df_s)
-    end # if
-
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-    # Add per strain fitness effect variables 
-    # (for hierarchical models on genotypes)
-    # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
-
-    if (n_rep == 1) & (length(var_groups) == 7)
-        # Find unique genotypes
-        geno_unique = unique(genotypes)
-        # Define genotype indexes
-        geno_idx = indexin(genotypes, geno_unique)
-
-        # Sample θ̲ variables
-        θ_mat = hcat(
-            [
-                Random.rand(Distributions.Normal(x...), n_samples)
-                for x in eachrow(
-                    df_par[
-                        df_par.vartype.=="bc_hyperfitness",
-                        [:mean, :std]
-                    ]
-                )
-            ]...
-        )
-
-        # Sample τ̲ variables
-        τ_mat = exp.(
-            hcat(
-                [
-                    Random.rand(Distributions.Normal(x...), n_samples)
-                    for x in eachrow(
-                        df_par[
-                            df_par.vartype.=="bc_deviations",
-                            [:mean, :std]
-                        ]
-                    )
-                ]...
-            )
-        )
-
-        # Sample θ̲̃ variables
-        θ_tilde_mat = hcat(
-            [
-                Random.rand(Distributions.Normal(x...), n_samples)
-                for x in eachrow(
-                    df_par[
-                        df_par.vartype.=="bc_noncenter",
-                        [:mean, :std]
-                    ]
-                )
-            ]...
-        )
+    # Update variable types
+    update_variable_types!(df, var_groups, var_range)
 
+    # Add replicate information if needed
+    if typeof(rep_col) <: Symbol
+        add_replicate_info!(df, var_groups, var_range, output, rep_col)
+    end
 
-        # Compute individual strains fitness values
-        s_mat = θ_mat[:, geno_idx] .+ (τ_mat .* θ_tilde_mat)
+    # Add environment information if needed
+    if typeof(env_col) <: Symbol
+        add_environment_info!(df, var_groups, var_range, output, env_col)
+    end
 
-        # Compute mean and standard deviation and turn into dataframe
-        df_s = DF.DataFrame(
-            hcat(
-                [StatsBase.median.(eachcol(s_mat)),
-                    StatsBase.std.(eachcol(s_mat))]...
-            ),
-            ["mean", "std"]
-        )
+    # Add barcode information
+    add_barcode_info!(df, var_groups, var_range, output)
 
-        # To add the rest of the columns, we will use the τ variables that have
-        # the same length. Let's locate such variables
-        τ_idx = occursin.("τ", string.(vars))
-
-        # Add extra columns
-        DF.insertcols!(
-            df_s,
-            :varname => replace.(df_par[τ_idx, :varname], "logτ" => "s"),
-            :vartype .=> "bc_fitness",
-            :rep => df_par[τ_idx, :rep],
-            :env => df_par[τ_idx, :env],
-            :id => df_par[τ_idx, :id]
-        )
+    # Process hierarchical model if needed
+    if length(var_groups) == 7 && (output.n_rep > 1 || typeof(genotype_col) <: Symbol)
+        process_hierarchical_samples!(df, output, n_samples)
+    end
 
-        # Append dataframes
-        DF.append!(df_par, df_s)
-    end # if
+    return df
+end
 
-    return df_par
-end # function
\ No newline at end of file
+# ------------------------------------------------------------------------------
+# @doc raw"""
+#     advi_to_df(
+#         data::DataFrames.AbstractDataFrame, dist::Distribution.Sampleable,
+#         vars::Vector{<:Any}; kwargs
+#     )
+
+# Convert the output of automatic differentiation variational inference (ADVI) to
+# a tidy dataframe.
+
+# # Arguments
+# - `data::DataFrames.AbstractDataFrame`: Tidy dataframe used to perform the ADVI
+#   inference. See `BarBay.vi` module for the dataframe requirements.
+# - `dist::Distributions.Sampleable`: The ADVI posterior sampleable distribution
+#   object.
+# - `vars::Vector{<:Any}`: Vector of variable/parameter names from the ADVI run. 
+
+# ## Optional Keyword Arguments
+# - `id_col::Symbol=:barcode`: Name of the column in `data` containing the barcode
+#     identifier. The column may contain any type of entry.
+# - `time_col::Symbol=:time`: Name of the column in `data` defining the time point
+#   at which measurements were done. The column may contain any type of entry as
+#   long as `sort` will resulted in time-ordered names.
+# - `count_col::Symbol=:count`: Name of the column in `data` containing the raw
+#   barcode count. The column must contain entries of type `Int64`.
+# - `neutral_col::Symbol=:neutral`: Name of the column in `data` defining whether
+#   the barcode belongs to a neutral lineage or not. The column must contain
+#   entries of type `Bool`.
+# - `n_samples::Int=10_000`: Number of posterior samples to draw used for
+#   hierarchical models. Default is 10,000.
+
+# # Returns
+# - `df::DataFrames.DataFrame`: DataFrame containing summary statistics of
+# posterior samples for each parameter. Columns include:
+#     - `mean, std`: posterior mean and standard deviation for each variable.
+#     - `varname`: parameter name from the ADVI posterior distribution.
+#     - `vartype`: Description of the type of parameter. The types are:
+#         - `pop_mean_fitness`: Population mean fitness value `s̲ₜ`.
+#         - `pop_std`: (Nuisance parameter) Log of standard deviation in the
+#           likelihood function for the neutral lineages.
+#         - `bc_fitness`: Mutant relative fitness `s⁽ᵐ⁾`.
+#         - `bc_hyperfitness`: For hierarchical models, mutant hyperparameter
+#           that connects the fitness over multiple experimental replicates or
+#           multiple genotypes `θ⁽ᵐ⁾`.
+#         - `bc_noncenter`: (Nuisance parameter) For hierarchical models,
+#           non-centered samples used to connect the experimental replicates to
+#           the hyperparameter `θ̃⁽ᵐ⁾`.
+#         - `bc_deviations`: (Nuisance parameter) For hierarchical models,
+#           samples that define the log of the deviation from the hyperparameter
+#           fitness value `logτ⁽ᵐ⁾`.
+#         - `bc_std`: (Nuisance parameter) Log of standard deviation in the
+#           likelihood function for the mutant lineages.
+#         - `freq`: (Nuisance parameter) Log of the Poisson parameter used to
+#           define the frequency of each lineage.
+#     - rep: Experimental replicate number.
+#     - env: Environment for each parameter.
+#     - id: Mutant or neutral strain ID.
+
+# # Notes
+# - Converts multivariate posterior into summarized dataframe format.
+# - Adds metadata like parameter type, replicate, strain ID, etc.
+# - Can handle models with multiple replicates and environments.
+# - Can handle models with hierarchical structure on genotypes.
+# - Useful for post-processing ADVI results for further analysis and plotting.
+# """
+# function advi_to_df(
+#     data::DF.AbstractDataFrame,
+#     dist::Distributions.Sampleable,
+#     vars::Vector{<:Any};
+#     id_col::Symbol=:barcode,
+#     time_col::Symbol=:time,
+#     count_col::Symbol=:count,
+#     neutral_col::Symbol=:neutral,
+#     rep_col::Union{Nothing,Symbol}=nothing,
+#     env_col::Union{Nothing,Symbol}=nothing,
+#     genotype_col::Union{Nothing,Symbol}=nothing,
+#     n_samples::Int=10_000
+# )
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Extract information from data
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     # Convert data to arrays to obtain information
+#     data_arrays = data_to_arrays(
+#         data;
+#         id_col=id_col,
+#         time_col=time_col,
+#         count_col=count_col,
+#         neutral_col=neutral_col,
+#         rep_col=rep_col,
+#         env_col=env_col,
+#         genotype_col=genotype_col
+#     )
+
+#     # Extract number of neutral and mutant lineages
+#     n_neutral = data_arrays[:n_neutral]
+#     n_bc = data_arrays[:n_bc]
+
+#     # Extract bc lineages IDs
+#     bc_ids = data_arrays[:bc_ids]
+#     # Extract neutral lineages IDs
+#     neutral_ids = data_arrays[:neutral_ids]
+
+#     # Extract number of replicates
+#     n_rep = data_arrays[:n_rep]
+#     # Extract number of time points per replicate
+#     n_time = data_arrays[:n_time]
+
+#     # Extract number of environments
+#     n_env = data_arrays[:n_env]
+#     # Extract list of environments
+#     envs = data_arrays[:envs]
+#     # For multi-replicate inferences replicate the list of environments when
+#     # necessary
+#     if (n_env > 1) & (n_rep > 1) & !(typeof(envs) <: Vector{<:Vector})
+#         envs = repeat([envs], n_rep)
+#     end # if
+
+#     # Extract genotype information
+#     genotypes = data_arrays[:genotypes]
+#     n_geno = data_arrays[:n_geno]
+
+#     # Extract unique replicates
+#     if typeof(rep_col) <: Symbol
+#         reps = sort(unique(data[:, rep_col]))
+#     end # if
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Convert distribution parameters into tidy dataframe
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     # Extract parameters and convert to dataframe
+#     df_par = DF.DataFrame(hcat(dist.dist.m, dist.dist.σ), ["mean", "std"])
+
+#     # Add variable name. Notice that we use deepcopy to avoid the modification
+#     # of the original variable
+#     df_par[!, :varname] = deepcopy(vars)
+
+#     # Locate variable groups by identifying the first variable of each group
+#     var_groups = replace.(vars[occursin.("[1]", string.(vars))], "[1]" => "")
+
+#     # Define ranges for each variable
+#     var_range = dist.transform.ranges_out
+
+#     # Count how many variables exist per group
+#     var_count = length.(var_range)
+
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Add vartype information
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     # Define dictionary from varname to vartype
+#     varname_to_vartype = Dict(
+#         "s̲ₜ" => "pop_mean_fitness",
+#         "logσ̲ₜ" => "pop_std",
+#         "s̲⁽ᵐ⁾" => "bc_fitness",
+#         "logσ̲⁽ᵐ⁾" => "bc_std",
+#         "θ̲⁽ᵐ⁾" => "bc_hyperfitness",
+#         "θ̲̃⁽ᵐ⁾" => "bc_noncenter",
+#         "logτ̲⁽ᵐ⁾" => "bc_deviations",
+#         "logΛ̲̲" => "log_poisson",
+#     )
+
+#     # Add column to be modified with vartype
+#     df_par[!, :vartype] .= "tmp"
+#     # Loop through variables
+#     for (i, var) in enumerate(var_groups)
+#         df_par[var_range[i], :vartype] .= varname_to_vartype[var]
+#     end # for
+
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Add replicate number information
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     if (n_rep == 1) & (typeof(genotype_col) <: Nothing) &
+#        (length(var_groups) > 5)
+#         # Report error if no env or rep info is given when needed
+#         error("The distribution seems to match a hierarchical model but no genotype or replicate information was provided")
+#     elseif (n_rep == 1) & (length(var_groups) == 5) & (n_env == 1)
+#         println("Single replicate non-hierarchical model")
+#         # Add replicate for single-replicate model for consistency
+#         df_par[!, :rep] .= "R1"
+#     elseif (n_rep == 1) & (length(var_groups) == 7) &
+#            (typeof(genotype_col) <: Symbol)
+#         println("Single replicate genotype hierarchical model")
+#         # Add replicate for single-replicate model for consistency
+#         df_par[!, :rep] .= "R1"
+#     elseif (n_rep > 1)
+#         # Add replicate column to be modified
+#         df_par[!, rep_col] = Vector{Any}(undef, size(df_par, 1))
+#         # Loop through var groups
+#         for (i, var) in enumerate(var_groups)
+#             if occursin("̲ₜ", var)
+#                 # Add replicate for population mean fitness variables
+#                 df_par[var_range[i], rep_col] = reduce(
+#                     vcat,
+#                     [
+#                         repeat([reps[j]], n_time[j] - 1) for j = 1:n_rep
+#                     ]
+#                 )
+#             elseif var == "θ̲⁽ᵐ⁾"
+#                 # Add information for hyperparameter that does not have
+#                 # replicate information
+#                 df_par[var_range[i], rep_col] .= "N/A"
+#             elseif var == "logΛ̲̲"
+#                 df_par[var_range[i], rep_col] = reduce(
+#                     vcat,
+#                     [
+#                         repeat(
+#                             [reps[j]],
+#                             (n_bc + n_neutral) * (n_time[j])
+#                         ) for j = 1:n_rep
+#                     ]
+#                 )
+#             else
+#                 # Information on the other variables depends on environments
+#                 if n_env == 1
+#                     # Add information for bc-related parameters when no
+#                     # environment information is provided
+#                     df_par[var_range[i], rep_col] = reduce(
+#                         vcat,
+#                         [
+#                             repeat([reps[j]], n_bc) for j = 1:n_rep
+#                         ]
+#                     )
+#                 else
+#                     df_par[var_range[i], rep_col] = reduce(
+#                         vcat,
+#                         [
+#                             repeat([reps[j]], n_bc * n_env) for j = 1:n_rep
+#                         ]
+#                     )
+#                 end # if
+#             end # if
+#         end # for
+#     end # if
+
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Add environment information
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     if (n_env == 1)
+#         # Add environment information for consistency
+#         df_par[!, :env] .= "env1"
+#     elseif (n_env > 1)
+#         # Add replicate column to be modified
+#         df_par[!, env_col] = Vector{Any}(undef, size(df_par, 1))
+#         # Loop through var groups
+#         for (i, var) in enumerate(var_groups)
+#             if occursin("̲ₜ", var)
+#                 if n_rep == 1
+#                     # Add environment information for single replicate
+#                     df_par[var_range[i], env_col] = envs[2:end]
+#                 elseif n_rep > 1
+#                     # Add environment information for multiple replicates
+#                     df_par[var_range[i], env_col] = reduce(
+#                         vcat, [env[2:end] for env in envs]
+#                     )
+#                 end # if
+#             elseif var == "θ̲⁽ᵐ⁾"
+#                 # Add environment information for hyperparameter
+#                 df_par[var_range[i], env_col] = reduce(
+#                     vcat, repeat([unique(reduce(vcat, envs))], n_bc)
+#                 )
+#             elseif var == "logΛ̲̲"
+#                 if n_rep == 1
+#                     # Add environment informatin for each Poisson parameter for
+#                     # single replicate
+#                     df_par[var_range[i], env_col] = repeat(
+#                         envs, (n_bc + n_neutral)
+#                     )
+#                 elseif n_rep > 1
+#                     # Add environment information for each Poisson parameter for
+#                     # multiple replicates
+#                     df_par[var_range[i], env_col] = reduce(
+#                         vcat,
+#                         [repeat(env, (n_bc + n_neutral)) for env in envs]
+#                     )
+#                 end # if
+#             else
+#                 if n_rep == 1
+#                     # Add environment information for each bc-related
+#                     # variable for single replicate
+#                     df_par[var_range[i], env_col] = reduce(
+#                         vcat,
+#                         repeat(unique(envs), n_bc)
+#                     )
+#                 elseif n_rep > 1
+#                     # Add environment information for each bc-related
+#                     # variable for multiple replicates
+#                     df_par[var_range[i], env_col] = reduce(
+#                         vcat,
+#                         repeat(unique(reduce(vcat, envs)), n_bc * n_rep)
+#                     )
+#                 end #if
+#             end # if
+#         end # for
+#     end # if
+
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Add mutant id information
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     # Add mutant id column. To be modified
+#     df_par[!, :id] = Vector{Any}(undef, size(df_par, 1))
+
+#     # Loop through var groups
+#     for (i, var) in enumerate(var_groups)
+#         if occursin("̲ₜ", var)
+#             # Population mean fitness variables are not associated with any
+#             # particular barcode.
+#             df_par[var_range[i], :id] .= "N/A"
+#         elseif var == "θ̲⁽ᵐ⁾"
+#             if (n_env == 1) & (typeof(genotype_col) <: Nothing)
+#                 # Add ID information to hyperparameter for single environment
+#                 # and no genotypes
+#                 df_par[var_range[i], :id] = bc_ids
+#             elseif (n_env == 1) & (typeof(genotype_col) <: Symbol)
+#                 # Add ID information to hyperparameter fitness for genotypes
+#                 df_par[var_range[i], :id] = unique(genotypes)
+#             elseif n_env > 1
+#                 # Add ID information to hyperparameter for multiple environments
+#                 df_par[var_range[i], :id] = reduce(
+#                     vcat,
+#                     [repeat([bc], n_env) for bc in bc_ids]
+#                 )
+#             end # if
+#         elseif var == "logΛ̲̲"
+#             if n_rep == 1
+#                 # Add ID information to Poisson parameter for single replicate
+#                 df_par[var_range[i], :id] = reduce(
+#                     vcat,
+#                     [repeat([bc], n_time) for bc in [neutral_ids; bc_ids]]
+#                 )
+#             elseif n_rep > 1
+#                 df_par[var_range[i], :id] = reduce(
+#                     vcat,
+#                     [
+#                         repeat([bc], n_time[rep])
+#                         for rep = 1:n_rep
+#                         for bc in [neutral_ids; bc_ids]
+#                     ]
+#                 )
+#             end # if
+#         else
+#             if (n_rep == 1) & (n_env == 1)
+#                 df_par[var_range[i], :id] = bc_ids
+#             elseif (n_rep == 1) & (n_env > 1)
+#                 df_par[var_range[i], :id] = reduce(
+#                     vcat,
+#                     [repeat([bc], n_env) for bc in bc_ids]
+#                 )
+#             elseif (n_rep > 1) & (n_env == 1)
+#                 df_par[var_range[i], :id] = reduce(
+#                     vcat,
+#                     repeat(bc_ids, n_rep)
+#                 )
+#             elseif (n_rep > 1) & (n_env > 1)
+#                 df_par[var_range[i], :id] = reduce(
+#                     vcat,
+#                     [
+#                         repeat([bc], n_env)
+#                         for rep = 1:n_rep
+#                         for bc in bc_ids
+#                     ]
+#                 )
+
+#             end # if
+#         end # if
+#     end # for
+
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Add per-replicate fitness effect variables 
+#     # (for hierarchical models on experimental replicates)
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     if (n_rep > 1) & (length(var_groups) == 7)
+#         # Sample θ̲ variables
+#         θ_mat = hcat(
+#             [
+#                 Random.rand(Distributions.Normal(x...), n_samples)
+#                 for x in eachrow(
+#                     df_par[
+#                         df_par.vartype.=="bc_hyperfitness",
+#                         [:mean, :std]
+#                     ]
+#                 )
+#             ]...
+#         )
+
+#         # Sample τ̲ variables
+#         τ_mat = exp.(
+#             hcat(
+#                 [
+#                     Random.rand(Distributions.Normal(x...), n_samples)
+#                     for x in eachrow(
+#                         df_par[
+#                             df_par.vartype.=="bc_deviations",
+#                             [:mean, :std]
+#                         ]
+#                     )
+#                 ]...
+#             )
+#         )
+
+#         # Sample θ̲̃ variables
+#         θ_tilde_mat = hcat(
+#             [
+#                 Random.rand(Distributions.Normal(x...), n_samples)
+#                 for x in eachrow(
+#                     df_par[
+#                         df_par.vartype.=="bc_noncenter",
+#                         [:mean, :std]
+#                     ]
+#                 )
+#             ]...
+#         )
+
+
+#         # Compute individual strains fitness values
+#         s_mat = hcat(repeat([θ_mat], n_rep)...) .+ (τ_mat .* θ_tilde_mat)
+
+#         # Compute mean and standard deviation and turn into dataframe
+#         df_s = DF.DataFrame(
+#             hcat(
+#                 [StatsBase.median.(eachcol(s_mat)),
+#                     StatsBase.std.(eachcol(s_mat))]...
+#             ),
+#             ["mean", "std"]
+#         )
+
+#         # To add the rest of the columns, we will use the τ variables that have
+#         # the same length. Let's locate such variables
+#         τ_idx = occursin.("τ", string.(vars))
+
+#         # Add extra columns
+#         DF.insertcols!(
+#             df_s,
+#             :varname => replace.(df_par[τ_idx, :varname], "logτ" => "s"),
+#             :vartype .=> "bc_fitness",
+#             rep_col => df_par[τ_idx, rep_col],
+#             :env => df_par[τ_idx, :env],
+#             :id => df_par[τ_idx, :id]
+#         )
+
+#         # Append dataframes
+#         DF.append!(df_par, df_s)
+#     end # if
+
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+#     # Add per strain fitness effect variables 
+#     # (for hierarchical models on genotypes)
+#     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
+
+#     if (n_rep == 1) & (length(var_groups) == 7)
+#         # Find unique genotypes
+#         geno_unique = unique(genotypes)
+#         # Define genotype indexes
+#         geno_idx = indexin(genotypes, geno_unique)
+
+#         # Sample θ̲ variables
+#         θ_mat = hcat(
+#             [
+#                 Random.rand(Distributions.Normal(x...), n_samples)
+#                 for x in eachrow(
+#                     df_par[
+#                         df_par.vartype.=="bc_hyperfitness",
+#                         [:mean, :std]
+#                     ]
+#                 )
+#             ]...
+#         )
+
+#         # Sample τ̲ variables
+#         τ_mat = exp.(
+#             hcat(
+#                 [
+#                     Random.rand(Distributions.Normal(x...), n_samples)
+#                     for x in eachrow(
+#                         df_par[
+#                             df_par.vartype.=="bc_deviations",
+#                             [:mean, :std]
+#                         ]
+#                     )
+#                 ]...
+#             )
+#         )
+
+#         # Sample θ̲̃ variables
+#         θ_tilde_mat = hcat(
+#             [
+#                 Random.rand(Distributions.Normal(x...), n_samples)
+#                 for x in eachrow(
+#                     df_par[
+#                         df_par.vartype.=="bc_noncenter",
+#                         [:mean, :std]
+#                     ]
+#                 )
+#             ]...
+#         )
+
+
+#         # Compute individual strains fitness values
+#         s_mat = θ_mat[:, geno_idx] .+ (τ_mat .* θ_tilde_mat)
+
+#         # Compute mean and standard deviation and turn into dataframe
+#         df_s = DF.DataFrame(
+#             hcat(
+#                 [StatsBase.median.(eachcol(s_mat)),
+#                     StatsBase.std.(eachcol(s_mat))]...
+#             ),
+#             ["mean", "std"]
+#         )
+
+#         # To add the rest of the columns, we will use the τ variables that have
+#         # the same length. Let's locate such variables
+#         τ_idx = occursin.("τ", string.(vars))
+
+#         # Add extra columns
+#         DF.insertcols!(
+#             df_s,
+#             :varname => replace.(df_par[τ_idx, :varname], "logτ" => "s"),
+#             :vartype .=> "bc_fitness",
+#             :rep => df_par[τ_idx, :rep],
+#             :env => df_par[τ_idx, :env],
+#             :id => df_par[τ_idx, :id]
+#         )
+
+#         # Append dataframes
+#         DF.append!(df_par, df_s)
+#     end # if
+
+#     return df_par
+# end # function
\ No newline at end of file