diff --git a/.github/copy_to_branch.sh b/.github/copy_to_branch.sh index 068e528c..a01815ff 100755 --- a/.github/copy_to_branch.sh +++ b/.github/copy_to_branch.sh @@ -1,16 +1,15 @@ # Designate desired directories and files -FILES=( -'data-raw/'* -'man/'* -'R/'* -'tests/'* -'vignettes/'* -'DESCRIPTION' -'NAMESPACE' -'NEWS.md' -'README.md' -'inst/CITATION' -) +SRC_FOLDER_PATHS=(data-raw man R tests vignettes) +SRC_FILE_PATHS=(DESCRIPTION LICENSE NAMESPACE NEWS.md README.md inst/CITATION) + +# Get files from directories +FILES=$(find "${SRC_FOLDER_PATHS[@]}" -type f) + +# Add indicated individual files +for F in "${SRC_FILE_PATHS[@]}" +do +FILES+=" ${F}" +done echo "${FILES[@]}" @@ -22,9 +21,8 @@ git fetch # Checkout target branch git checkout $TARGET_BRANCH # copy files from the branch the action is being run upon -for F in ${FILES}; do -git checkout $SRC_BRANCH -- ${F} -done +SRC_BRANCH=$(git symbolic-ref --short HEAD) +git checkout $SRC_BRANCH -- $FILES # Commit to the repository (ignore if no changes) git add -A git diff-index --quiet HEAD || git commit -am "update files" diff --git a/.github/workflows/bioc_branch.yml b/.github/workflows/bioc_branch.yml index 3ab86b90..e169b73e 100644 --- a/.github/workflows/bioc_branch.yml +++ b/.github/workflows/bioc_branch.yml @@ -17,7 +17,6 @@ jobs: - uses: actions/checkout@v2 - name: copy env: - SRC_BRANCH: 'master' - TARGET_BRANCH: 'bioconductor' + TARGET_BRANCH: 'master' run: .github/copy_to_branch.sh shell: bash diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 03e2c6ba..6231d5ea 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -22,7 +22,7 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -41,7 +41,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git a/DESCRIPTION b/DESCRIPTION index 372e879b..936a2453 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: dominoSignal Title: Cell Communication Analysis for Single Cell RNA Sequencing -Version: 0.99.2 +Version: 0.99.3 Authors@R: c( person("Christopher", "Cherry", role = c("aut"), email = "ccherry.6574@gmail.com", comment = c(ORCID = "0000-0002-5481-0055")), person("Jacob T", "Mitchell", role = c("aut", "cre"), email = "jmitch81@jhmi.edu", comment = c(ORCID = "0000-0002-5370-9692")), @@ -11,7 +11,7 @@ Authors@R: c( person("Jennifer", "Elisseeff", role = "ctb", email = "jhe@jhu.edu", comment = c(ORCID = "0000-0002-5066-1996")) ) Description: - dominoSignal is a package developed to analyze cell signaling through ligand - receptor - transcription factor networks in scRNAseq data. It takes as input information transcriptomic data, requiring counts, z-scored counts, and cluster labels, as well as information on transcription factor activation (such as from SCENIC) and a database of ligand and receptor pairings (such as from cellphoneDB). This package creates an object storing ligand - receptor - transcription factor linkages by cluster and provides several methods for exploring, summarizing, and visualizing the analysis. + dominoSignal is a package developed to analyze cell signaling through ligand - receptor - transcription factor networks in scRNAseq data. It takes as input information transcriptomic data, requiring counts, z-scored counts, and cluster labels, as well as information on transcription factor activation (such as from SCENIC) and a database of ligand and receptor pairings (such as from CellPhoneDB). This package creates an object storing ligand - receptor - transcription factor linkages by cluster and provides several methods for exploring, summarizing, and visualizing the analysis. BugReports: https://github.com/FertigLab/dominoSignal/issues Depends: R(>= 4.2.0), @@ -31,7 +31,7 @@ Imports: License: GPL-3 | file LICENSE Encoding: UTF-8 LazyData: false -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 biocViews: SystemsBiology, SingleCell, diff --git a/NAMESPACE b/NAMESPACE index 5b41425e..574e441b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ export(feat_heatmap) export(gene_network) export(incoming_signaling_heatmap) export(mean_ligand_expression) +export(mock_linkage_summary) export(plot_differential_linkages) export(rename_clusters) export(signaling_heatmap) @@ -32,6 +33,8 @@ export(summarize_linkages) export(test_differential_linkages) exportClasses(domino) exportClasses(linkage_summary) +exportMethods(print) +exportMethods(show) import(ComplexHeatmap) import(biomaRt) import(circlize) diff --git a/NEWS.md b/NEWS.md index ad565e86..e8fa2634 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,25 @@ +# dominoSignal v0.99.3 + +## create_domino Function Warnings + +- Disabled exact p-value computation for the correlation test between receptor expression and features to prevent repeated warning messages due to tied ranks during Spearman correlation calculation + +## Vignettes + +- Updated non-functional links to functional URLs +- All vignettes explicitly state the seed used when executing their code +- The dominoSignal Object vignette states the purpose of the code used to download and import data from a BioFileCache to demonstrate applications of the domino objects on a real data object that is too large to include inside the package + +## Testing Data + +- Data used for examples and unit tests is now stored in the package's data/ directory. +- All usage of the triple colon operator to access internal data from sysdata have been replaced with usage of data() +- Linkage summaries for demonstrating differential linkage testing and plotting are generated within unit test scripts rather than being stored within the package. +- Examples are run with echo = FALSE to cut down on lines of code printed in example pages. +- Examples for create_domino are run with verbose = FALSE +- Fixed example of changing colors for circos_ligand_receptor +- Fixed example of boolean representation of cor_heatmap + # dominoSignal v0.99.2 ## Package Name @@ -6,9 +28,9 @@ ## Bioconductor Standards -- Update to Vignettes presenting application of the DominoSignal pipeline on data formatted as a SingleCellExperiment object +- Update to vignettes presenting application of the DominoSignal pipeline on data formatted as a SingleCellExperiment object - Implemented caching of example data by BiocCache to meet package size limits -- Removal of depreciated scripts for running SCENIC. Tutorials for running SCENIC are still present in vignettes +- Removal of deprecated scripts for running SCENIC. Tutorials for running SCENIC are still present in vignettes - Corrected BiocCheck notes pertaining to coding practices including paste in conditional statements, functions with dontrun examples, usage of seq_len or seq_along in place of seq, and usage of vapply in place of sapply @@ -24,17 +46,17 @@ - Plotting function for differential linkages ## Package structure -- Adjustments made to meet BioConductor standards +- Adjustments made to meet Bioconductor standards # dominoSignal v0.2.1 ## Updates to domino object construction -- Uniform formats for inputs of receptor-ligand interaction databases, transcription factor activity features, and regulon gene lists for operability with alternative databases and transcription factor activation inference methods +- Uniform formats for inputs of receptor - ligand interaction databases, transcription factor activity features, and regulon gene lists for operability with alternative databases and transcription factor activation inference methods - Helper functions for reformatting pySCENIC outputs and CellPhoneDB database files to domino-readable uniform formats - Assessment of transcription factor linkage with receptors that function as a heteromeric complex based on correlation between transcription factor activity and all receptor component genes - Assessment of complex ligand expression as the mean of component gene expression for plotting functions - Minimum threshold for the percentage of cells in a cluster expressing a receptor gene for the receptor to be called active within the cluster -- Additional linkage slots for active receptors in each cluster, transcription factor-receptor linkages for each cluster, and incoming ligands for active receptors on each cluster +- Additional linkage slots for active receptors in each cluster, transcription factor - receptor linkages for each cluster, and incoming ligands for active receptors on each cluster ## Plotting functions - Chord plot of ligand expression targeting a specified receptor where chord widths correspond to the quantity of ligand expression by each cell cluster @@ -43,7 +65,7 @@ ## Bugfixes - Added host option for gene ortholog conversions using `{biomaRt}` for access to maintained mirrors -- Transcription factor-target linkages are now properly stored so that receptors in a transcription factor's regulon are excluded from linkage +- Transcription factor - target linkages are now properly stored so that receptors in a transcription factor's regulon are excluded from linkage - Ligand nodes sizes in gene networks correspond to quantity of ligand expression - `create_domino()` can be run without providing a regulon list - References to the host GitHub repository have been updated to [Elisseeff-Lab](https://github.com/Elisseeff-Lab/domino) diff --git a/R/class_definitions.R b/R/class_definitions.R index 3693abbd..00a87e76 100644 --- a/R/class_definitions.R +++ b/R/class_definitions.R @@ -2,18 +2,18 @@ #' @importClassesFrom Matrix dgCMatrix #' NULL -#' The domino Class +#' The domino class #' #' The domino class contains all information necessary to calculate receptor-ligand #' signaling. It contains z-scored expression, cell cluster labels, feature values, #' and a referenced receptor-ligand database formatted as a receptor-ligand map. #' Calculated intermediate values are also stored. #' -#' @slot db_info List of data sets from lr database. +#' @slot db_info List of data sets from ligand - receptor database #' @slot counts Raw count gene expression data #' @slot z_scores Matrix of z-scored expression data with cells as columns #' @slot clusters Named factor with cluster identity of each cell -#' @slot features Matrix of features to correlate receptor-ligand expression with. Cells are columns and features are rows. +#' @slot features Matrix of features (TFs) to correlate receptor - ligand expression with. Cells are columns and features are rows. #' @slot cor Correlation matrix of receptor expression to features. #' @slot linkages List of lists containing info linking cluster->tf->rec->lig #' @slot clust_de Data frame containing differential expression results for features by cluster. @@ -23,7 +23,7 @@ NULL #' @name domino-class #' @rdname domino-class #' @exportClass domino -#' @return an instance of class `domino ` +#' @return An instance of class `domino ` #' domino <- methods::setClass( Class = "domino", @@ -74,22 +74,24 @@ linkage_summary <- setClass( #' #' Prints a summary of a domino object #' -#' @param x Domino object -#' @return a printed description of the number of cell clusters in the object -#' @keywords internal +#' @param x A domino object +#' @param ... Additional arguments to be passed to other methods +#' @return A printed description of the number of cells and clusters in the domino object +#' @export #' @examples -#' print(dominoSignal:::pbmc_dom_built_tiny) -#' +#' example(build_domino, echo = FALSE) +#' print(pbmc_dom_built_tiny) +#' setMethod("print", "domino", function(x, ...) { if (x@misc$build) { message( "A domino object of ", length(x@clusters), " cells - Contains signaling between", - length(levels(x@clusters)), "clusters - Built with a maximum of", as.integer(x@misc$build_vars["max_tf_per_clust"]), - "TFs per cluster - and a maximum of", as.integer(x@misc$build_vars["max_rec_per_tf"]), - "receptors per TF\n" + Contains signaling between ", + length(levels(x@clusters)), " clusters + Built with a maximum of ", x@misc$build_vars["max_tf_per_clust"], + " TFs per cluster + and a maximum of ", x@misc$build_vars["max_rec_per_tf"], + " receptors per TF\n" ) } else { message(c("A domino object of ", length(x@clusters), " cells\n", "A signaling network has not been built\n"), @@ -101,13 +103,12 @@ setMethod("print", "domino", function(x, ...) { #' #' Shows content overview of domino object #' -#' @param object Domino object -#' @return a printed description of the number of cells in a domino object and its build status -#' @keywords internal +#' @param object A domino object +#' @return A printed description of cell numbers and clusters in the object +#' @export #' @examples -#' dominoSignal:::pbmc_dom_built_tiny -#' -#' show(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' show(pbmc_dom_built_tiny) #' setMethod("show", "domino", function(object) { if (object@misc$build) { diff --git a/R/convenience_fxns.R b/R/convenience_fxns.R index fa75c837..d8d8f6a4 100644 --- a/R/convenience_fxns.R +++ b/R/convenience_fxns.R @@ -5,23 +5,18 @@ NULL #' Renames clusters in a domino object #' -#' This function reads in a receptor ligand signaling database, cell level -#' features of some kind (ie. output from pySCENIC), z-scored single cell data, -#' and cluster id for single cell data, calculates a correlation matrix between -#' receptors and other features (this is transcription factor module scores if -#' using pySCENIC), and finds features enriched by cluster. It will return a -#' domino object prepared for [build_domino()], which will calculate a signaling -#' network. +#' This function renames the clusters used to build a domino object #' -#' @param dom Domino object to rename clusters in -#' @param clust_conv Named vector of conversions from old to new clusters. Values are taken as new clusters IDs and names as old cluster IDs. -#' @param warning Logical. If TRUE, will warn if a cluster is not found in the conversion table. Default is FALSE. +#' @param dom a domino object to rename clusters in +#' @param clust_conv named vector of conversions from old to new clusters. Values are taken as new clusters IDs and names as old cluster IDs. +#' @param warning logical. If TRUE, will warn if a cluster is not found in the conversion table. Default is FALSE. #' @return A domino object with clusters renamed in all applicable slots. #' @export #' @examples +#' example(build_domino, echo = FALSE) #' new_clust <- c("CD8_T_cell" = "CD8+ T Cells", #' "CD14_monocyte" = "CD14+ Monocytes", "B_cell" = "B Cells") -#' pbmc_dom_built_tiny <- rename_clusters(dominoSignal:::pbmc_dom_built_tiny, new_clust) +#' pbmc_dom_built_tiny <- rename_clusters(pbmc_dom_built_tiny, new_clust) #' rename_clusters <- function(dom, clust_conv, warning = FALSE) { if (is.null(dom@clusters)) { @@ -73,17 +68,17 @@ rename_clusters <- function(dom, clust_conv, warning = FALSE) { } -#' Convert Genes Using Table +#' Convert genes using a table #' -#' Takes a vector of gene inputs and a conversion table and returns a +#' Takes a vector of gene inputs and a conversion table and returns a #' converted gene table #' -#' @param genes The genes to convert. -#' @param from Gene symbol type of the input (ENSG, ENSMUSG, HGNC, MGI) -#' @param to Desired gene symbol type for the output (HGNC, MGI) -#' @param conversion_table A data.frame with column names corresponding to gene symbol types (mm.ens, hs.ens, mgi, hgnc) +#' @param genes the genes to convert +#' @param from gene symbol type of the input (ENSG, ENSMUSG, HGNC, MGI) +#' @param to desired gene symbol type for the output (HGNC, MGI) +#' @param conversion_table a data frame with column names corresponding to gene symbol types (mm.ens, hs.ens, mgi, hgnc) #' and rows corresponding to the gene symbols themselves -#' @return Data frame of genes with original and corresponding converted symbols +#' @return A data frame of genes with original and corresponding converted symbols #' @keywords internal #' table_convert_genes <- function(genes, from, to, conversion_table) { diff --git a/R/data.R b/R/data.R new file mode 100644 index 00000000..887df41b --- /dev/null +++ b/R/data.R @@ -0,0 +1,47 @@ +#' SCENIC AUC subset +#' +#' A subset of SCENIC AUCs as applied to PBMC data. +#' +#' @format A list of: +#' \describe{ +#' \item{auc_tiny}{A subset of SCENIC AUCs} +#' \item{regulons_tiny}{A subset of SCENIC regulons} +#' } +#' +#' @source +#' @usage data("SCENIC") +"SCENIC" + + +#' PBMC RNAseq data subset +#' +#' A subset of the results of PBMC RNA-seq data. +#' +#' @format A list of:: +#' \describe{ +#' \item{RNA_count_tiny}{A subset of PBMC RNA-seq data: counts assay} +#' \item{RNA_zscore_tiny}{A subset of PBMC RNA-seq data: zscore assay} +#' \item{clusters_tiny}{A subset of PBMC RNA-seq data: clusters as defined by cell_type} +#' } +#' +#' @source +#' @usage data("PBMC") +"PBMC" + + +#' CellPhoneDB subset +#' +#' A list of four subsets of CellPhoneDB data. +#' +#' +#' @format A list of: +#' \describe{ +#' \item{genes_tiny}{A subet of CellPhoneDB gene_input.csv} +#' \item{proteins_tiny}{A subset of CellPhoneDB protein_input.csv} +#' \item{complexes_tiny}{A subset of CellPhoneDB complex_input.csv} +#' \item{interactions_tiny}{A subset of CellPhoneDB interaction_input.csv} +#' } +#' +#' @source +#' @usage data("CellPhoneDB") +"CellPhoneDB" diff --git a/R/differential_fxns.R b/R/differential_fxns.R index aaafb35c..fa6ed941 100644 --- a/R/differential_fxns.R +++ b/R/differential_fxns.R @@ -4,21 +4,57 @@ NULL #' Summarize linkages from multiple domino objects #' -#' Creates a linkage_summary object storing the linkages learned in different domino objects as nested lists to facilitate comparisons of networks learned by domino across subject covariates. +#' Creates a [linkage_summary()] object storing the linkages learned in different domino objects as nested lists to facilitate comparisons of networks learned by domino across subject covariates. #' -#' @param domino_results list of domino result with one domino object per subject. Names from the list must match subject_names. -#' @param subject_meta dataframe that includes the subject features by which the objects could be grouped. The first column should must be subject names +#' @param domino_results list of domino result with one domino object per subject. Names from the list must match subject_names +#' @param subject_meta data frame that includes the subject features by which the objects could be grouped. The first column should must be subject names #' @param subject_names vector of subject names in domino_results. If NULL, defaults to first column of subject_meta. -#' @return A linkage summary class object consisting of nested lists of the active transcription factors, active receptors, and incoming ligands for each cluster across multiple domino results. +#' @return A linkage summary class object consisting of nested lists of the active transcription factors, active receptors, and incoming ligands for each cluster across multiple domino results #' @export -#' @examples -#' dom_ls <- dominoSignal:::dom_ls_tiny +#' @examples +#' example(build_domino, echo = FALSE) +#' +#' #create alternative clustering by shuffling cluster assignments +#' clusters_tiny_alt <- setNames( +#' PBMC$clusters_tiny[c(121:240, 1:120, 241:360)], +#' names(PBMC$clusters_tiny) +#' ) +#' clusters_tiny_alt <- as.factor(clusters_tiny_alt) +#' +#' #build an alternative domino object +#' pbmc_dom_tiny_alt <- create_domino( +#' rl_map = rl_map_tiny, +#' features = SCENIC$auc_tiny, +#' counts = PBMC$RNA_count_tiny, +#' z_scores = PBMC$RNA_zscore_tiny, +#' clusters = clusters_tiny_alt, +#' tf_targets = regulon_list_tiny, +#' use_clusters = TRUE, +#' use_complexes = TRUE, +#' remove_rec_dropout = FALSE +#' ) +#' +#' pbmc_dom_built_tiny_alt <- build_domino( +#' dom = pbmc_dom_tiny_alt, +#' min_tf_pval = .05, +#' max_tf_per_clust = Inf, +#' max_rec_per_tf = Inf, +#' rec_tf_cor_threshold = .1, +#' min_rec_percentage = 0.01 +#' ) +#' +#' #create a list of domino objects +#' dom_ls <- list( +#' dom1 = pbmc_dom_built_tiny, +#' dom2 = pbmc_dom_built_tiny_alt +#') +#' +#' #compare the linkages across the two domino objects #' meta_df <- data.frame("ID" = c("dom1", "dom2"), "group" = c("A", "B")) #' summarize_linkages( -#' domino_results = dom_ls, subject_meta = meta_df, +#' domino_results = dom_ls, subject_meta = meta_df, #' subject_names = meta_df$ID #') -#' summarize_linkages <- function(domino_results, subject_meta, subject_names = NULL) { if (!is(domino_results, "list")) { stop("domino_results must be provided as a named list where names correspond to subject names") @@ -99,16 +135,16 @@ summarize_linkages <- function(domino_results, subject_meta, subject_names = NUL #' #' Count occurrences of linkages across multiple domino results from a linkage summary #' -#' @param linkage_summary a linkage_summary object +#' @param linkage_summary a [linkage_summary()] object #' @param cluster the name of the cell cluster being compared across multiple domino results -#' @param group.by the name of the column in linkage_summary\@subject_meta by which to group subjects for counting. If NULL, only total counts of linkages for linkages in the cluster across all subjects is given. -#' @param linkage a stored linkage from the domino object. Can compare ('tfs', 'rec', 'incoming_lig', 'tfs_rec', 'rec_lig') +#' @param group.by the name of the column in `linkage_summary@subject_meta` by which to group subjects for counting. If NULL, only total counts of linkages for linkages in the cluster across all subjects is given. +#' @param linkage a stored linkage from the domino object. Can compare any of 'tfs', 'rec', 'incoming_lig', 'tfs_rec', or 'rec_lig' #' @param subject_names a vector of subject_names from the linkage_summary to be compared. If NULL, all subject_names in the linkage summary are included in counting. -#' @return a data frame with columns for the unique linkage features and the counts of how many times the linkage occured across the compared domino results. If group.by is used, counts of the linkages are also provided as columns named by the unique values of the group.by variable. +#' @return A data frame with columns for the unique linkage features and the counts of how many times the linkage occured across the compared domino results. If group.by is used, counts of the linkages are also provided as columns named by the unique values of the group.by variable. #' @export #' @examples #' count_linkage( -#' linkage_summary = dominoSignal:::linkage_sum_tiny, cluster = "C1", +#' linkage_summary = mock_linkage_summary(), cluster = "C1", #' group.by = "group", linkage = "rec") #' count_linkage <- function(linkage_summary, cluster, group.by = NULL, linkage = "rec_lig", subject_names = NULL) { @@ -145,16 +181,16 @@ count_linkage <- function(linkage_summary, cluster, group.by = NULL, linkage = " #' #' Statistical test for differential linkages across multiple domino results #' -#' @param linkage_summary a linkage_summary object +#' @param linkage_summary a [linkage_summary()] object #' @param cluster the name of the cell cluster being compared across multiple domino results -#' @param group.by the name of the column in linkage_summary\@subject_meta by which to group subjects for counting. -#' @param linkage a stored linkage from the domino object. Can compare ('tfs', 'rec', 'incoming_lig', 'tfs_rec', 'rec_lig') +#' @param group.by the name of the column in `linkage_summary@subject_meta` by which to group subjects for counting. +#' @param linkage a stored linkage from the domino object. Can compare any of 'tfs', 'rec', 'incoming_lig', 'tfs_rec', or 'rec_lig' #' @param subject_names a vector of subject_names from the linkage_summary to be compared. If NULL, all subject_names in the linkage summary are included in counting. #' @param test_name the statistical test used for comparison. #' \itemize{ #' \item{'fishers.exact'} : Fisher's exact test for the dependence of the proportion of subjects with an active linkage in the cluster on which group the subject belongs to in the group.by variable. Provides an odds ratio, p-value, and a Benjamini-Hochberg FDR-adjusted p-value (p.adj) for each linkage tested. #' } -#' @return a data frame of results from the test of the differential linkages. Rows correspond to each linkage tested. Columns correspond to: +#' @return A data frame of results from the test of the differential linkages. Rows correspond to each linkage tested. Columns correspond to: #' \itemize{ #' \item{'cluster'} : the name of the cell cluster being compared #' \item{'linkage'} : the type of linkage being compared @@ -169,8 +205,8 @@ count_linkage <- function(linkage_summary, cluster, group.by = NULL, linkage = " #' } #' @export #' @examples -#' test_differential_linkages( -#' linkage_summary = dominoSignal:::linkage_sum_tiny, cluster = "C1", group.by = "group", +#' tiny_differential_linkage_c1 <- test_differential_linkages( +#' linkage_summary = mock_linkage_summary(), cluster = "C1", group.by = "group", #' linkage = "rec", test_name = "fishers.exact" #' ) #' diff --git a/R/import_fxns.R b/R/import_fxns.R index bf357c24..ca8b793d 100644 --- a/R/import_fxns.R +++ b/R/import_fxns.R @@ -6,14 +6,14 @@ #' NULL -#' Create a receptor-ligand map from a cellphonedb signaling database +#' Create a receptor - ligand map from a CellPhoneDB signaling database #' #' Generates a data frame of ligand-receptor interactions from a CellPhoneDB database annotating the genes encoding the interacting ligands and receptors to be queried in transcriptomic data. #' -#' @param genes dataframe or file path to table of gene names in uniprot, hgnc_symbol, or ensembl format in cellphonedb database format -#' @param proteins dataframe or file path to table of protein features in cellphonedb format -#' @param interactions dataframe or file path to table of protein-protein interactions in cellphonedb format -#' @param complexes optional: dataframe or file path to table of protein complexes in cellphonedb format +#' @param genes data frame or file path to table of gene names in uniprot, hgnc_symbol, or ensembl format in CellPhoneDB database format +#' @param proteins data frame or file path to table of protein features in CellPhoneDB format +#' @param interactions data frame or file path to table of protein-protein interactions in CellPhoneDB format +#' @param complexes optional: data frame or file path to table of protein complexes in CellPhoneDB format #' @param database_name name of the database being used, stored in output #' @param gene_conv a tuple of (from, to) or (source, target) if gene conversion to orthologs is desired; options are ENSMUSG, ENSG, MGI, or HGNC #' @param gene_conv_host host for conversion; default ensembl, could also use mirrors if desired @@ -22,9 +22,11 @@ NULL #' @return Data frame where each row describes a possible receptor-ligand interaction #' @export create_rl_map_cellphonedb #' @examples -#' rl_map_tiny <- create_rl_map_cellphonedb(genes = dominoSignal:::genes_tiny, -#' proteins = dominoSignal:::proteins_tiny, interactions = dominoSignal:::interactions_tiny, -#' complexes = dominoSignal:::complexes_tiny) +#' data(CellPhoneDB) +#' rl_map_tiny <- create_rl_map_cellphonedb(genes = CellPhoneDB$genes_tiny, +#' proteins = CellPhoneDB$proteins_tiny, +#' interactions = CellPhoneDB$interactions_tiny, +#' complexes =CellPhoneDB$complexes_tiny) #' create_rl_map_cellphonedb <- function( genes, proteins, interactions, complexes = NULL, database_name = "CellPhoneDB", @@ -238,11 +240,12 @@ create_rl_map_cellphonedb <- function( #' #' Generates a list of transcription factors and the genes targeted by the transcription factor as part of their regulon inferred by pySCENIC #' -#' @param regulons Dataframe or file path to the table of the output of the grn (gene regulatory network) function from pySCENIC +#' @param regulons Data frame or file path to the table of the output of the ctx function from pySCENIC #' @return A list where names are transcription factors and the stored values are character vectors of genes in the inferred regulons #' @export create_regulon_list_scenic #' @examples -#' regulon_list_tiny <- create_regulon_list_scenic(regulons = dominoSignal:::regulons_tiny) +#' data(SCENIC) +#' regulon_list_tiny <- create_regulon_list_scenic(regulons = SCENIC$regulons_tiny) #' create_regulon_list_scenic <- function(regulons) { if (is(regulons, "character")) { @@ -293,21 +296,30 @@ create_regulon_list_scenic <- function(regulons) { #' @param tf_variance_quantile What proportion of variable features to take if using variance to threshold features. Default is 0.5. Higher numbers will keep more features. Ignored if tf_selection_method is not 'variable' #' @return A domino object #' @export create_domino -#' @examples -#' pbmc_dom_tiny_all <- create_domino( -#' rl_map = dominoSignal:::rl_map_tiny, features = dominoSignal:::auc_tiny, -#' counts = dominoSignal:::RNA_count_tiny, z_scores = dominoSignal:::RNA_zscore_tiny, -#' clusters = dominoSignal:::clusters_tiny, tf_targets = dominoSignal:::regulon_list_tiny, -#' use_clusters = FALSE, use_complexes = FALSE, +#' @examples +#' example(create_rl_map_cellphonedb, echo = FALSE) +#' example(create_regulon_list_scenic, echo = FALSE) +#' data(SCENIC) +#' data(PBMC) +#' +#' pbmc_dom_tiny <- create_domino( +#' rl_map = rl_map_tiny, features = SCENIC$auc_tiny, +#' counts = PBMC$RNA_count_tiny, z_scores = PBMC$RNA_zscore_tiny, +#' clusters = PBMC$clusters_tiny, tf_targets = regulon_list_tiny, +#' use_clusters = TRUE, use_complexes = TRUE, remove_rec_dropout = FALSE, +#' verbose = FALSE +#' ) +#' +#' pbmc_dom_tiny_no_clusters <- create_domino( +#' rl_map = rl_map_tiny, features = SCENIC$auc_tiny, +#' counts = PBMC$RNA_count_tiny, z_scores =PBMC$RNA_zscore_tiny, +#' clusters = PBMC$clusters_tiny, tf_targets = regulon_list_tiny, +#' use_clusters = FALSE, use_complexes = FALSE, #' rec_min_thresh = 0.1, remove_rec_dropout = TRUE, -#' tf_selection_method = "all") -#' -#' pbmc_dom_tiny_clustered <- create_domino( -#' rl_map = dominoSignal:::rl_map_tiny, features = dominoSignal:::auc_tiny, -#' counts = dominoSignal:::RNA_count_tiny, z_scores = dominoSignal:::RNA_zscore_tiny, -#' clusters = dominoSignal:::clusters_tiny, tf_targets = dominoSignal:::regulon_list_tiny, -#' use_clusters = TRUE, use_complexes = TRUE, remove_rec_dropout = FALSE) -#' +#' tf_selection_method = "all", +#' verbose = FALSE +#' ) +#' create_domino <- function( rl_map, features, counts = NULL, z_scores = NULL, clusters = NULL, use_clusters = TRUE, tf_targets = NULL, verbose = TRUE, @@ -467,7 +479,7 @@ create_domino <- function( # store tf_targets in linkages if they are provided as a list if (!is(tf_targets, "list")) { dom@linkages[["tf_targets"]] <- NULL - warning("tf_targets is not a list. No regulons stored") + message("tf_targets is not a list. No regulons stored") } else { dom@linkages[["tf_targets"]] <- tf_targets } @@ -515,7 +527,10 @@ create_domino <- function( rhorow[rec] <- 0 next } - cor <- stats::cor.test(rec_z_scores, tar_tf_scores, method = "spearman", alternative = "greater") + cor <- stats::cor.test( + rec_z_scores, tar_tf_scores, method = "spearman", + alternative = "greater", exact = FALSE + ) rhorow[rec] <- cor$estimate } if (length(module_rec_targets > 0)) { @@ -574,9 +589,9 @@ create_domino <- function( #' #' @param genes Vector of genes to convert. #' @param from Format of gene input (ENSMUSG, ENSG, MGI, or HGNC) -#' @param to Format of gene output (MGI, or HGNC) +#' @param to Format of gene output (MGI or HGNC) #' @param host Host to connect to. Defaults to https://www.ensembl.org following the useMart default, but can be changed to archived hosts if useMart fails to connect. -#' @return A data frame with input genes as col 1 and output as col 2 +#' @return A data frame with input genes as column 1 and converted genes as column 2 #' @keywords internal #' convert_genes <- function( @@ -633,9 +648,10 @@ convert_genes <- function( #' @return An updated RL signaling data frame #' @export #' @examples +#' example(create_rl_map_cellphonedb, echo = FALSE) #' lr_name <- data.frame("abbrev" = c("L", "R"), "full" = c("Ligand", "Receptor")) -#' rl_map_expanded <- add_rl_column(map = dominoSignal:::rl_map_tiny, map_ref = "type_A", -#' conv = lr_name, new_name = "type_A_full") +#' rl_map_expanded <- add_rl_column(map = rl_map_tiny, map_ref = "type_A", +#' conv = lr_name, new_name = "type_A_full") #' add_rl_column <- function(map, map_ref, conv, new_name) { map_in_ref <- match(map[[map_ref]], conv[, 1]) @@ -662,11 +678,12 @@ add_rl_column <- function(map, map_ref, conv, new_name) { new_map <- data.frame(new_map, stringsAsFactors = FALSE) } -#' Calculate mean ligand expression as a data.frame for plotting in circos plot +#' Calculate mean ligand expression as a data frame for plotting in circos plot #' #' Creates a data frame of mean ligand expression for use in plotting a circos #' plot of ligand expression and saving tables of mean expression. -#' +#'us + #' @param x Gene by cell expression matrix #' @param ligands Character vector of ligand genes to be quantified #' @param cell_ident Vector of cell type (identity) names for which to calculate mean ligand gene expression @@ -675,7 +692,8 @@ add_rl_column <- function(map, map_ref, conv, new_name) { #' @return A data frame of ligand expression targeting the specified receptor #' @export #' @examples -#' counts <- dom_counts(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' counts <- dom_counts(pbmc_dom_built_tiny) #' mean_exp <- mean_ligand_expression(counts, #' ligands = c("PTPRC", "FASLG"), cell_ident = "CD14_monocyte", #' cell_barcodes = colnames(counts), destination = "FAS") diff --git a/R/plot_fxns.R b/R/plot_fxns.R index cc0e2257..f23867c5 100644 --- a/R/plot_fxns.R +++ b/R/plot_fxns.R @@ -10,24 +10,26 @@ NULL #' Create a network heatmap #' #' Creates a heatmap of the signaling network. Alternatively, the network -#' matrix can be accessed directly in the signaling slot of a domino object. +#' matrix can be accessed directly in the signaling slot of a domino object using +#' the [dom_signaling()] function. #' -#' @param dom Domino object with network built ([build_domino()]) -#' @param clusts Vector of clusters to be included. If NULL then all clusters are used. -#' @param min_thresh Minimum signaling threshold for plotting. Defaults to -Inf for no threshold. -#' @param max_thresh Maximum signaling threshold for plotting. Defaults to Inf for no threshold. -#' @param scale How to scale the values (after thresholding). Options are 'none', 'sqrt' for square root, or 'log' for log10. -#' @param normalize Options to normalize the matrix. Normalization is done after thresholding and scaling. Accepted inputs are 'none' for no normalization, 'rec_norm' to normalize to the maximum value with each receptor cluster, or 'lig_norm' to normalize to the maximum value within each ligand cluster -#' @param ... Other parameters to pass to [ComplexHeatmap::Heatmap()] -#' @return a Heatmap rendered to the active graphics device +#' @param dom domino object with network built ([build_domino()]) +#' @param clusts vector of clusters to be included. If NULL then all clusters are used. +#' @param min_thresh minimum signaling threshold for plotting. Defaults to -Inf for no threshold. +#' @param max_thresh maximum signaling threshold for plotting. Defaults to Inf for no threshold. +#' @param scale how to scale the values (after thresholding). Options are 'none', 'sqrt' for square root, or 'log' for log10. +#' @param normalize options to normalize the matrix. Normalization is done after thresholding and scaling. Accepted inputs are 'none' for no normalization, 'rec_norm' to normalize to the maximum value with each receptor cluster, or 'lig_norm' to normalize to the maximum value within each ligand cluster +#' @param ... other parameters to pass to [ComplexHeatmap::Heatmap()] +#' @return A heatmap rendered to the active graphics device #' @export signaling_heatmap #' @examples +#' example(build_domino, echo = FALSE) #' #basic usage -#' signaling_heatmap(dominoSignal:::pbmc_dom_built_tiny) +#' signaling_heatmap(pbmc_dom_built_tiny) #' #scale -#' signaling_heatmap(dominoSignal:::pbmc_dom_built_tiny, scale = "sqrt") +#' signaling_heatmap(pbmc_dom_built_tiny, scale = "sqrt") #' #normalize -#' signaling_heatmap(dominoSignal:::pbmc_dom_built_tiny, normalize = "rec_norm") +#' signaling_heatmap(pbmc_dom_built_tiny, normalize = "rec_norm") #' signaling_heatmap <- function( dom, clusts = NULL, min_thresh = -Inf, max_thresh = Inf, scale = "none", @@ -92,8 +94,9 @@ signaling_heatmap <- function( #' @return a Heatmap rendered to the active graphics device #' @export incoming_signaling_heatmap #' @examples +#' example(build_domino, echo = FALSE) #' #incoming signaling of the CD8 T cells -#' incoming_signaling_heatmap(dominoSignal:::pbmc_dom_built_tiny, "CD8_T_cell") +#' incoming_signaling_heatmap(pbmc_dom_built_tiny, "CD8_T_cell") #' incoming_signaling_heatmap <- function( dom, rec_clust, clusts = NULL, min_thresh = -Inf, max_thresh = Inf, @@ -172,33 +175,34 @@ incoming_signaling_heatmap <- function( #' #' Creates a network diagram of signaling between clusters. Nodes are clusters #' and directed edges indicate signaling from one cluster to another. Edges are -#' colored based on the color scheme of the ligand expressing cluster. +#' colored based on the color scheme of the ligand expressing cluster #' -#' @param dom Domino object with network built ([build_domino()]) -#' @param cols Named vector indicating the colors for clusters. Values are colors and names must match clusters in the domino object. If left as NULL then ggplot colors are generated for the clusters. -#' @param edge_weight Weight for determining thickness of edges on plot. Signaling values are multiplied by this value. -#' @param clusts Vector of clusters to be included in the network plot. -#' @param showOutgoingSignalingClusts Vector of clusters to plot the outgoing signaling from -#' @param showIncomingSignalingClusts Vector of clusters to plot the incoming signaling on -#' @param min_thresh Minimum signaling threshold. Values lower than the threshold will be set to the threshold. Defaults to -Inf for no threshold. -#' @param max_thresh Maximum signaling threshold for plotting. Values higher than the threshold will be set to the threshold. Defaults to Inf for no threshold. -#' @param normalize Options to normalize the signaling matrix. Accepted inputs are 'none' for no normalization, 'rec_norm' to normalize to the maximum value with each receptor cluster, or 'lig_norm' to normalize to the maximum value within each ligand cluster -#' @param scale How to scale the values (after thresholding). Options are 'none', 'sqrt' for square root, 'log' for log10, or 'sq' for square. -#' @param layout Type of layout to use. Options are 'random', 'sphere', 'circle', 'fr' for Fruchterman-Reingold force directed layout, and 'kk' for Kamada Kawai for directed layout. -#' @param scale_by How to size vertices. Options are 'lig_sig' for summed outgoing signaling, 'rec_sig' for summed incoming signaling, and 'none'. In the former two cases the values are scaled with asinh after summing all incoming or outgoing signaling. -#' @param vert_scale Integer used to scale size of vertices with our without variable scaling from size_verts_by. -#' @param plot_title Text for the plot's title. -#' @param ... Other parameters to be passed to plot when used with an igraph object. -#' @return an igraph rendered to the active graphics device +#' @param dom a domino object with network built ([build_domino()]) +#' @param cols named vector indicating the colors for clusters. Values are colors and names must match clusters in the domino object. If left as NULL then ggplot colors are generated for the clusters +#' @param edge_weight weight for determining thickness of edges on plot. Signaling values are multiplied by this value +#' @param clusts vector of clusters to be included in the network plot +#' @param showOutgoingSignalingClusts vector of clusters to plot the outgoing signaling from +#' @param showIncomingSignalingClusts vector of clusters to plot the incoming signaling on +#' @param min_thresh minimum signaling threshold. Values lower than the threshold will be set to the threshold. Defaults to -Inf for no threshold +#' @param max_thresh maximum signaling threshold for plotting. Values higher than the threshold will be set to the threshold. Defaults to Inf for no threshold +#' @param normalize options to normalize the signaling matrix. Accepted inputs are 'none' for no normalization, 'rec_norm' to normalize to the maximum value with each receptor cluster, or 'lig_norm' to normalize to the maximum value within each ligand cluster +#' @param scale how to scale the values (after thresholding). Options are 'none', 'sqrt' for square root, 'log' for log10, or 'sq' for square +#' @param layout type of layout to use. Options are 'random', 'sphere', 'circle', 'fr' for Fruchterman-Reingold force directed layout, and 'kk' for Kamada Kawai for directed layout +#' @param scale_by how to size vertices. Options are 'lig_sig' for summed outgoing signaling, 'rec_sig' for summed incoming signaling, and 'none'. In the former two cases the values are scaled with asinh after summing all incoming or outgoing signaling +#' @param vert_scale integer used to scale size of vertices with our without variable scaling from size_verts_by. +#' @param plot_title text for the plot's title. +#' @param ... other parameters to be passed to plot when used with an igraph object. +#' @return An igraph plot rendered to the active graphics device #' @export signaling_network #' @examples +#' example(build_domino, echo = FALSE) #' #basic usage -#' signaling_network(dominoSignal:::pbmc_dom_built_tiny) +#' signaling_network(pbmc_dom_built_tiny, edge_weight = 2) #' # scaling, thresholds, layouts, selecting clusters #' signaling_network( -#' dominoSignal:::pbmc_dom_built_tiny, showOutgoingSignalingClusts = "CD14_monocyte", +#' pbmc_dom_built_tiny, showOutgoingSignalingClusts = "CD14_monocyte", #' scale = "none", norm = "none", layout = "fr", scale_by = "none", -#' vert_scale = 5) +#' vert_scale = 5, edge_weight = 2) #' signaling_network <- function( dom, cols = NULL, edge_weight = 0.3, clusts = NULL, showOutgoingSignalingClusts = NULL, @@ -337,13 +341,14 @@ signaling_network <- function( #' @param cols Named vector of colors for individual genes. Genes not included in this vector will be colored according to class_cols. #' @param lig_scale FALSE or a numeric value to scale the size of ligand vertices based on z-scored expression in the data set. #' @param layout Type of layout to use. Options are 'grid', 'random', 'sphere', 'circle', 'fr' for Fruchterman-Reingold force directed layout, and 'kk' for Kamada Kawai for directed layout. -#' @param ... Other parameters to pass to plot() with an [igraph] object. See [igraph] manual for options. -#' @return an igraph rendered to the active graphics device +#' @param ... Other parameters to pass to plot() with an [igraph](https://r.igraph.org/) object. See [igraph](https://r.igraph.org/) manual for options. +#' @return An igraph plot rendered to the active graphics device #' @export gene_network #' @examples #' #basic usage +#' example(build_domino, echo = FALSE) #' gene_network( -#' dominoSignal:::pbmc_dom_built_tiny, clust = "CD8_T_cell", +#' pbmc_dom_built_tiny, clust = "CD8_T_cell", #' OutgoingSignalingClust = "CD14_monocyte") #' gene_network <- function(dom, clust = NULL, OutgoingSignalingClust = NULL, @@ -486,8 +491,7 @@ gene_network <- function(dom, clust = NULL, OutgoingSignalingClust = NULL, #' Create a heatmap of features organized by cluster #' -#' Creates a heatmap of feature expression (typically transcription factor -#' activation scores) by cells organized by cluster. +#' Creates a heatmap of transcription factor activation scores by cells grouped by cluster. #' #' @param dom Domino object with network built ([build_domino()]) #' @param bool Boolean indicating whether the heatmap should be continuous or boolean. If boolean then bool_thresh will be used to determine how to define activity as positive or negative. @@ -495,20 +499,21 @@ gene_network <- function(dom, clust = NULL, OutgoingSignalingClust = NULL, #' @param title Either a string to use as the title or a boolean describing whether to include a title. In order to pass the 'main' parameter to [ComplexHeatmap::Heatmap()] you must set title to FALSE. #' @param norm Boolean indicating whether or not to normalize the transcrption factors to their max value. #' @param feats Either a vector of features to include in the heatmap or 'all' for all features. If left NULL then the features selected for the signaling network will be shown. -#' @param ann_cols Boolean indicating whether to include cell cluster as a column annotation. Colors can be defined with cols. If FALSE then custom annotations can be passed to NMF. +#' @param ann_cols Boolean indicating whether to include cell cluster as a column annotation. Colors can be defined with cols. If FALSE then custom annotations can be passed to [ComplexHeatmap::Heatmap()]. #' @param cols Named vector of colors to annotate cells by cluster color. Values are taken as colors and names as cluster. If left as NULL then default ggplot colors will be generated. #' @param min_thresh Minimum threshold for color scaling if not a boolean heatmap #' @param max_thresh Maximum threshold for color scaling if not a boolean heatmap #' @param ... Other parameters to pass to [ComplexHeatmap::Heatmap()] . Note that to use the 'main' parameter of [ComplexHeatmap::Heatmap()] you must set title = FALSE and to use 'annCol' or 'annColors' ann_cols must be FALSE. -#' @return a Heatmap rendered to the active graphics device +#' @return A heatmap rendered to the active graphics device #' @export feat_heatmap #' @examples #' #basic usage -#' feat_heatmap(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' feat_heatmap(pbmc_dom_built_tiny) #' #using thresholds #' feat_heatmap( -#' dominoSignal:::pbmc_dom_built_tiny, min_thresh = 0.1, -#' max_thresh = 0.6, norm = TRUE, bool = FALSE) +#' pbmc_dom_built_tiny, min_thresh = 0.1, +#' max_thresh = 0.6, norm = TRUE, bool = FALSE) #' feat_heatmap <- function( dom, feats = NULL, bool = FALSE, bool_thresh = 0.2, title = TRUE, norm = FALSE, @@ -624,15 +629,16 @@ feat_heatmap <- function( #' @param recs Either a vector of receptors to include in the heatmap or 'all' for all receptors. If left NULL then the receptors selected in the signaling network connected to the features plotted will be shown. #' @param mark_connections Boolean indicating whether to add an 'x' in cells where there is a connected receptor or TF. Default FALSE. #' @param ... Other parameters to pass to [ComplexHeatmap::Heatmap()] . Note that to use the 'main' parameter of [ComplexHeatmap::Heatmap()] you must set title = FALSE and to use 'annCol' or 'annColors' ann_cols must be FALSE. -#' @return a Heatmap rendered to the active graphics device +#' @return A heatmap rendered to the active graphics device #' @export cor_heatmap #' @examples +#' example(build_domino, echo = FALSE) #' #basic usage -#' cor_heatmap(dominoSignal:::pbmc_dom_built_tiny, title = "PBMC R-TF Correlations") +#' cor_heatmap(pbmc_dom_built_tiny, title = "PBMC R-TF Correlations") #' #show correlations above a specific value -#' cor_heatmap(dominoSignal:::pbmc_dom_built_tiny, bool = TRUE, bool_thresh = 0.25) +#' cor_heatmap(pbmc_dom_built_tiny, bool = TRUE, bool_thresh = 0.1) #' #identify combinations that are connected -#' cor_heatmap(dominoSignal:::pbmc_dom_built_tiny, bool = FALSE, mark_connections = TRUE) +#' cor_heatmap(pbmc_dom_built_tiny, bool = FALSE, mark_connections = TRUE) #' cor_heatmap <- function( dom, bool = FALSE, bool_thresh = 0.15, title = TRUE, feats = NULL, recs = NULL, @@ -711,19 +717,20 @@ cor_heatmap <- function( } } -#' Create a correlation plot between transcription factor activation score and receptor +#' Create a correlation plot between TF and receptor #' -#' Create a correlation plot between transcription factor activation score and receptor +#' Create a correlation plot between transcription factor activation score and receptor expression #' #' @param dom Domino object with network built ([build_domino()]) -#' @param tf Target TF module for plotting with receptor -#' @param rec Target receptor for plotting with TF -#' @param remove_rec_dropout Whether to remove cells with zero expression for plot. This should match the same setting as in build_domino. -#' @param ... Other parameters to pass to ggscatter. -#' @return a ggplot object +#' @param tf Target TF for plottting AUC score +#' @param rec Target receptor for plotting expression +#' @param remove_rec_dropout Whether to remove cells with zero expression for plot. This should match the same setting as in [build_domino()]. +#' @param ... Other parameters to pass to [ggpubr::ggscatter()]. +#' @return A ggplot scatter plot rendered in the active graphics device #' @export cor_scatter #' @examples -#' cor_scatter(dominoSignal:::pbmc_dom_built_tiny, "FLI1","CXCR3") +#' example(build_domino, echo = FALSE) +#' cor_scatter(pbmc_dom_built_tiny, "FLI1","CXCR3") #' cor_scatter <- function(dom, tf, rec, remove_rec_dropout = TRUE, ...) { if (remove_rec_dropout) { @@ -749,15 +756,16 @@ cor_scatter <- function(dom, tf, rec, remove_rec_dropout = TRUE, ...) { #' @param ligand_expression_threshold Minimum mean expression value of a ligand by a cell type for a chord to be rendered between the cell type and the receptor #' @param cell_idents Vector of cell types from cluster assignments in the domino object to be included in the plot. #' @param cell_colors Named vector of color names or hex codes where names correspond to the plotted cell types and the color values -#' @return renders a circos plot to the active graphics device +#' @return Renders a circos plot to the active graphics device #' @export circos_ligand_receptor #' @examples +#' example(build_domino, echo = FALSE) #' #basic usage -#' circos_ligand_receptor(dominoSignal:::pbmc_dom_built_tiny, receptor = "CXCR3") +#' circos_ligand_receptor(pbmc_dom_built_tiny, receptor = "CXCR3") #' #specify colors -#' cols = c("red", "orange", "green", "blue", "pink", "purple", "slategrey", "firebrick", "hotpink") -#' names(cols) = levels(dom_clusters(dominoSignal:::pbmc_dom_built_tiny)) -#' circos_ligand_receptor(dominoSignal:::pbmc_dom_built_tiny, receptor = "CXCR3", cell_colors = cols) +#' cols = c("red", "orange", "green") +#' names(cols) = dom_clusters(pbmc_dom_built_tiny) +#' circos_ligand_receptor(pbmc_dom_built_tiny, receptor = "CXCR3", cell_colors = cols) #' circos_ligand_receptor <- function( dom, receptor, ligand_expression_threshold = 0.01, cell_idents = NULL, @@ -882,16 +890,18 @@ circos_ligand_receptor <- function( #' #' Plot differential linkages among domino results ranked by a comparative statistic #' -#' @param differential_linkages a data.frame output from the test_differential_linkages function +#' @param differential_linkages a data frame output from the [test_differential_linkages()] function #' @param test_statistic column name of differential_linkages where the test statistic used for ranking linkages is stored (ex. 'p.value') #' @param stat_range a two value vector of the minimum and maximum values of test_statistic for plotting linkage features #' @param stat_ranking 'ascending' (lowest value of test statisic is colored red and plotted at the top) or 'descending' (highest value of test statistic is colored red and plotted at the top). #' @param group_palette a named vector of colors to use for each group being compared -#' @return a Heatmap-class object of features ranked by test_statistic annotated with the proportion of subjects that showed active linkage of the features. +#' @return A heatmap-class object of features ranked by test_statistic annotated with the proportion of subjects that showed active linkage of the features. #' @export #' @examples +#' example(build_domino, echo = FALSE) +#' example(test_differential_linkages, echo = FALSE) #' plot_differential_linkages( -#' differential_linkages = dominoSignal:::tiny_differential_linkage_c1, +#' differential_linkages = tiny_differential_linkage_c1, #' test_statistic = "p.value", #' stat_ranking = "ascending" #' ) @@ -975,7 +985,7 @@ plot_differential_linkages <- function( #' Normalizes a matrix to its max value by row or column #' #' @param mat Matrix to be normalized -#' @param dir Direction to normalize the matrix c('row', 'col') +#' @param dir Direction to normalize the matrix (either "row" for row or "col" for column) #' @return A normalized matrix in the direction specified. #' @keywords internal #' diff --git a/R/processing_fxns.R b/R/processing_fxns.R index c68532bb..ed7c041a 100644 --- a/R/processing_fxns.R +++ b/R/processing_fxns.R @@ -4,18 +4,21 @@ #' preprocessed from create_domino and returns a domino object prepared for #' plotting with the various plotting functions in this package. #' -#' @param dom Domino object from create_domino. +#' @param dom Domino object from [create_domino()]. #' @param max_tf_per_clust Maximum number of transcription factors called active in a cluster. #' @param min_tf_pval Minimum p-value from differential feature score test to call a transcription factor active in a cluster. #' @param max_rec_per_tf Maximum number of receptors to link to each transcription factor. -#' @param rec_tf_cor_threshold Minimum pearson correlation used to consider a receptor linked with a transcription factor. Increasing this will decrease the number of receptors linked to each transcription factor. +#' @param rec_tf_cor_threshold Minimum Spearman correlation used to consider a receptor linked with a transcription factor. Increasing this will decrease the number of receptors linked to each transcription factor. #' @param min_rec_percentage Minimum percentage of cells in cluster expressing a receptor for the receptor to be linked to trancription factors in that cluster. #' @return A domino object with a signaling network built #' @export #' @examples -#' pbmc_dom_tiny_built <- build_domino( -#' dom = dominoSignal:::pbmc_dom_tiny, min_tf_pval = .001, max_tf_per_clust = 25, -#' max_rec_per_tf = 25, rec_tf_cor_threshold = .25, min_rec_percentage = 0.1 +#' example(create_domino, echo = FALSE) +#' +#' #a relaxed example +#' pbmc_dom_built_tiny <- build_domino( +#' dom = pbmc_dom_tiny, min_tf_pval = .05, max_tf_per_clust = Inf, +#' max_rec_per_tf = Inf, rec_tf_cor_threshold = .1, min_rec_percentage = 0.01 #' ) #' build_domino <- function( diff --git a/R/sysdata.rda b/R/sysdata.rda index af3c611d..33274639 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/utils.R b/R/utils.R index 1fdc88a5..3f7be0a8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,15 +2,16 @@ #' #' A function to pull database information from a domino object #' -#' @param dom A domino object that has been created -#' @param name_only A boolean for whether to return only the name of the database used +#' @param dom a domino object that has been created +#' @param name_only a boolean for whether to return only the name of the database used #' or the entire database that is stored. Default TRUE. #' @return A vector of unique databases used in building the domino object OR #' a data frame that includes the database information used in the domino object creation #' @export #' @examples -#' database_name <- dom_database(dominoSignal:::pbmc_dom_built_tiny) -#' full_database <- dom_database(dominoSignal:::pbmc_dom_built_tiny, name_only = FALSE) +#' example(build_domino, echo = FALSE) +#' database_name <- dom_database(pbmc_dom_built_tiny) +#' full_database <- dom_database(pbmc_dom_built_tiny, name_only = FALSE) #' dom_database <- function(dom, name_only = TRUE) { db <- slot(dom, "db_info") @@ -25,11 +26,12 @@ dom_database <- function(dom, name_only = TRUE) { #' #' A function to pull z-scored expression from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] +#' @param dom a domino object that has been created with [create_domino()] #' @return A matrix containing the z-scored gene expression values for each gene (row) by cell (column) #' @export #' @examples -#' zscores <- dom_zscores(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' zscores <- dom_zscores(pbmc_dom_built_tiny) #' dom_zscores <- function(dom) { slot(dom, "z_scores") @@ -39,11 +41,12 @@ dom_zscores <- function(dom) { #' #' A function to pull gene expression from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] +#' @param dom a domino object that has been created with [create_domino()] #' @return A matrix containing the gene expression values for each gene (row) by cell (column) #' @export #' @examples -#' counts <- dom_counts(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' counts <- dom_counts(pbmc_dom_built_tiny) #' dom_counts <- function(dom) { as.matrix(slot(dom, "counts")) @@ -53,13 +56,14 @@ dom_counts <- function(dom) { #' #' A function to pull cluster information from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] -#' @param labels A boolean for whether to return the cluster labels for each cell or the clusters used for inferring communication +#' @param dom a domino object that has been created with [create_domino()] +#' @param labels a boolean for whether to return the cluster labels for each cell or the clusters used for inferring communication #' @return A vector containing either the names of the clusters used OR factors of the cluster label for each individual cell #' @export #' @examples -#' cluster_names <- dom_clusters(dominoSignal:::pbmc_dom_built_tiny) -#' cell_cluster_label <- dom_clusters(dominoSignal:::pbmc_dom_built_tiny, labels = TRUE) +#' example(build_domino, echo = FALSE) +#' cluster_names <- dom_clusters(pbmc_dom_built_tiny) +#' cell_cluster_label <- dom_clusters(pbmc_dom_built_tiny, labels = TRUE) #' dom_clusters <- function(dom, labels = FALSE) { clust <- slot(dom, "clusters") @@ -74,11 +78,12 @@ dom_clusters <- function(dom, labels = FALSE) { #' #' A function to pull transcription factor activation scores from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] -#' @return A matrix containing the transcription factor activation scores for each feature (row) by cell (column) +#' @param dom a domino object that has been created with [create_domino()] +#' @return A matrix containing the transcription factor activation scores for each TF (row) by cell (column) #' @export #' @examples -#' tf_activation <- dom_tf_activation(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' tf_activation <- dom_tf_activation(pbmc_dom_built_tiny) #' dom_tf_activation <- function(dom) { slot(dom, "features") @@ -88,12 +93,13 @@ dom_tf_activation <- function(dom) { #' #' A function to pull receptor-transcription factor correlations from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] -#' @param type Either "rl" or "complex", to select between the receptor-ligand or complex correlation matrix +#' @param dom a domino object that has been created with [create_domino()] +#' @param type either "rl" or "complex", to select between the receptor-ligand or complex correlation matrix #' @return A matrix containing the correlation values for each receptor (row) by transcription factor (column) #' @export #' @examples -#' cor_matrix <- dom_correlations(dominoSignal:::pbmc_dom_built_tiny, "rl") +#' example(build_domino, echo = FALSE) +#' cor_matrix <- dom_correlations(pbmc_dom_built_tiny, "rl") #' dom_correlations <- function(dom, type = "rl") { if (type == "complex") { @@ -111,16 +117,17 @@ dom_correlations <- function(dom, type = "rl") { #' #' A function to pull linkages from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] -#' @param link_type One value (out of "complexes", "receptor-ligand", +#' @param dom a domino object that has been created with [create_domino()] +#' @param link_type one value (out of "complexes", "receptor-ligand", #' "tf-target", "tf-receptor", "receptor", "incoming-ligand") used #' to select the desired type of linkage -#' @param by_cluster A boolean to indicate whether the linkages should be returned overall or by cluster +#' @param by_cluster a boolean to indicate whether the linkages should be returned overall or by cluster #' @return A list containing linkages between some combination of receptors, ligands, transcription factors, and clusters #' @export #' @examples -#' complexes <- dom_linkages(dominoSignal:::pbmc_dom_built_tiny, "complexes") -#' tf_rec_by_cluster <- dom_linkages(dominoSignal:::pbmc_dom_built_tiny, "tf-receptor", TRUE) +#' example(build_domino, echo = FALSE) +#' complexes <- dom_linkages(pbmc_dom_built_tiny, "complexes") +#' tf_rec_by_cluster <- dom_linkages(pbmc_dom_built_tiny, "tf-receptor", TRUE) #' dom_linkages <- function(dom, link_type = c( "complexes", "receptor-ligand", @@ -156,13 +163,14 @@ dom_linkages <- function(dom, link_type = c( #' #' A function to pull signaling matrices from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] -#' @param cluster Either NULL to indicate global signaling or a specific cluster for which a -#' @return A data.frame containing the signaling score through each ligand (row) by each cluster (column) OR -#' a data.frame containing the global summed signaling scores between receptors (rows) and ligands (columns) of each cluster +#' @param dom a domino object that has been created with [create_domino()] +#' @param cluster either NULL to indicate global signaling or a specific cluster for which a signaling matrix is desired +#' @return A data frame containing the signaling score through each ligand (row) by each cluster (column) OR +#' a data frame containing the global summed signaling scores between receptors (rows) and ligands (columns) of each cluster #' @export #' @examples -#' monocyte_signaling <- dom_signaling(dominoSignal:::pbmc_dom_built_tiny, cluster = "CD14_monocyte") +#' example(build_domino, echo = FALSE) +#' monocyte_signaling <- dom_signaling(pbmc_dom_built_tiny, cluster = "CD14_monocyte") #' dom_signaling <- function(dom, cluster = NULL) { if (is.null(cluster)) { @@ -174,13 +182,14 @@ dom_signaling <- function(dom, cluster = NULL) { #' Access differential expression #' -#' A function to pull differential expression p values from a domino object +#' A function to pull differential expression p-values from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] -#' @return A matrix containing the p values for differential expression of transcription factors (rows) in each cluster (columns) +#' @param dom a domino object that has been created with [create_domino()] +#' @return A matrix containing the p-values for differential expression of transcription factors (rows) in each cluster (columns) #' @export #' @examples -#' de_mat <- dom_de(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' de_mat <- dom_de(pbmc_dom_built_tiny) #' dom_de <- function(dom) { slot(dom, "clust_de") @@ -190,12 +199,13 @@ dom_de <- function(dom) { #' #' A function to pull the parameters used when running [build_domino()] from a domino object #' -#' @param dom A domino object that has been created with [create_domino()] -#' @return A list containing booleans for whether the object has been created and build, and a list of the +#' @param dom a domino object that has been created with [create_domino()] +#' @return A list containing booleans for whether the object has been created and built and a list of the #' build parameters that were used in [build_domino()] to infer the signaling network #' @export #' @examples -#' build_details <- dom_info(dominoSignal:::pbmc_dom_built_tiny) +#' example(build_domino, echo = FALSE) +#' build_details <- dom_info(pbmc_dom_built_tiny) #' dom_info <- function(dom) { info <- slot(dom, "misc") @@ -212,14 +222,15 @@ dom_info <- function(dom) { #' comparing signaling networks across two separate conditions. In order to run #' this [build_domino()] must be run on the object previously. #' -#' @param dom Domino object containing a signaling network (i.e. [build_domino()] was run) -#' @param return String indicating where to collate "features", "receptors", or "ligands". If "all" then a list of all three will be returned. -#' @param clusters Vector indicating clusters to collate network items from. If left as NULL then all clusters will be included. +#' @param dom a domino object containing a signaling network (i.e. [build_domino()] was run) +#' @param return string indicating whether to collate "features", "receptors", or "ligands". If "all" then a list of all three will be returned. +#' @param clusters vector indicating clusters to collate network items from. If left as NULL then all clusters will be included. #' @return A vector containing all features, receptors, or ligands in the data set or a list containing all three. #' @export #' @examples -#' monocyte_receptors <- dom_network_items(dominoSignal:::pbmc_dom_built_tiny, "CD14_monocyte", "receptors") -#' all_tfs <- dom_network_items(dominoSignal:::pbmc_dom_built_tiny, return = "features") +#' example(build_domino, echo = FALSE) +#' monocyte_receptors <- dom_network_items(pbmc_dom_built_tiny, "CD14_monocyte", "receptors") +#' all_tfs <- dom_network_items(pbmc_dom_built_tiny, return = "features") #' dom_network_items <- function(dom, clusters = NULL, return = NULL) { if (!dom@misc[["build"]]) { @@ -261,18 +272,18 @@ dom_network_items <- function(dom, clusters = NULL, return = NULL) { #' Check input arguments #' -#' Accepts an object and rules for checking, stops if rules not met. +#' Accepts an object and rules to check against; stops if requirements are not met #' -#' @param arg The argument to check -#' @param allow_class Vector of allowed classes -#' @param allow_len Vector of allowed lengths +#' @param arg the argument to check +#' @param allow_class vector of allowed classes +#' @param allow_len vector of allowed lengths #' @param allow_range range of minimum and maximum values i.e. c(1, 5) -#' @param allow_values Vector of allowed values -#' @param need_vars Vector of required variables -#' @param need_colnames Logical for whether colnames are required -#' @param need_rownames Logical for whether rownames are required -#' @param need_names Logical for whether names are required -#' @return logical +#' @param allow_values vector of allowed values +#' @param need_vars vector of required variables +#' @param need_colnames vogical for whether colnames are required +#' @param need_rownames logical for whether rownames are required +#' @param need_names logical for whether names are required +#' @return Logical indicating whether the argument meets the requirements #' @keywords internal #' check_arg <- function(arg, allow_class = NULL, allow_len = NULL, @@ -336,10 +347,10 @@ check_arg <- function(arg, allow_class = NULL, allow_len = NULL, } -#' Read in data if an object looks like path to it. +#' Read in data if an object looks like path to it #' -#' @param obj Object to read if not already object -#' @return obj Object itself in case its not a character +#' @param obj object to read if not already object +#' @return Object itself or data read in from path #' @keywords internal read_if_char <- function(obj) { if (is(obj, "character")) { @@ -351,8 +362,8 @@ read_if_char <- function(obj) { #' Change cases of True/False syntax from Python to TRUE/FALSE R syntax #' -#' @param obj Object that will be converted -#' @return obj The converted object +#' @param obj object that will be converted +#' @return The converted object #' @keywords internal conv_py_bools <- function(obj) { for (x in colnames(obj)) { @@ -364,3 +375,118 @@ conv_py_bools <- function(obj) { return(obj) } + +#' Create a mock linkage summary object +#' +#' @return obj a linkage summary object +#' @export +mock_linkage_summary <- function() { + linkage_sum_tiny <- new("linkage_summary", + subject_meta = data.frame( + "subject_names" = paste0("P",1:6), + "group" = c(rep("G1", 3), rep("G2", 3)) + ), + subject_names = factor( + paste0("P",1:6), levels = paste0("P",1:6) + ), + subject_linkages = list( + "P1" = list( + "C1" = list( + "tfs" = c("TF1", "TF2", "TF3", "TF4"), + "rec" = c("R1", "R2", "R3", "R4"), + "incoming_lig" = c("L1", "L2", "L3", "L4"), + "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), + "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3", "R4 <- L4") + ), + "C2" = list( + "tfs" = c("TF2", "TF3", "TF4"), + "rec" = c("R2", "R3", "R4"), + "incoming_lig" = c("L2", "L3", "L4"), + "tfs_rec" = c("TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), + "rec_lig" = c("R2 <- L2", "R3 <- L3", "R4 <- L4") + ) + ), + "P2" = list( + "C1" = list( + "tfs" = c("TF1", "TF2"), + "rec" = c("R1", "R2"), + "incoming_lig" = c("L1", "L2"), + "tfs_rec" = c("TF1 <- R1", "TF2 <- R2"), + "rec_lig" = c("R1 <- L1", "R2 <- L2") + ), + "C2" = list( + "tfs" = c("TF3", "TF4"), + "rec" = c("R3", "R4"), + "incoming_lig" = c("L3", "L4"), + "tfs_rec" = c("TF3 <- R3", "TF4 <- R4"), + "rec_lig" = c("R3 <- L3", "R4 <- L4") + ) + ), + "P3" = list( + "C1" = list( + "tfs" = c("TF1", "TF2"), + "rec" = c("R1", "R2"), + "incoming_lig" = c("L1", "L2"), + "tfs_rec" = c("TF1 <- R1", "TF2 <- R2"), + "rec_lig" = c("R1 <- L1", "R2 <- L2") + ), + "C2" = list( + "tfs" = c("TF3"), + "rec" = c("R3"), + "incoming_lig" = c("L3"), + "tfs_rec" = c("TF3 <- R3"), + "rec_lig" = c("R3 <- L3") + ) + ), + "P4" = list( + "C1" = list( + "tfs" = c("TF2", "TF3", "TF4"), + "rec" = c("R2", "R3", "R4"), + "incoming_lig" = c("L2", "L3", "L4"), + "tfs_rec" = c("TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), + "rec_lig" = c("R2 <- L2", "R3 <- L3", "R4 <- L4") + ), + "C2" = list( + "tfs" = c("TF1", "TF2", "TF3", "TF4"), + "rec" = c("R1", "R2", "R3", "R4"), + "incoming_lig" = c("L1", "L2", "L3", "L4"), + "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), + "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3", "R4 <- L4") + ) + ), + "P5" = list( + "C1" = list( + "tfs" = c("TF3"), + "rec" = c("R3"), + "incoming_lig" = c("L3"), + "tfs_rec" = c("TF3 <- R3"), + "rec_lig" = c("R3 <- L3") + ), + "C2" = list( + "tfs" = c("TF1", "TF2", "TF3", "TF4"), + "rec" = c("R1", "R2", "R3", "R4"), + "incoming_lig" = c("L1", "L2", "L3", "L4"), + "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), + "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3", "R4 <- L4") + ) + ), + "P6" = list( + "C1" = list( + "tfs" = c(), + "rec" = c(), + "incoming_lig" = c(), + "tfs_rec" = c(), + "rec_lig" = c() + ), + "C2" = list( + "tfs" = c("TF1", "TF2", "TF3"), + "rec" = c("R1", "R2", "R3"), + "incoming_lig" = c("L1", "L2", "L3"), + "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3"), + "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3") + ) + ) + ) +) +return(linkage_sum_tiny) +} diff --git a/README.md b/README.md index 61e64e5b..3493e960 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,7 @@ dominoSignal is an updated version of the original [domino](https://github.com/E ### Installation -dominoSignal is undergoing active development where aspects of how data is used, analyzed, and interpreted is subject to change as new features and fixes are implemented. **v0.99.1** of dominoSignal serves as the first stable development version during these active updates for reproducible usage. - -dominoSignal is the continuation of Domino software hosted on the [Elisseeff-Lab GitHub](https://github.com/Elisseeff-Lab/domino). The most up to date stable version is on the [FertigLab GitHub](https://github.com/FertigLab). This version of dominoSignal can be installed using the remotes package. +dominoSignal is the continuation of Domino software hosted on the [Elisseeff-Lab GitHub](https://github.com/Elisseeff-Lab/domino). dominoSignal is undergoing active development where aspects of how data is used, analyzed, and interpreted is subject to change as new features and fixes are implemented. The most up to date stable version is on the [FertigLab GitHub](https://github.com/FertigLab). This version of dominoSignal can be installed using the remotes package. ```r if(!require(remotes)){ @@ -22,7 +20,7 @@ remotes::install_github('FertigLab/dominoSignal') Here is an overview of how dominoSignal might be used in analysis of a single cell RNA sequencing data set: 1. Transcription factor activation scores are calculated (we recommend using [pySCENIC](https://pyscenic.readthedocs.io/en/latest/), but other methods can be used as well) -2. A ligand-receptor database is used to map linkages between ligands and receptors (we recommend using [CellphoneDB](https://www.cellphonedb.org/), but other methods can be used as well). +2. A ligand-receptor database is used to map linkages between ligands and receptors (we recommend using [CellPhoneDB](https://www.cellphonedb.org/), but other methods can be used as well). 3. A domino object is created using counts, z-scored counts, clustering information, and the data from steps 1 and 2. 4. Parameters such as the maximum number of transcription factors and receptors or the minimum correlation threshold (among others) are used to make a cell communication network 5. Communication networks can be extracted from within the domino object or visualized using a variety of plotting functions @@ -35,7 +33,8 @@ If you use our package in your analysis, please cite us: > Cherry C, Maestas DR, Han J, Andorko JI, Cahan P, Fertig EJ, Garmire LX, Elisseeff JH. Computational reconstruction of the signalling networks surrounding implanted biomaterials from single-cell transcriptomics. Nat Biomed Eng. 2021 Oct;5(10):1228-1238. doi: 10.1038/s41551-021-00770-5. Epub 2021 Aug 2. PMID: 34341534; PMCID: PMC9894531. -> Cherry C, Mitchell J, Nagaraj S, Krishnan K, Lvovs D, Fertig E, Elisseeff J (2024). dominoSignal: Cell Communication Analysis for Single Cell RNA Sequencing. R package version 0.99.1. +> Cherry C, Mitchell J, Nagaraj S, Krishnan K, Lvovs D, Fertig E, Elisseeff J (2024). dominoSignal: Cell Communication Analysis for Single Cell RNA Sequencing. R package version 0.99.2. ### Contact Us If you find any bugs or have questions, please let us know [here](https://github.com/FertigLab/dominoSignal/issues). +tat \ No newline at end of file diff --git a/_pkgdown.yml b/_pkgdown.yml index a37dbaa6..a8629b0c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -32,6 +32,8 @@ reference: and produced in dominoSignal analysis contents: - domino-class + - print,domino-method + - show,domino-method - linkage_summary - title: "Analysis Functions" desc: > @@ -82,6 +84,13 @@ reference: - count_linkage - mean_ligand_expression - rename_clusters + - mock_linkage_summary +- title: "Data" + contents: + - CellPhoneDB + - PBMC + - SCENIC + navbar: type: light @@ -95,7 +104,7 @@ navbar: menu: - text: SCENIC for TF Scoring href: vignettes/articles/tf_scenic_vignette.html - - text: Using CellphoneDB + - text: Using CellPhoneDB href: vignettes/articles/cellphonedb_vignette.html - text: Domino Object Structure href: vignettes/domino_object_vignette.html @@ -127,6 +136,8 @@ news: one_page: true cran_dates: false releases: + - text: "dominoSignal v0.99.2" + href: https://github.com/FertigLab/dominoSignal/releases/tag/v0.99.2-alpha - text: "dominoSignal v0.2.1" href: https://github.com/FertigLab/dominoSignal/releases/tag/v0.2.1-alpha - text: "dominoSignal v0.2.2" diff --git a/data-raw/testdata.R b/data-raw/testdata.R deleted file mode 100644 index 4fd4cb13..00000000 --- a/data-raw/testdata.R +++ /dev/null @@ -1,296 +0,0 @@ -# generate objects for comparison in testing scripts - -library(SingleCellExperiment) -library(dominoSignal) - -# load data for generation of test results from zenodo repository -# Zenodo host of outputs from SCENIC analysis -data_url <- "https://zenodo.org/records/10951634/files" -temp_dir <- tempdir() - -pbmc_dir <- paste0(temp_dir, "/pbmc") -if (!dir.exists(pbmc_dir)) { - dir.create(pbmc_dir) -} - -# SingleCellExperiment object of preprocessed PBMC3K data -download.file(url = paste0(data_url, "/pbmc3k_sce.rds"), - destfile = paste0(pbmc_dir, "/pbmc3k_sce.rds")) -pbmc <- readRDS(paste0(pbmc_dir, "/pbmc3k_sce.rds")) - -# SCENIC input files -scenic_dir <- paste0(temp_dir, "/scenic") -if (!dir.exists(scenic_dir)) { - dir.create(scenic_dir) -} -download.file(url = paste0(data_url, "/auc_pbmc_3k.csv"), - destfile = paste0(scenic_dir, "/auc_pbmc_3k.csv")) -download.file(url = paste0(data_url, "/regulons_pbmc_3k.csv"), - destfile = paste0(scenic_dir, "/regulons_pbmc_3k.csv")) -auc <- read.table(paste0(scenic_dir, "/auc_pbmc_3k.csv"), - header = TRUE, row.names = 1, - stringsAsFactors = FALSE, sep = ",") -regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv")) - -# CellPhoneDB Database -cellphone_url <- "https://github.com/ventolab/cellphonedb-data/archive/refs/tags/v4.0.0.tar.gz" -cellphone_tar <- paste0(temp_dir, "/cellphoneDB_v4.tar.gz") -download.file(url = cellphone_url, destfile = cellphone_tar) -cellphone_dir <- paste0(temp_dir, "/cellphoneDB_v4") -untar(tarfile = cellphone_tar, exdir = cellphone_dir) -cellphone_data <- paste0(cellphone_dir, "/cellphonedb-data-4.0.0/data") - -interactions <- read.csv(paste0(cellphone_data, "/interaction_input.csv"), stringsAsFactors = FALSE) -complexes <- read.csv(paste0(cellphone_data, "/complex_input.csv"), stringsAsFactors = FALSE) -genes <- read.csv(paste0(cellphone_data, "/gene_input.csv"), stringsAsFactors = FALSE) -proteins <- read.csv(paste0(cellphone_data, "/protein_input.csv"), stringsAsFactors = FALSE) - -# subset the pbmc data to fewer cells to meet package requirements -RNA_features <- c( - "CCL20", "CXCR3", "CCR6", - "IL7", "IL7R", "IL2RG", - "TGFB3", "TGFBR3", - "ITGA6", "ITGB4", "NRG1" - ) -TF_features <- c( - "DBP", - "FLI1", "ZNF431", - "ZNF324", "CREM", "FOSL1" -) -name_features <- c( - "CCL20", "CXCR3", "CCR6", - "IL7", "IL7_receptor", - "TGFB3", "TGFBR3", - "integrin_a6b4_complex", "NRG1" - ) -cell_types_dwn <- c("CD8_T_cell", "CD14_monocyte", "B_cell") -n <- 120 -cell_list <- list() -set.seed(123) -for(i in seq_along(cell_types_dwn)){ - cell <- cell_types_dwn[i] - cell_barcodes <- colnames(pbmc)[pbmc$cell_type == cell] - dwn_barcodes <- sample(cell_barcodes, n, replace = FALSE) - cell_list[[cell]] <- dwn_barcodes -} -barcodes_dwn <- unlist(cell_list) -clusters_tiny <- factor(rep(names(cell_list), lengths(cell_list))) -names(clusters_tiny) <- barcodes_dwn - -counts <- assay(pbmc, "counts") -z_scores <- t(scale(t(assay(pbmc, "logcounts")))) - -RNA_count_tiny <- counts[rownames(assay(pbmc, "counts")) %in% RNA_features, - colnames(assay(pbmc, "counts")) %in% barcodes_dwn] -RNA_zscore_tiny <- z_scores[rownames(assay(pbmc, "logcounts")) %in% RNA_features, - colnames(assay(pbmc, "logcounts")) %in% barcodes_dwn] - -# subset CellPhoneDB inputs -complexes_tiny <- complexes[complexes$complex_name %in% name_features,] -genes_tiny <- genes[genes$gene_name %in% RNA_features,] -proteins_tiny <- proteins[proteins$uniprot %in% genes_tiny$uniprot,] -interactions_tiny <- interactions[ - (interactions$partner_a %in% proteins_tiny$uniprot | interactions$partner_a %in% complexes_tiny$complex_name) & - (interactions$partner_b%in% proteins_tiny$uniprot | interactions$partner_b %in% complexes_tiny$complex_name),] - - -# subset SCENIC inputs -auc <- t(auc) -rownames(auc) <- gsub("\\.\\.\\.$", "", rownames(auc)) -auc_tiny <- auc[TF_features, barcodes_dwn] - -regulons <- regulons[-1:-2, ] -colnames(regulons) <- c("TF", "MotifID", "AUC", "NES", "MotifSimilarityQvalue", "OrthologousIdentity", "Annotation", "Context", "TargetGenes", "RankAtMax") -regulons_tiny <- regulons[regulons$TF %in% TF_features,] - -# Make rl_map -rl_map_tiny <- dominoSignal::create_rl_map_cellphonedb( - genes = genes_tiny, - proteins = proteins_tiny, - interactions = interactions_tiny, - complexes = complexes_tiny -) - -# Get regulon list -regulon_list_tiny <- dominoSignal::create_regulon_list_scenic( - regulons = regulons_tiny -) - -# Create test domino object -pbmc_dom_tiny <- create_domino( - rl_map = rl_map_tiny, - features = auc_tiny, - counts = RNA_count_tiny, - z_scores = RNA_zscore_tiny, - clusters = clusters_tiny, - tf_targets = regulon_list_tiny, - use_clusters = TRUE, - use_complexes = TRUE, - remove_rec_dropout = FALSE -) - -# Create built domino object -pbmc_dom_built_tiny <- build_domino( - dom = pbmc_dom_tiny, - min_tf_pval = .05, - max_tf_per_clust = Inf, - max_rec_per_tf = Inf, - rec_tf_cor_threshold = .1, - min_rec_percentage = 0.01 -) - -# create a second tiny domino object for list comparison functions -clusters_tiny_alt <- clusters_tiny[c(121:240, 1:120, 241:360)] -names(clusters_tiny_alt) <- names(clusters_tiny) -pbmc_dom_tiny_alt <- create_domino( - rl_map = rl_map_tiny, - features = auc_tiny, - counts = RNA_count_tiny, - z_scores = RNA_zscore_tiny, - # reassigned clusters - clusters = clusters_tiny_alt, - tf_targets = regulon_list_tiny, - use_clusters = TRUE, - use_complexes = TRUE, - remove_rec_dropout = FALSE -) -pbmc_dom_built_tiny_alt <- build_domino( - dom = pbmc_dom_tiny_alt, - min_tf_pval = .05, - max_tf_per_clust = Inf, - max_rec_per_tf = Inf, - rec_tf_cor_threshold = .1, - min_rec_percentage = 0.01 -) -dom_ls_tiny <- list( - dom1 = pbmc_dom_built_tiny, - dom2 = pbmc_dom_built_tiny_alt -) - -# example linkage summary for comparitive functions -linkage_sum_tiny <- new("linkage_summary", - subject_meta = data.frame( - "subject_names" = paste0("P",1:6), - "group" = c(rep("G1", 3), rep("G2", 3)) - ), - subject_names = factor( - paste0("P",1:6), levels = paste0("P",1:6) - ), - subject_linkages = list( - "P1" = list( - "C1" = list( - "tfs" = c("TF1", "TF2", "TF3", "TF4"), - "rec" = c("R1", "R2", "R3", "R4"), - "incoming_lig" = c("L1", "L2", "L3", "L4"), - "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), - "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3", "R4 <- L4") - ), - "C2" = list( - "tfs" = c("TF2", "TF3", "TF4"), - "rec" = c("R2", "R3", "R4"), - "incoming_lig" = c("L2", "L3", "L4"), - "tfs_rec" = c("TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), - "rec_lig" = c("R2 <- L2", "R3 <- L3", "R4 <- L4") - ) - ), - "P2" = list( - "C1" = list( - "tfs" = c("TF1", "TF2"), - "rec" = c("R1", "R2"), - "incoming_lig" = c("L1", "L2"), - "tfs_rec" = c("TF1 <- R1", "TF2 <- R2"), - "rec_lig" = c("R1 <- L1", "R2 <- L2") - ), - "C2" = list( - "tfs" = c("TF3", "TF4"), - "rec" = c("R3", "R4"), - "incoming_lig" = c("L3", "L4"), - "tfs_rec" = c("TF3 <- R3", "TF4 <- R4"), - "rec_lig" = c("R3 <- L3", "R4 <- L4") - ) - ), - "P3" = list( - "C1" = list( - "tfs" = c("TF1", "TF2"), - "rec" = c("R1", "R2"), - "incoming_lig" = c("L1", "L2"), - "tfs_rec" = c("TF1 <- R1", "TF2 <- R2"), - "rec_lig" = c("R1 <- L1", "R2 <- L2") - ), - "C2" = list( - "tfs" = c("TF3"), - "rec" = c("R3"), - "incoming_lig" = c("L3"), - "tfs_rec" = c("TF3 <- R3"), - "rec_lig" = c("R3 <- L3") - ) - ), - "P4" = list( - "C1" = list( - "tfs" = c("TF2", "TF3", "TF4"), - "rec" = c("R2", "R3", "R4"), - "incoming_lig" = c("L2", "L3", "L4"), - "tfs_rec" = c("TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), - "rec_lig" = c("R2 <- L2", "R3 <- L3", "R4 <- L4") - ), - "C2" = list( - "tfs" = c("TF1", "TF2", "TF3", "TF4"), - "rec" = c("R1", "R2", "R3", "R4"), - "incoming_lig" = c("L1", "L2", "L3", "L4"), - "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), - "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3", "R4 <- L4") - ) - ), - "P5" = list( - "C1" = list( - "tfs" = c("TF3"), - "rec" = c("R3"), - "incoming_lig" = c("L3"), - "tfs_rec" = c("TF3 <- R3"), - "rec_lig" = c("R3 <- L3") - ), - "C2" = list( - "tfs" = c("TF1", "TF2", "TF3", "TF4"), - "rec" = c("R1", "R2", "R3", "R4"), - "incoming_lig" = c("L1", "L2", "L3", "L4"), - "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3", "TF4 <- R4"), - "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3", "R4 <- L4") - ) - ), - "P6" = list( - "C1" = list( - "tfs" = c(), - "rec" = c(), - "incoming_lig" = c(), - "tfs_rec" = c(), - "rec_lig" = c() - ), - "C2" = list( - "tfs" = c("TF1", "TF2", "TF3"), - "rec" = c("R1", "R2", "R3"), - "incoming_lig" = c("L1", "L2", "L3"), - "tfs_rec" = c("TF1 <- R1", "TF2 <- R2", "TF3 <- R3"), - "rec_lig" = c("R1 <- L1", "R2 <- L2", "R3 <- L3") - ) - ) - ) -) - -# test result object from differential linkage tests -tiny_differential_linkage_c1 <- test_differential_linkages( - linkage_summary = linkage_sum_tiny, cluster = "C1", group.by = "group", - linkage = "rec", subject_names = linkage_sum_tiny@subject_names, test_name = "fishers.exact" -) -tiny_differential_linkage_c2 <- test_differential_linkages( - linkage_summary = linkage_sum_tiny, cluster = "C2", group.by = "group", - linkage = "rec", subject_names = linkage_sum_tiny@subject_names, test_name = "fishers.exact" -) - -# Save all test files to internal sysdata object -usethis::use_data( - pbmc_dom_built_tiny, complexes_tiny, genes_tiny, proteins_tiny, interactions_tiny, - pbmc_dom_tiny, regulon_list_tiny, rl_map_tiny, regulons_tiny, clusters_tiny, - RNA_count_tiny, RNA_zscore_tiny, auc_tiny, - dom_ls_tiny, linkage_sum_tiny, tiny_differential_linkage_c1, tiny_differential_linkage_c2, - internal = TRUE -) diff --git a/data-raw/tiny_data.R b/data-raw/tiny_data.R new file mode 100644 index 00000000..50f46f60 --- /dev/null +++ b/data-raw/tiny_data.R @@ -0,0 +1,161 @@ +# generate objects for comparison in testing scripts + +library(SingleCellExperiment) +library(dominoSignal) + +# load data for generation of test results from zenodo repository +# Zenodo host of outputs from SCENIC analysis +data_url <- "https://zenodo.org/records/10951634/files" +temp_dir <- tempdir() + +pbmc_dir <- paste0(temp_dir, "/pbmc") +if (!dir.exists(pbmc_dir)) { + dir.create(pbmc_dir) +} + +# SingleCellExperiment object of preprocessed PBMC3K data +download.file(url = paste0(data_url, "/pbmc3k_sce.rds"), + destfile = paste0(pbmc_dir, "/pbmc3k_sce.rds")) +pbmc <- readRDS(paste0(pbmc_dir, "/pbmc3k_sce.rds")) + +# SCENIC input files +scenic_dir <- paste0(temp_dir, "/scenic") +if (!dir.exists(scenic_dir)) { + dir.create(scenic_dir) +} +download.file(url = paste0(data_url, "/auc_pbmc_3k.csv"), + destfile = paste0(scenic_dir, "/auc_pbmc_3k.csv")) +download.file(url = paste0(data_url, "/regulons_pbmc_3k.csv"), + destfile = paste0(scenic_dir, "/regulons_pbmc_3k.csv")) +auc <- read.table(paste0(scenic_dir, "/auc_pbmc_3k.csv"), + header = TRUE, row.names = 1, + stringsAsFactors = FALSE, sep = ",") +regulons <- read.csv(paste0(scenic_dir, "/regulons_pbmc_3k.csv")) + +# CellPhoneDB Database +cellphone_url <- "https://github.com/ventolab/cellphonedb-data/archive/refs/tags/v4.0.0.tar.gz" +cellphone_tar <- paste0(temp_dir, "/cellphoneDB_v4.tar.gz") +if(!file.exists(cellphone_tar)){ + download.file(url = cellphone_url, destfile = cellphone_tar) +} + +cellphone_dir <- paste0(temp_dir, "/cellphoneDB_v4") +untar(tarfile = cellphone_tar, exdir = cellphone_dir) +cellphone_data <- paste0(cellphone_dir, "/cellphonedb-data-4.0.0/data") + +interactions <- read.csv(paste0(cellphone_data, "/interaction_input.csv"), stringsAsFactors = FALSE) +complexes <- read.csv(paste0(cellphone_data, "/complex_input.csv"), stringsAsFactors = FALSE) +genes <- read.csv(paste0(cellphone_data, "/gene_input.csv"), stringsAsFactors = FALSE) +proteins <- read.csv(paste0(cellphone_data, "/protein_input.csv"), stringsAsFactors = FALSE) + +# subset the pbmc data to fewer cells to meet package requirements +RNA_features <- c( + "CCL20", "CXCR3", "CCR6", + "IL7", "IL7R", "IL2RG", + "TGFB3", "TGFBR3", + "ITGA6", "ITGB4", "NRG1" + ) +TF_features <- c( + "DBP", + "FLI1", "ZNF431", + "ZNF324", "CREM", "FOSL1" +) +name_features <- c( + "CCL20", "CXCR3", "CCR6", + "IL7", "IL7_receptor", + "TGFB3", "TGFBR3", + "integrin_a6b4_complex", "NRG1" + ) +cell_types_dwn <- c("CD8_T_cell", "CD14_monocyte", "B_cell") +n <- 120 +cell_list <- list() +set.seed(123) +for(i in seq_along(cell_types_dwn)){ + cell <- cell_types_dwn[i] + cell_barcodes <- colnames(pbmc)[pbmc$cell_type == cell] + dwn_barcodes <- sample(cell_barcodes, n, replace = FALSE) + cell_list[[cell]] <- dwn_barcodes +} +barcodes_dwn <- unlist(cell_list) +clusters_tiny <- factor(rep(names(cell_list), lengths(cell_list))) +names(clusters_tiny) <- barcodes_dwn + +counts <- assay(pbmc, "counts") +z_scores <- t(scale(t(assay(pbmc, "logcounts")))) + +RNA_count_tiny <- counts[rownames(assay(pbmc, "counts")) %in% RNA_features, + colnames(assay(pbmc, "counts")) %in% barcodes_dwn] +RNA_zscore_tiny <- z_scores[rownames(assay(pbmc, "logcounts")) %in% RNA_features, + colnames(assay(pbmc, "logcounts")) %in% barcodes_dwn] + +# subset CellPhoneDB inputs +complexes_tiny <- complexes[complexes$complex_name %in% name_features,] +genes_tiny <- genes[genes$gene_name %in% RNA_features,] +proteins_tiny <- proteins[proteins$uniprot %in% genes_tiny$uniprot,] +interactions_tiny <- interactions[ + (interactions$partner_a %in% proteins_tiny$uniprot | interactions$partner_a %in% complexes_tiny$complex_name) & + (interactions$partner_b%in% proteins_tiny$uniprot | interactions$partner_b %in% complexes_tiny$complex_name),] + + +# subset SCENIC inputs +auc <- t(auc) +rownames(auc) <- gsub("\\.\\.\\.$", "", rownames(auc)) +auc_tiny <- auc[TF_features, barcodes_dwn] + +regulons <- regulons[-1:-2, ] +colnames(regulons) <- c("TF", "MotifID", "AUC", "NES", "MotifSimilarityQvalue", "OrthologousIdentity", "Annotation", "Context", "TargetGenes", "RankAtMax") +regulons_tiny <- regulons[regulons$TF %in% TF_features,] + +# Make rl_map +rl_map_tiny <- dominoSignal::create_rl_map_cellphonedb( + genes = genes_tiny, + proteins = proteins_tiny, + interactions = interactions_tiny, + complexes = complexes_tiny +) + +# Get regulon list +regulon_list_tiny <- dominoSignal::create_regulon_list_scenic( + regulons = regulons_tiny +) + +# Create test domino object +pbmc_dom_tiny <- create_domino( + rl_map = rl_map_tiny, + features = auc_tiny, + counts = RNA_count_tiny, + z_scores = RNA_zscore_tiny, + clusters = clusters_tiny, + tf_targets = regulon_list_tiny, + use_clusters = TRUE, + use_complexes = TRUE, + remove_rec_dropout = FALSE +) + +# Create built domino object +pbmc_dom_built_tiny <- build_domino( + dom = pbmc_dom_tiny, + min_tf_pval = .05, + max_tf_per_clust = Inf, + max_rec_per_tf = Inf, + rec_tf_cor_threshold = .1, + min_rec_percentage = 0.01 +) + + +# save all data to be used in tests and examples +CellPhoneDB <- list(complexes_tiny=complexes_tiny, + genes_tiny=genes_tiny, + proteins_tiny=proteins_tiny, + interactions_tiny=interactions_tiny) +save(CellPhoneDB, file = "data/CellPhoneDB.RData") + +SCENIC <- list(auc_tiny=auc_tiny, + regulons_tiny=regulons_tiny) +save(SCENIC, file = "data/SCENIC.RData") + +PBMC <- list(RNA_count_tiny=RNA_count_tiny, + RNA_zscore_tiny=RNA_zscore_tiny, + clusters_tiny=clusters_tiny) +save(PBMC, file = "data/PBMC.RData") + diff --git a/data/CellPhoneDB.RData b/data/CellPhoneDB.RData new file mode 100644 index 00000000..cdff528f Binary files /dev/null and b/data/CellPhoneDB.RData differ diff --git a/data/PBMC.RData b/data/PBMC.RData new file mode 100644 index 00000000..9054731f Binary files /dev/null and b/data/PBMC.RData differ diff --git a/data/SCENIC.RData b/data/SCENIC.RData new file mode 100644 index 00000000..513b4741 Binary files /dev/null and b/data/SCENIC.RData differ diff --git a/docs/404.html b/docs/404.html index 18c6168b..89f18289 100644 --- a/docs/404.html +++ b/docs/404.html @@ -33,7 +33,7 @@ dominoSignal - 0.99.1 + 0.99.2