-
Notifications
You must be signed in to change notification settings - Fork 0
/
Functions.Rmd
2609 lines (2048 loc) · 104 KB
/
Functions.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "Functions"
author: Athena Golfinos-Owens
date: 05/30/2023
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Seurat single-cell processing
## Seurat pre-processing helper (normalize through UMAP)
Run this function in the CONSOLE as it has figure outputs that cannot be seen if you are running code in-line
NOTE: this function requires user input that you can determine the values of based on the output figures
seu: name of your seurat object (individual sample, pre-processing)
out: absolute or relative path where the function will output a log of pre-processing steps
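remove_doublets: Set to TRUE to run DoubletFinder after clustering. Set to FALSE to skip doublet detection, in which case pct_doublets is not needed.
pct_doublets: Expected doublet rate, given as a fraction of cells (for example 0.075 for 7.5%); the function multiplies this value by the number of cells to get the expected number of doublets. Please reference the 10X documentation for appropriate values for this argument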
feature_subset: Recommend keeping this as TRUE. Would you like to be presented with the nCount_RNA, nFeature_RNA, and % mitochondrial cell violin plots to set cutoffs for filtering? If FALSE, there will be no opportunity to do so
batch_id: This is the name of the metadata column in seu@meta.data that contains batch-related information. This could be sample ID, tissue, etc. It can be anything that may introduce technical batch effects, such as sequencing date or source animal.
reduction: For a raw object from a single sample, keep this as 'pca'. However, if you are running this pipeline on an atlas of integrated data where you used this function to perform Harmony integration, you should change this to 'harmonyRNA'.
harmony: This should be set to FALSE unless your atlas has been merged to contain multiple batches. If this is the case, you should change to TRUE. You should also then change your reduction to "harmonyRNA"
seurat_integration: Leave this as FALSE unless you have a specific reason for changing to TRUE.
normalize: Recommend keeping this as TRUE. You should set this to TRUE if the data is raw. ONLY Set to FALSE if the data has been integrated using Seurat integration as you cannot normalize again.
test_npcs: This sets how many PCs will be initially calculated. As you will be selecting a set number of PCs to run the UMAP with during the running of this function, this is NOT the number of PCs to use in calculation of the UMAP. This is simply the number of PCs to show in an elbow plot. Recommend leaving this as is unless you have a large dataset (>20,000 cells), as 200 might be needed to fully see where the variance wanes.
assay: Leave this as RNA unless you have a different assay you have added to the object that you would prefer to use.
Output:
The output of this function is a processed seurat object
Usage example:
seu <- seu_pp1(seu = seu, out="Z:/Computational_Toxicology/Athena/Mouse_scRNAseq/Kimmel_et_al_2019/Spleen/kimmel_seurat.RDS", remove_doublets = FALSE, feature_subset = FALSE, batch_id = 'orig.ident', reduction = 'pca', harmony = FALSE, seurat_integration = FALSE, normalize = TRUE, test_npcs = 100, assay = 'RNA')
```{r}
# Pre-process a Seurat object: optional QC filtering, optional Seurat anchor
# integration, normalization, variable features, scaling, PCA, optional
# Harmony, neighbors, UMAP, clustering at resolutions 0.1-3 and optional
# DoubletFinder doublet calling.
#
# The function is interactive: it shows plots and then reads cutoffs from the
# console with readline(), so run it from an interactive console.
#
# Arguments:
#   seu                 Seurat object to process.
#   out                 Directory under which a dated
#                       'Seurat_preprocessing_outputs_<date>/' folder is
#                       created to hold the log, plots, DoubletFinder stats
#                       and the saved object.
#   remove_doublets     If TRUE, run DoubletFinder after clustering.
#   pct_doublets        Expected doublet rate. It is multiplied by the number
#                       of cells, so it is used as a fraction (e.g. 0.075).
#                       Required when remove_doublets is TRUE.
#   feature_subset      If TRUE, show QC violin plots, prompt for
#                       nFeature_RNA / percent.mt cutoffs and filter cells.
#   batch_id            Metadata column holding the batch label.
#   reduction           Reduction used for FindNeighbors() and RunUMAP().
#   harmony             If TRUE, run Harmony on batch_id and store the result
#                       as the reduction 'harmony<assay>'.
#   seurat_integration  If TRUE, split by batch_id and run Seurat anchor
#                       integration; normalization is then skipped.
#   normalize           If TRUE, run NormalizeData() on `assay`.
#   test_npcs           Number of PCs computed and shown in the elbow plot.
#   assay               Assay used for the processing steps.
#
# Returns the processed Seurat object, which is also written to
# '<output folder>/<date>.RDS'.
seu_pp1 <- function(seu, out, remove_doublets = TRUE, pct_doublets, feature_subset = TRUE, batch_id = 'orig.ident', reduction = 'pca', harmony = FALSE, seurat_integration = FALSE, normalize = TRUE, test_npcs = 100, assay = 'RNA'){
  set.seed(329)

  # loading in required packages
  require(dplyr)
  require(Seurat)
  require(harmony)
  require(grid)
  require(DoubletFinder)
  require(DescTools)

  # fail before any expensive step if the doublet rate is needed but missing
  if (isTRUE(remove_doublets) && missing(pct_doublets)) {
    stop("`pct_doublets` must be supplied when `remove_doublets` is TRUE",
         call. = FALSE)
  }

  # name under which RunHarmony() stores its corrected embedding
  harmony_reduction <- paste0('harmony', assay)
  if (isTRUE(harmony) && identical(reduction, 'pca')) {
    warning("harmony = TRUE but reduction = 'pca': neighbors and UMAP will ",
            "use the uncorrected PCA; pass reduction = '", harmony_reduction,
            "' to use the Harmony embedding", call. = FALSE)
  }

  # checking to see if there's a slash at the end of your directory name
  # if not it adds it for you ;)
  last_char <- substr(out, nchar(out), nchar(out))
  if (nchar(out) > 0 && last_char != "/" && last_char != "\\") {
    out <- paste0(out, "/")
  }

  # add a directory within your out directory that will contain all of the
  # outputs, and make sure it exists before anything is written into it
  out <- paste0(out, 'Seurat_preprocessing_outputs_', Sys.Date(), '/')
  dir.create(out, recursive = TRUE, showWarnings = FALSE)

  # this creates a log file where all of the pre-processing metrics will be
  # saved for later referencing; the on.exit handler restores console output
  # even if a later step fails, without touching sinks opened by the caller
  sink_depth <- sink.number()
  sink(paste0(out, '_preprocessing_console_output.txt'), append = TRUE)
  on.exit(while (sink.number() > sink_depth) sink(), add = TRUE)

  # adding parameters to the log file
  cat('\nNEW ANALYSIS\n')
  cat(paste('Date of analysis: ', Sys.Date(), "\n"))
  cat(paste("Output directory: ", out, "\n"))
  cat(paste("Batch_ID: ", batch_id, "\n"))
  cat(paste("Assay used: ", assay, "\n"))

  # feature subsetting
  # NOTE(review): "^MT-" only matches upper-case, human-style mitochondrial
  # gene symbols; confirm it is appropriate for the species being analysed.
  if (isTRUE(feature_subset)) {
    cat('Feature Subsetting: TRUE\n')
    seu[["percent.mt"]] <- PercentageFeatureSet(seu, pattern = "^MT-")
    print(VlnPlot(seu, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3))
    min_features <- as.integer(readline("Enter the minimum number of RNA features: "))
    max_features <- as.integer(readline("Enter the maximum number of RNA features: "))
    max_pct_mt <- as.numeric(readline("Enter the maximum percentage of MT genes allowed: "))
    if (anyNA(c(min_features, max_features, max_pct_mt))) {
      stop("QC cutoffs must be numeric; got a missing or non-numeric value",
           call. = FALSE)
    }
    cat(paste("Enter the minimum number of RNA features: ", min_features, "\n"))
    cat(paste("Enter the maximum number of RNA features: ", max_features, "\n"))
    cat(paste("Enter the maximum percentage of MT genes allowed:", max_pct_mt, "\n"))
    seu <- subset(seu, subset = nFeature_RNA > min_features & nFeature_RNA < max_features & percent.mt < max_pct_mt)
  } else if (!"percent.mt" %in% colnames(seu@meta.data)) {
    # the summary violin plot at the end always needs percent.mt
    seu[["percent.mt"]] <- PercentageFeatureSet(seu, pattern = "^MT-")
  }

  # seurat integration step
  # NOTE(review): the steps below still run on `assay`, not on the
  # 'integrated' assay made the default here; confirm this is intended.
  if (isTRUE(seurat_integration)) {
    cat('Seurat integration: TRUE\n')
    print(paste('Now performing seurat integration based on the metadata column', batch_id))
    batch_values <- seu[[batch_id]][, 1]
    objects <- lapply(as.character(unique(batch_values)), function(b) {
      seu[, which(batch_values == b)]
    })
    features <- SelectIntegrationFeatures(object.list = objects)
    immune.anchors <- FindIntegrationAnchors(object.list = objects, anchor.features = features)
    seu <- IntegrateData(anchorset = immune.anchors)
    DefaultAssay(seu) <- 'integrated'
    normalize <- FALSE
  }

  # normalization step, if indicated
  if (isTRUE(normalize)) {
    cat('Normalization: TRUE\n')
    seu <- NormalizeData(seu, assay = assay)
  }

  # identify variable features for your specified assay
  seu <- FindVariableFeatures(seu, assay = assay)
  # scaling the data within the specified assay
  seu <- ScaleData(seu, assay = assay)
  # run PCA so we can screen for the proper number of PCs based on variance
  seu <- RunPCA(seu, verbose = FALSE, npcs = test_npcs, assay = assay)

  # runs harmony to integrate all samples based on batch ID if desired
  if (isTRUE(harmony)) {
    seu <- harmony::RunHarmony(seu, batch_id, plot_convergence = TRUE, reduction.save = harmony_reduction)
  }

  # elbow plot on the Harmony-corrected PCs when harmony was run, otherwise
  # on the raw PCs; used both interactively and in the summary PDF
  show_elbow <- function(object) {
    if (isTRUE(harmony)) {
      print(ElbowPlot(object, ndims = test_npcs, reduction = harmony_reduction))
    }
    if (isFALSE(harmony)) {
      print(ElbowPlot(object, ndims = test_npcs))
    }
  }
  show_elbow(seu)

  # requests user input as to the number of PCs to use
  npcs2 <- as.integer(readline("Enter the number of PCs to use: "))
  if (is.na(npcs2) || npcs2 < 1L) {
    stop("The number of PCs must be a positive integer", call. = FALSE)
  }
  cat(paste("Enter the number of PCs to use: ", npcs2, "\n"))
  pc_dims <- seq_len(npcs2)

  # find neighbors based on the user input number of PCs and the desired reduction
  seu <- FindNeighbors(seu, dims = pc_dims, reduction = reduction, assay = assay)
  # runs UMAP and stores clustering information for every resolution from 0.1 to 3 by 0.1
  seu <- RunUMAP(seu, dims = pc_dims, reduction = reduction, assay = assay)
  for (x in seq.int(0.1, 3, by = 0.1)) {
    seu <- FindClusters(seu, resolution = x, verbose = FALSE)
  }

  # removing doublets
  if (isTRUE(remove_doublets)) {
    # do a parameter sweep to help ID the correct value
    sweep.res <- DoubletFinder::paramSweep_v3(seu)
    sweep.stats <- DoubletFinder::summarizeSweep(sweep.res, GT = FALSE)
    bcmvn <- DoubletFinder::find.pK(sweep.stats)
    # writes an output file that contains your parameter sweep information
    write.csv(bcmvn, file = paste0(out, 'DF_stats.csv'))
    print(barplot(bcmvn$BCmetric, names.arg = bcmvn$pK, las = 2))
    # estimates the number of doublets based on your input
    nExp <- round(ncol(seu) * pct_doublets)
    # requests user input as to the pK value to use. This should be the pK value with the largest BCmetric
    pk <- as.numeric(readline("Enter the pK value to use: "))
    if (is.na(pk)) {
      stop("The pK value must be numeric", call. = FALSE)
    }
    # adds doublet finder info to the output log file
    cat(paste("Pre-defined pN value: 0.25", "\n"))
    cat(paste("Enter the pK value to use: ", pk, "\n"))
    # detect doublets vs singlets
    seu <- DoubletFinder::doubletFinder_v3(seu, pN = 0.25, pK = pk, nExp = nExp, PCs = pc_dims)
  }
  sink()

  # create a file that contains all of your preprocessing plots; the device is
  # closed by the on.exit handler if plotting fails part-way through
  pdf(paste0(out, 'preprocessing_plots.pdf'), width = 12, height = 7)
  pdf_dev <- dev.cur()
  on.exit(if (pdf_dev %in% dev.list()) dev.off(pdf_dev), add = TRUE)
  print(VlnPlot(seu, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = batch_id))
  show_elbow(seu)
  dev.off(pdf_dev)

  # saves your seurat output to your out folder
  saveRDS(seu, file = paste0(out, Sys.Date(), '.RDS'))
  return(seu)
}
```
## SoupX run
Purpose: This function runs the soupx tool for removing ambient RNA "soup" from your samples
seu: Your seurat object. This function assumes that the cluster labels are stored in a metadata column called "global.cluster"
tmpDir: The path to the folder that holds the h5 files from running cellranger. The function reads both "sample_filtered_feature_bc_matrix.h5" and "sample_raw_feature_bc_matrix.h5" from this folder
outdir: Provide the path to a folder that already exists (please make the folder if it's not already made) where you would like to store record-keeping related outputs as well as your seurat object output
sample: This is a string of the name you would like to give to the object. Will be added as a project name to the seurat object, as well as part of the object name when it is saved to the output directory.
Output: This function will save a workspace image (as an rda file), as well as a seurat object containing the corrected matrix in a folder generated within the output directory
Usage:
my_output <- soupX_run(seu = seu, tmpDir = '/your/input/directory/10X/', outdir = '/your/output/directory/', sample = 'my_sample')
```{r}
# Remove ambient RNA ("soup") from one sample with SoupX and return a new
# Seurat object built from the corrected counts.
#
# Interactive: prints how many droplets fall below a series of UMI cutoffs and
# then reads the upper bound of the "empty droplet" range with readline().
#
# Arguments:
#   seu     Seurat object for the sample; cluster labels are read from
#           seu$global.cluster.
#   tmpDir  Folder containing 'sample_filtered_feature_bc_matrix.h5' and
#           'sample_raw_feature_bc_matrix.h5'.
#   outdir  Folder under which a dated 'Soup_X_outputs_<date>/' folder is
#           created for the saved objects.
#   sample  Name used as the Seurat project name and in the output file names.
#
# Returns the Seurat object created from the SoupX-adjusted count matrix; the
# same object and the intermediate objects are also saved to disk.
soupX_run <- function(seu, tmpDir, outdir = './outputs/', sample){
  set.seed(329)

  # load required packages
  require(SoupX)
  require(Seurat)
  require(DescTools)
  require(dplyr)

  # `sample` is only used at the very end, so check it before the slow steps
  if (missing(sample)) {
    stop("`sample` must be supplied", call. = FALSE)
  }

  # checking to see if there's a slash at the end of a directory name
  # if not it adds it for you ;)
  with_trailing_slash <- function(path) {
    last_char <- substr(path, nchar(path), nchar(path))
    if (nchar(path) > 0 && last_char != "/" && last_char != "\\") {
      path <- paste0(path, "/")
    }
    path
  }
  tmpDir <- with_trailing_slash(tmpDir)
  outdir <- with_trailing_slash(outdir)

  # reading in the filtered barcode matrix and only keeping the genes and
  # cells that are also in the seurat object
  toc1 <- Seurat::Read10X_h5(paste0(tmpDir, "sample_filtered_feature_bc_matrix.h5"))
  toc1 <- toc1[rownames(toc1) %in% rownames(seu), colnames(toc1) %in% colnames(seu)]

  # reading in the raw barcode matrix. Only the genes are matched to the cell
  # matrix: every droplet column is kept, because the soup profile is
  # estimated from the low-UMI (empty) droplets, which are not cells.
  tod1 <- Seurat::Read10X_h5(paste0(tmpDir, "sample_raw_feature_bc_matrix.h5"))
  shared_genes <- intersect(rownames(toc1), rownames(tod1))
  toc1 <- toc1[shared_genes, , drop = FALSE]
  tod1 <- tod1[shared_genes, , drop = FALSE]

  # building the SoupX channel without estimating the soup yet
  v1t_counts <- SoupChannel(tod1, toc1, calcSoupProfile = FALSE)

  # helper to make it easier to pick the range of UMIs to consider "empty" droplets
  # default is 0-100 but that is not always sufficient
  # if you have high background (lots of soup), this value will be on the higher end!
  droplet_umis <- v1t_counts$nDropUMIs
  for (x in seq(100, 2000, by = 100)) {
    print(paste("Range of UMI: 0-", as.character(x), sep = ''))
    # droplets with zero UMIs carry no expression, so they are not counted
    print(sum(droplet_umis > 0 & droplet_umis < x))
  }

  # requests user input for the upper range of "empty" droplets; readline()
  # returns text, so it is converted before being used as a numeric bound
  input <- as.numeric(readline("Enter upper range value: "))
  if (is.na(input) || input <= 0) {
    stop("The upper range value must be a positive number", call. = FALSE)
  }

  # now calculating the soup using the user input range of "empty" droplets
  v1t_counts <- estimateSoup(v1t_counts, soupRange = c(0, input))

  # here gathering cluster names and setting them within the object
  clusters <- seu$global.cluster
  clusters <- clusters[names(clusters) %in% colnames(v1t_counts$toc)]
  v1t_counts <- setClusters(v1t_counts, clusters)
  v1t_counts <- autoEstCont(v1t_counts)
  v1t_soupx <- adjustCounts(v1t_counts)

  # creating a new version of your seurat object
  v1t <- CreateSeuratObject(v1t_soupx, project = sample, assay = "RNA", names.field = 1, names.delim = "_", meta.data = NULL)

  # creating the out directory where we will store the saved objects
  path <- paste0(outdir, 'Soup_X_outputs_', Sys.Date(), '/')
  dir.create(path, recursive = TRUE, showWarnings = FALSE)

  # saving the intermediate objects to the output folder
  save(toc1, tod1, v1t, v1t_counts, v1t_soupx, clusters, tmpDir, file = paste0(path, 'SoupX_env_', sample, '.rda'))
  # saving your seurat object as an output
  saveRDS(v1t, paste0(path, sample, '_', Sys.Date(), '.RDS'))

  # returns the seurat object
  return(v1t)
}
```
## Add clustering resolutions 0.1-3
What this function does: Given an input seurat object, this is a helper function that adds a metadata column to your seurat object for each clustering resolution (increasing by 0.1) from the start resolution to the end resolution. Simply a helper to make it easier to assess multiple resolutions of interest. This function is included in the seu_pp1 wrapper, so no need to use it except in an independent circumstance.
seu: Seurat object for which you would like to add metadata columns (in intervals of 0.1) for clustering resolutions ranging from your start argument to your end argument
start: This is the starting resolution for adding clustering resolutions. Default is 0.1
end: This is the ending resolution for adding clustering resolutions. Default is 3.
Output: Returns seurat object with the added metadata
Usage:
seu_with_resolutions <- add_cluster_res(seu = seu_no_res, start = 0.5, end = 4)
```{r}
# Run FindClusters() on a Seurat object once per resolution in
# seq(start, end, by), so each resolution gets its own clustering result.
#
# Arguments:
#   seu    Seurat object; FindNeighbors() is expected to have been run already.
#   start  First clustering resolution (default 0.1).
#   end    Last clustering resolution (default 3).
#   by     Step between resolutions (default 0.1).
#
# Returns the Seurat object after the last FindClusters() call.
add_cluster_res <- function(seu, start = 0.1, end = 3, by = 0.1){
  set.seed(329)
  # loading in required packages
  require(Seurat)

  # give a clear message instead of seq.int()'s "wrong sign in 'by'" error
  if (!is.numeric(start) || !is.numeric(end) || !is.numeric(by) ||
      length(start) != 1 || length(end) != 1 || length(by) != 1) {
    stop("`start`, `end` and `by` must each be a single number", call. = FALSE)
  }
  if (by <= 0 || start > end) {
    stop("`by` must be positive and `start` must not exceed `end`",
         call. = FALSE)
  }

  # cluster once per requested resolution
  for (x in seq.int(start, end, by = by)) {
    seu <- FindClusters(seu, resolution = x, verbose = FALSE)
  }
  return(seu)
}
```
# ________________________
# Seurat cluster/marker evaluation
## Marker plotting (VlnPlot, DotPlot, DimPlot, FeaturePlot)
What does this function do? This function is meant to be used to help holistically evaluate the expression of given features within a seurat object, grouped by your choice of metadata column. It will plot a violin plot, dotplot, dimplot, and featureplot of the gene expression of your indicated features grouped by your metadata column. Meant as an evaluation/diagnostic set of plots for exploratory analyses
seu: Your seurat object
features: A character vector of feature (gene) names to screen for within the data
group.by = A string of the name of the metadata column to group by when plotting.
Output: Prints a cowplot grid of four plots showing gene expression of your given features grouped by your indicated metadata column.
```{r}
# Draw a 2 x 2 diagnostic grid for a set of features: feature plot, violin
# plot, dot plot and a labelled dim plot, the last three grouped by the
# metadata column named in `group.by`.
#
# Arguments:
#   seu       Seurat object to plot.
#   features  Feature names passed to FeaturePlot(), VlnPlot() and DotPlot().
#   group.by  Name of the metadata column used for grouping.
#
# Returns the cowplot grid holding the four panels.
marker_plotting <- function(seu, features, group.by){
  set.seed(329)

  require(Seurat)
  require(cowplot)
  require(ggplot2)

  # colours used for the grouped violin and dim plots
  cluster_palette <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3","#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", "#AA8282", "#D4B7B7", "#8600BF", "#BA5CE3", "#808000","#AEAE5C", "#1E90FF", "#00BFFF", "#56FF0D", "#FFFF00", '#00CC99', '#006666', '#006600', '#330033', '#FFCCFF', '#FF00FF', '#663300')

  # expression on the embedding
  feature_panel <- FeaturePlot(seu, features = features)

  # per-group expression distribution, without points or axis titles
  violin_panel <- VlnPlot(
    seu,
    features = features,
    group.by = group.by,
    cols = cluster_palette,
    pt.size = 0
  )
  violin_panel <- violin_panel + theme(axis.title = element_blank())

  # per-group dot plot with vertical feature labels
  vertical_labels <- theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
  dot_panel <- DotPlot(seu, features = features, group.by = group.by, col.min = 0) +
    vertical_labels

  # embedding coloured and labelled by group, for reference
  dim_panel <- DimPlot(
    seu,
    group.by = group.by,
    cols = cluster_palette,
    label = TRUE,
    label.box = TRUE,
    repel = TRUE
  )

  plot_grid(
    plotlist = list(feature_panel, violin_panel, dot_panel, dim_panel),
    nrow = 2,
    ncol = 2
  )
}
```
## Cluster evaluation (nCount, nFeature, doublet score)
What does this function do? This function is meant as a tool to help you evaluate the potential doublets within a single cell RNA-seq data set. Given a seurat object, a metadata column to group by, and a batch specific ID, you can evaluate nCount_RNA, nFeature_RNA, and doublet scores from Doublet Finder outputs.
seu: Seurat object to evaluate
group.by: A string that indicates the name of the metadata column that the data should be grouped by
batch.id: A string that indicates the name of the metadata column that stores individual sample IDs or batch-specific indications.
Output: This function prints a multi-panel figure that consists of a heatmap, and evaluation figures for nCount, nFeature and doublet score.
Usage:
cluster_eval(seu = seu, group.by = 'global.cluster', batch.id = 'orig.ident')
```{r}
#seu = my2
#group.by = 'predicted.id'
#batch.id = 'orig.ident'
# Build a multi-panel figure for judging clusters and possible doublets:
# a heatmap of cluster-by-sample cell percentages, violin plots of nCount_RNA
# and nFeature_RNA per group, and feature plots of the 'pANN_*' metadata
# columns.
#
# Arguments:
#   seu       Seurat object to evaluate.
#   group.by  Name of the metadata column to group by (heatmap rows).
#   batch.id  Name of the metadata column holding sample/batch IDs
#             (heatmap columns).
#
# Returns the assembled multipanelfigure object. Stops with an error when the
# object has no metadata column whose name starts with 'pANN_'.
cluster_eval <- function(seu, group.by, batch.id){
  set.seed(329)

  # loading in required packages
  require(Seurat)
  require(ComplexHeatmap)
  require(grid)
  require(DescTools)
  require(dplyr)

  # loading in colors for the plotting functions
  color_clusters <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3","#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", "#AA8282", "#D4B7B7", "#8600BF", "#BA5CE3", "#808000","#AEAE5C", "#1E90FF", "#00BFFF", "#56FF0D", "#FFFF00", '#00CC99', '#006666', '#006600', '#330033', '#FFCCFF', '#FF00FF', '#663300')

  meta <- seu@meta.data

  # cells per (group, sample), expressed as a percentage of all cells; the
  # metadata columns are pulled out as vectors so table() cross-tabulates them
  counts <- table(meta[[group.by]], meta[[batch.id]])
  total <- as.numeric(ncol(seu))
  pct_df <- matrix(
    as.numeric(counts) / total * 100,
    nrow = nrow(counts),
    ncol = ncol(counts),
    dimnames = unname(dimnames(counts))
  )

  # plotting the heatmap, with the percentage written into every cell
  p1 <- ComplexHeatmap::Heatmap(
    pct_df,
    name = 'Cluster %',
    row_title = 'cluster ID', column_title = 'sample',
    column_title_side = 'bottom',
    cell_fun = function(j, i, x, y, width, height, fill) {
      grid::grid.text(sprintf("%.4f", pct_df[i, j]), x, y,
                      gp = grid::gpar(fontsize = 10))
    }
  )

  # plotting a violin plot of nCount and nFeature values
  p2 <- VlnPlot(seu, features = c('nCount_RNA', 'nFeature_RNA'),
                group.by = group.by, cols = color_clusters)

  # the doublet scores are the metadata columns whose names start with 'pANN_'
  df_meta <- grep('^pANN_', colnames(meta), value = TRUE)
  if (length(df_meta) == 0) {
    stop("No 'pANN_' metadata columns found; run DoubletFinder first",
         call. = FALSE)
  }
  p3 <- FeaturePlot(seu, features = df_meta, cols = color_clusters)

  # wrangling all figures into a single panel; functions are namespaced
  # because multipanelfigure is not attached by this function
  figure1 <- multipanelfigure::multi_panel_figure(
    width = 180, height = 180,
    columns = 2, rows = 2)
  figure1 <- multipanelfigure::fill_panel(figure1, p1, row = 1, column = 1)
  figure1 <- multipanelfigure::fill_panel(figure1, p2, row = 1, column = 2)
  figure1 <- multipanelfigure::fill_panel(figure1, p3, row = 2, column = 1:2)
  figure1
}
```
## DE genes DotPlot (all clusters)
What does this function do? Given a seurat object, a metadata column to group by, an output directory, and a number of markers per group, this function finds differentially expressed (DE) genes for each group and generates a dotplot of the top markers grouped by the metadata column. The dotplot and a list of the marker genes with their corresponding statistics are saved to the output folder.
seu: This is the seurat object that contains the data you wish to find DE genes within
metadata: This is a string indicating the metadata column to use as a grouping variable when calculating DE genes and plotting the dotplot
out: This is a string to the directory where you would like a folder with all your results stored (the plot, the de genes list, and an RDS that has both of these in it)
n_marks: This is how many markers will be used for each of your subsets for both DE gene identification, as well as plotting in the dotplot
min_pct: This sets the minimum percent that the feature must be expressed to be considered differentially expressed. Matches the min.pct argument in FindMarkers function from Seurat
Output: This function saves a csv of marker genes shown in the dotplot, as well as a png of the DE genes dotplot to the out directory. It also saves an RDS of both of these objects as a single list object, with [[1]] being the DE genes dataframe, and [[2]] being the marker genes dotplot. Lastly, it returns the list object, again with [[1]] being the DE genes dataframe, and [[2]] being the marker genes dotplot.
```{r}
#de_dotplot(seu, 'global.cluster')
#seu <- fbs[,fbs$RNA_snn_res.0.5 %in% c(0,1,2)]
#metadata <- 'RNA_snn_res.0.5'
#n_marks = 10
# Top-level values named after de_dotplot()'s arguments.
# NOTE(review): these run whenever this chunk is evaluated and need an object
# called `m1001` to exist; presumably leftover interactive-testing values, so
# confirm before knitting.
seu <- m1001
metadata <- 'BayesSpace'
out <- 'Z:\\Computational_Toxicology\\Athena\\Visium\\m1001\\'
n_marks <- 10
min_pct <- 0.05
dotplot_width <- 7
dotplot_height <- 12
# Find the top positive markers of every group in a metadata column, draw
# them as one dot plot, and save the results.
#
# Arguments:
#   seu             Seurat object to test.
#   metadata        Name of the metadata column that defines the groups.
#   out             Folder under which '<metadata>_DEgenes_<date>/' is created.
#   n_marks         Maximum number of markers kept per group.
#   min_pct         Passed to FindMarkers() as min.pct.
#   dotplot_height  Height of the saved PNG, in inches.
#   dotplot_width   Width of the saved PNG, in inches.
#
# Writes an RDS (list of marker table and plot), a CSV of the marker table and
# a PNG of the dot plot into the output folder.
#
# Returns list(marker table, dot plot). The marker table has FindMarkers()'
# columns plus `cluster` and `gene`; `gene` is kept as a column because row
# names are made unique when a gene is a marker for more than one group.
de_dotplot <- function(seu, metadata, out = './figures/', n_marks = 10, min_pct = 0.1, dotplot_height = 12, dotplot_width = 7){
  set.seed(329)

  # loading in required packages
  require(Seurat)
  require(DescTools)
  require(ggplot2)

  # checking to make sure the out directory has a slash included at the end
  # if not, it adds it for you :)
  last_char <- substr(out, nchar(out), nchar(out))
  if (nchar(out) > 0 && last_char != "/" && last_char != "\\") {
    out <- paste0(out, "/")
  }

  # getting differentially expressed genes for each subset
  groups <- as.character(sort(unique(seu[[metadata]][, 1])))
  marker_tables <- vector('list', length(groups))
  for (i in seq_along(groups)) {
    y <- groups[i]
    fm <- FindMarkers(seu, ident.1 = y, group.by = metadata, test.use = 'MAST', only.pos = TRUE, logfc.threshold = 0.05, min.pct = min_pct)
    if (nrow(fm) > 0) {
      fm <- fm[order(-fm$avg_log2FC, fm$p_val_adj), ]
      # drop genes whose names match the ribosomal / mitochondrial patterns.
      # NOTE(review): presumably '%' is DescTools' wildcard, so '^MT%' would
      # drop every gene starting with 'MT', not only 'MT-' genes; confirm.
      fm <- fm[!(rownames(fm) %like any% c('^RPL%', '^RPS%', '^MT%')), ]
    }
    if (nrow(fm) == 0) {
      print(paste('No markers for cluster', y, sep = ' '))
      next
    }
    # head() keeps at most n_marks rows and never pads with NA rows
    gene_list <- head(fm, n_marks)
    gene_list$cluster <- y
    gene_list$gene <- rownames(gene_list)
    marker_tables[[i]] <- gene_list
  }

  marker_tables <- Filter(Negate(is.null), marker_tables)
  if (length(marker_tables) == 0) {
    stop("No marker genes were found for any group in '", metadata, "'",
         call. = FALSE)
  }
  marker_df <- do.call(rbind, marker_tables)
  all_g <- unique(marker_df$gene)

  dot_plot <- DotPlot(seu, features = rev(all_g), col.min = 0, group.by = metadata) +
    coord_flip() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

  # creating a new directory that will store these objects
  path <- paste0(out, metadata, '_DEgenes_', Sys.Date(), '/')
  dir.create(path, recursive = TRUE, showWarnings = FALSE)

  # saving the plot, the marker genes file, and the RDS with both in the set output file
  saveRDS(list(marker_df, dot_plot), file = paste0(path, Sys.Date(), '_de_dotplot_markers.RDS'))
  write.csv(marker_df, file = paste0(path, Sys.Date(), '_de_marker_list.csv'))
  # pass the plot explicitly instead of relying on ggplot2's last_plot()
  ggsave(filename = paste0(path, Sys.Date(), '_de_marker_plot.png'), plot = dot_plot, width = dotplot_width, height = dotplot_height, units = 'in', dpi = 300)

  return(list(marker_df, dot_plot))
}
```
## DE genes DotPlot (1 vs 1 clusters)
```{r}
library(Seurat)
library(DescTools)
# Top-level values named after de_dotplot_1v1()'s `x` and `y` arguments;
# presumably example group labels for interactive testing (confirm).
y <- 2
x <- 0
# Compare two groups of a metadata column head-to-head and draw a dot plot of
# the top positive markers of each group against the other.
#
# Arguments:
#   seu       Seurat object; it is subset to the cells belonging to x or y.
#   metadata  Name of the metadata column that holds the group labels.
#   x, y      The two group labels to compare.
#   n_marks   Maximum number of markers kept per direction (default 5).
#
# Returns the dot plot (a ggplot object). Stops with an error when neither
# direction yields any marker.
de_dotplot_1v1 <- function(seu, metadata, x, y, n_marks = 5){
  set.seed(329)
  seu <- seu[, which(seu[[metadata]][, 1] %in% c(x, y))]

  # top positive markers of `ident_a` relative to `ident_b`
  top_markers <- function(ident_a, ident_b) {
    fm <- FindMarkers(seu, ident.1 = ident_a, ident.2 = ident_b, group.by = metadata, logfc.threshold = 0.1, test.use = 'wilcox', min.pct = 0.1, min.diff.pct = 0.05, only.pos = TRUE)
    if (nrow(fm) == 0) {
      return(character(0))
    }
    fm <- fm[order(-fm$avg_log2FC, fm$p_val_adj), ]
    # drop genes whose names match the ribosomal / mitochondrial patterns
    fm <- fm[!(rownames(fm) %like any% c('^RPL%', '^RPS%', '^MT%')), ]
    # head() keeps at most n_marks rows and never pads with NA rows
    rownames(head(fm, n_marks))
  }

  # getting differentially expressed genes for each direction
  all_g <- unique(c(top_markers(x, y), top_markers(y, x)))
  if (length(all_g) == 0) {
    stop("No marker genes were found between '", x, "' and '", y, "'",
         call. = FALSE)
  }

  # ggplot2 functions are namespaced because this chunk does not attach ggplot2
  DotPlot(seu, features = rev(all_g), col.min = 0, group.by = metadata) +
    ggplot2::coord_flip() +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, vjust = 1, hjust = 1))
}
```
# ____________________________
# Seurat comparison analyses
## frequency plots
```{r}
#freqs <- freq_plot(seu, metadata = 'global.cluster', split.meta = 'hpv_status', out = './figures/global_cluster_all_', out_width = 19, out_height = 10)
#SEU = epi_moc2
#metadata = 'RNA_snn_res.0.4'
#split.meta = 'K17.status'
#out = './figures/epi_moc2_'
#out_width = 8
#out_height = 8
#out_units = 'in'
#out_res = 300
#freq_plot(seurat, celltype = c('N0_Retnlg', 'N1_Tnf', 'N2_Isg', 'N3_Npm1'), metadata = 'global.cluster2', split.meta = 'tissue', out = '/Volumes/hqdinh2/Projects/HNC_SPORE/Golfinosetal2022/neut_freq_boxplot.png')
# Example argument values for running the body of freq_plot() interactively.
# `m1001` must already exist in the session.
SEU <- m1001
metadata <- 'SCT_snn_res.0.5'
split.meta <- 'BayesSpace'
graph_type <- 'stacked_barplot'
out <- 'Z:\\Computational_Toxicology\\Athena\\Visium\\m1001\\'
out_width <- 12
out_height <- 8
out_units <- 'in'
out_res <- 300
test_by_group <- FALSE
##################################################################
# Long-format table of per-sample cluster frequencies (helper for freq_plot).
#
# Args (one element per cell, all in the same cell order):
#   sample_ids:  sample each cell belongs to.
#   cluster_ids: cluster (identity) of each cell.
#   group_ids:   group label of each cell; must be constant within a sample.
#
# Returns: data.frame with columns Sample, Group, Cluster (character) and
#   Freqs (numeric fraction of the sample's cells in that cluster), ordered
#   sample-major in order of first appearance, clusters in factor-level order.
.freq_plot_frequencies <- function(sample_ids, cluster_ids, group_ids) {
  sample_ids <- as.character(sample_ids)
  # as.character() keeps factor labels; c()-ing a factor would give codes.
  group_ids <- as.character(group_ids)
  cluster_ids <- droplevels(as.factor(cluster_ids))
  samples <- unique(sample_ids)

  sample_groups <- unique(data.frame(
    Sample = sample_ids, Group = group_ids, stringsAsFactors = FALSE
  ))
  ambiguous <- unique(sample_groups$Sample[duplicated(sample_groups$Sample)])
  if (length(ambiguous) > 0) {
    stop("Each sample must map to exactly one group; ambiguous sample(s): ",
         paste(ambiguous, collapse = ", "), call. = FALSE)
  }

  counts <- table(factor(sample_ids, levels = samples), cluster_ids)
  freqs <- unclass(prop.table(counts, margin = 1))
  n_clusters <- ncol(freqs)

  data.frame(
    Sample = rep(samples, each = n_clusters),
    Group = rep(
      sample_groups$Group[match(samples, sample_groups$Sample)],
      each = n_clusters
    ),
    Cluster = rep(colnames(freqs), times = length(samples)),
    # t() so that as.vector() walks clusters within each sample.
    Freqs = as.vector(t(freqs)),
    stringsAsFactors = FALSE
  )
}

# Plot cluster frequencies per sample, split by a sample-level grouping.
#
# Args:
#   SEU:           Seurat object; `orig.ident` identifies the sample.
#   metadata:      metadata column used as cluster identity.
#   split.meta:    metadata column with the group label of each sample.
#   graph_type:    'stacked_barplot' (default) or 'freq_boxplot'. Only the
#                  first element is used.
#   out:           prefix (directory and/or file-name stem) for output files.
#   out_width, out_height, out_units, out_res: size/resolution of the PNG.
#   test_by_group: if TRUE and at least two groups exist, a Wilcoxon rank-sum
#                  test between the first two groups is run per cluster and
#                  the p-values are attached to the result as attribute
#                  "group_p_values" (they used to be computed and discarded).
#
# Side effects: writes '<out><graph_type>.png' and '<out><graph_type>_data.csv'.
# Uses the global `color_clusters` palette for the stacked bar plot.
#
# Returns: the long-format frequency data.frame (Sample, Group, Cluster, Freqs).
freq_plot <- function(SEU, metadata, split.meta = 'tissue_hpv', graph_type = c('stacked_barplot', 'freq_boxplot'), out, out_width = 12, out_height = 8, out_units = 'in', out_res = 300, test_by_group = TRUE) {
  set.seed(329)
  # library() fails loudly if a package is missing, unlike require().
  for (pkg in c('ggplot2', 'DescTools', 'Seurat')) {
    library(pkg, character.only = TRUE)
  }
  # Comparing the length-2 default with `==` inside if() is an error on
  # R >= 4.2; resolve it to a single validated choice first.
  graph_type <- match.arg(graph_type[1], c('stacked_barplot', 'freq_boxplot'))

  if (!split.meta %in% colnames(SEU@meta.data)) {
    stop("Metadata column '", split.meta, "' not found", call. = FALSE)
  }

  Idents(SEU) <- metadata
  ids <- Idents(SEU)
  cell_meta <- SEU@meta.data[names(ids), , drop = FALSE]
  df <- .freq_plot_frequencies(
    cell_meta$orig.ident, ids, cell_meta[[split.meta]]
  )

  if (isTRUE(test_by_group)) {
    groups <- unique(df$Group)
    if (length(groups) >= 2) {
      clusters <- unique(df$Cluster)
      p_values <- vapply(clusters, function(cl) {
        rows <- df[df$Cluster == cl, ]
        stats::wilcox.test(
          rows$Freqs[rows$Group %in% groups[1]],
          rows$Freqs[rows$Group %in% groups[2]],
          paired = FALSE
        )$p.value
      }, numeric(1), USE.NAMES = FALSE)
      attr(df, 'group_p_values') <- data.frame(
        Cluster = clusters, p = p_values, stringsAsFactors = FALSE
      )
    } else {
      message("test_by_group skipped: fewer than two groups in '",
              split.meta, "'")
    }
  }

  if (graph_type == 'freq_boxplot') {
    p <- ggplot(df) +
      geom_boxplot(aes(x = Group, y = Freqs, color = Group, fill = Group), position = position_dodge(), alpha = 0.5, outlier.color = NA) +
      geom_point(aes(x = Group, y = Freqs, color = Group), alpha = 0.8, position = position_jitterdodge()) +
      facet_wrap(~ Cluster, scales = 'free', nrow = 2) +
      theme_bw() +
      theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank(), strip.text = element_text(size = 12)) +
      scale_shape_manual(values = 1:5) +
      ggtitle(label = paste('Subset frequency by', split.meta, sep = ' ')) +
      theme(legend.key.size = unit(1, 'cm'), legend.title = element_text(size = 30), legend.text = element_text(size = 20), axis.text = element_text(size = 16), axis.title = element_text(size = 30)) +
      labs(y = "Cell Type Frequency")
  } else {
    p <- ggplot(df) +
      geom_bar(aes(x = Group, y = Freqs, fill = Cluster), position = 'stack', stat = 'identity') +
      scale_fill_manual(values = color_clusters[seq_len(length(unique(df$Cluster)))]) +
      theme_classic() +
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
  }

  # ggsave() opens and closes its own device; no png()/dev.off() pair needed.
  ggsave(paste0(out, graph_type, '.png'), plot = p, width = out_width, height = out_height, units = out_units, dpi = out_res)
  write.csv(df, file = paste0(out, graph_type, '_data.csv'))
  df
}
############################################################################
```
### Wilcox test--freq boxplots
Input: dataframe that is a direct output from the previous function (frequency boxplots), out folder where you want the csv file stored
What it does: It takes the dataframe and finds every combination of two conditions (from the Group column). For each unique pair of conditions, it runs a Wilcoxon test for every cluster (from the Cluster column) between those two conditions. It builds a table containing the cluster name, the p-value for that cluster, the comparison that produced that p-value, and a convenience column that indicates the level of significance. The CSV file is saved in the out directory you provide.
Output: None. Only saves a file to the directory of your choice.
```{r}
# Pairwise Wilcoxon rank-sum tests of cluster frequencies between groups.
#
# Args:
#   df:  data.frame with columns Group, Cluster and Freqs (the output of
#        freq_plot()).
#   out: prefix (directory and/or file-name stem) for the CSV output.
#
# For every unordered pair of groups and every cluster, compares the Freqs of
# the two groups with wilcox.test(). Writes
# '<out><Sys.Date()>wilcox_test_results.csv' with columns Cluster, pval,
# Comparison and significance.
#
# Returns: NULL, invisibly; the function is called for its file output.
wilcox_by_condition <- function(df, out) {
  set.seed(329)
  # Untestable comparisons (NaN p-value) are stored as this sentinel so that
  # they are labelled "Not significant", as before.
  nan_sentinel <- 1000000000000000

  groups <- as.character(unique(df$Group))
  clusters <- as.character(unique(df$Cluster))

  results <- data.frame(
    Cluster = character(), pval = numeric(), Comparison = character(),
    stringsAsFactors = FALSE
  )
  if (length(groups) >= 2) {
    # Enumerate each unordered pair once. De-duplicating afterwards on
    # (Cluster, pval) also dropped distinct comparisons that happened to share
    # a p-value, which is common with small-sample exact tests.
    group_pairs <- utils::combn(groups, 2, simplify = FALSE)
    per_pair <- lapply(group_pairs, function(pair) {
      p <- vapply(clusters, function(cl) {
        in_cluster <- df$Cluster %in% cl
        a <- df$Freqs[in_cluster & df$Group %in% pair[1]]
        b <- df$Freqs[in_cluster & df$Group %in% pair[2]]
        if (!any(is.finite(a)) || !any(is.finite(b))) {
          return(NaN)
        }
        stats::wilcox.test(a, b)$p.value
      }, numeric(1), USE.NAMES = FALSE)
      data.frame(
        Cluster = clusters,
        pval = p,
        Comparison = rep(paste0(pair[1], ' vs ', pair[2]), length(clusters)),
        stringsAsFactors = FALSE
      )
    })
    results <- do.call(rbind, per_pair)
  }

  results$pval[is.nan(results$pval)] <- nan_sentinel

  significance <- rep("Not significant", nrow(results))
  significance[which(results$pval < 0.15)] <- 'Nearly significant'
  significance[which(results$pval < 0.05)] <- "Significant"
  significance[which(results$pval < 0.01)] <- "Very significant"
  results$significance <- significance

  write.csv(results, file = paste0(out, Sys.Date(), 'wilcox_test_results.csv'), row.names = FALSE)
  invisible(NULL)
}
```
## Volcano plots: plotting comparisons
```{r Volcano plot of two metadata}
# Example argument values for running the body of volcPlot() interactively.
# `seu` must already exist in the session.
seu <- seu
seu_name <- 'hnc_CITESEQ_FB'
meta <- 'orig.ident'
cell1 <- 'D70774'
cell2 <- 'D70792'
out <- '/Volumes/hqdinh2/Projects/HNC_SPORE/Golfinosetal2022/'
test.use <- 'wilcox'
logfc <- 0.1
pval_adj <- 0.05
fccutoff <- 0.1
pcutoff <- 0.05
# Volcano plot of differential expression between two groups of cells.
#
# Args:
#   seu:         Seurat object.
#   seu_name:    label used in the output file names.
#   meta:        metadata column that holds the group labels.
#   cell1:       group passed to FindMarkers() as ident.1 (positive fold
#                change).
#   cell2:       group passed to FindMarkers() as ident.2.
#   select_labs: optional gene names to label; NULL uses the EnhancedVolcano
#                default.
#   out:         prefix (directory) for output files.
#   test.use:    test passed to FindMarkers().
#   logfc:       logfc.threshold passed to FindMarkers().
#   pval_adj:    only recorded in the output file names; it does not filter.
#   fccutoff, pcutoff: cut-offs drawn by EnhancedVolcano.
#
# Side effects: writes
#   '<out><seu_name>_<cell2>_vs._<cell1>_logfc_<logfc>_pvaladj_<pval_adj>_volcano_plot'
#   as both .png and .csv.
#
# Returns: the FindMarkers() table after filtering, with infinite fold changes
#   replaced by finite values.
volcPlot <- function(seu, seu_name, meta, cell1, cell2, select_labs=NULL, out='/Volumes/hqdinh2/Projects/HNC_SPORE/Golfinosetal2022/', test.use = 'wilcox', logfc = 0.1, pval_adj = 0.05, fccutoff = 0.1, pcutoff = 0.05) {
  set.seed(329)
  # library() fails loudly if a package is missing, unlike require().
  for (pkg in c('EnhancedVolcano', 'Seurat', 'DescTools')) {
    library(pkg, character.only = TRUE)
  }
  Seurat::Idents(object = seu) <- seu[[meta]]

  marks_all <- Seurat::FindMarkers(seu, ident.1 = cell1, ident.2 = cell2, group.by = meta, test.use = test.use, logfc.threshold = logfc, only.pos = FALSE)
  marks_all$p_val_adj <- as.numeric(marks_all$p_val_adj)
  marks_all <- marks_all[!rownames(marks_all) %like any% c('%^RP%', '%^RPS%', '%^RPL%'), ]

  # Pull infinite fold changes to 100 beyond the finite range. The previous
  # code indexed sort(unique(x)) by the length of the non-unique vector, which
  # produced NA whenever fold changes were duplicated.
  fc <- as.numeric(marks_all$avg_log2FC)
  finite_fc <- fc[is.finite(fc)]
  if (length(finite_fc) > 0) {
    fc[which(fc == -Inf)] <- min(finite_fc) - 100
    fc[which(fc == Inf)] <- max(finite_fc) + 100
  }
  marks_all$avg_log2FC <- fc

  # Cell counts of the two compared groups, not of whichever two identities
  # happen to come first in the object.
  n_cells <- table(seu@active.ident)
  caption <- paste(
    paste(cell2, unname(n_cells[as.character(cell2)]), sep = '='),
    paste(cell1, unname(n_cells[as.character(cell1)]), sep = '='),
    sep = " "
  )
  xlab <- paste('<-----', cell2, 'genes', ' log2 fold change ', cell1, 'genes', '-----> ', sep = ' ')

  # selectLab = NULL is the EnhancedVolcano default, so one call covers both
  # the "label everything" and the "label selected genes" cases.
  p <- EnhancedVolcano(marks_all, lab = rownames(marks_all), x = 'avg_log2FC',
    y = 'p_val_adj', title = paste(as.character(cell2), 'vs.', as.character(cell1),
    sep = ' '), subtitle = paste('DE logfc =', logfc, 'Vln fc-cutoff =', fccutoff,
    'Vln p-cutoff =', pcutoff), caption = caption,
    FCcutoff = fccutoff, pCutoff = pcutoff, legendLabels = NULL, legendIconSize = 0,
    legendDropLevels = TRUE, legendPosition = 'right', xlab = xlab, selectLab = select_labs)

  # Name the files after the compared groups and join the parts without the
  # stray spaces that paste()'s default separator put into the path.
  file_stem <- paste0(out, seu_name, '_', paste(as.character(cell2), 'vs.', as.character(cell1), 'logfc', logfc, 'pvaladj', pval_adj, sep = '_'))
  ggplot2::ggsave(paste0(file_stem, '_volcano_plot.png'), plot = p, width = 12, height = 12)
  write.csv(marks_all, file = paste0(file_stem, '_volcano_plot.csv'))
  marks_all
}
```
# ______________________________________
# Deconvolution
## SCDC deconvolution function
seu: Name of your single cell seurat object
markers_sc: Markers for all the single cell data clusters. Should be the output from the DE genes DotPlot function.
metadata: the metadata column in your seurat single cell object that was used to calculate markers_sc
sp: the spatial transcriptomics seurat object from 10X Visium data
out: Absolute or relative path to where you want the SCDC results saved
```{r}
#ck17_5 <- scdc_decon(seu = cillo_kurten, markers_sc = readRDS('/Volumes/hqdinh2/Projects/RawData_FromUWBiotech/HNCVisium_2022-05-05/singlecellmarkers_fordeconvolution.RDS'), metadata = 'mreg_cxcl9_globalcluster4', sp = ck17_5, out = '/Volumes/hqdinh2/Projects/RawData_FromUWBiotech/HNCVisium_2022-05-05/CK17-5_analysis/')
#saveRDS(sp, '/Volumes/hqdinh2/Projects/RawData_FromUWBiotech/HNCVisium_2022-05-05/CK17-5_analysis/cleaned_ck17_5_seurat_2022-07-06.RDS')
# Deconvolute spatial spots with SCDC using single-cell reference clusters.
#
# Args:
#   seu:        single-cell Seurat object; must have a `tissue` metadata
#               column (cells labelled 'PBMC' or 'Tonsil' are excluded).
#   markers_sc: marker table with columns gene, cluster, p_val, pct.1, pct.2.
#   metadata:   metadata column of `seu` that holds the cell-type labels.
#   sp:         spatial Seurat object with 'SCT' and 'Spatial' assays.
#   out:        prefix (directory) for the PDF output.
#
# Side effects: writes '<out>seurat_allsubset_proportions.pdf'.
#
# Returns: `sp` with an added 'SCDC' assay holding the estimated proportions
#   (set as the default assay) and spatially variable features computed on it.
scdc_decon <- function(seu, markers_sc, metadata, sp, out) {
  set.seed(329)
  # The Bioconductor package is 'Biobase'; 'BioBase' never loaded.
  for (pkg in c('Seurat', 'DescTools', 'dplyr', 'Biobase', 'SCDC')) {
    library(pkg, character.only = TRUE)
  }
  DefaultAssay(sp) <- 'SCT'

  # Deconvolute using our single cell data, without PBMC / tonsil cells.
  hnc_til <- seu[, !seu$tissue %in% c('PBMC', 'Tonsil')]

  # Filter for genes that are also present in the ST data
  markers_sc <- markers_sc[markers_sc$gene %in% rownames(sp), ]

  # Select top 20 genes per cluster, select top by first p-value, then absolute
  # diff in pct, then quota of pct.
  markers_sc$pct.diff <- markers_sc$pct.1 - markers_sc$pct.2
  markers_sc$log.pct.diff <- log2((markers_sc$pct.1 * 99 + 1)/(markers_sc$pct.2 * 99 + 1))
  top20 <- markers_sc %>%
    dplyr::group_by(cluster) %>%
    dplyr::top_n(-100, p_val) %>%
    dplyr::top_n(50, pct.diff) %>%
    dplyr::top_n(20, log.pct.diff)
  m_feats <- unique(as.character(top20$gene))

  eset_SC <- Biobase::ExpressionSet(
    assayData = as.matrix(hnc_til@assays$RNA@counts[m_feats, ]),
    phenoData = Biobase::AnnotatedDataFrame(hnc_til@meta.data)
  )
  eset_ST <- Biobase::ExpressionSet(
    assayData = as.matrix(sp@assays$Spatial@counts[m_feats, ]),
    phenoData = Biobase::AnnotatedDataFrame(sp@meta.data)
  )

  # running deconvolution
  deconvolution <- SCDC::SCDC_prop(bulk.eset = eset_ST, sc.eset = eset_SC, ct.varname = metadata, ct.sub = as.character(unique(eset_SC[[metadata]])))

  # Add the deconvolution output to the Seurat object as a new assay.
  sp@assays[['SCDC']] <- SeuratObject::CreateAssayObject(data = t(deconvolution$prop.est.mvw))
  scdc_key <- sp@assays$SCDC@key
  # An unset key may be character(0) or ""; treat both as missing.
  if (length(scdc_key) == 0 || !nzchar(scdc_key)) {
    sp@assays$SCDC@key <- "scdc_"
  }
  DefaultAssay(sp) <- 'SCDC'
  sp <- Seurat::FindSpatiallyVariableFeatures(sp, assay = "SCDC", selection.method = "markvariogram", features = rownames(sp), r.metric = 5, slot = "data")

  outf <- paste0(out, 'seurat_allsubset_proportions.pdf')
  grDevices::pdf(file = outf, width = 15, height = 20)
  # Plots are not auto-printed inside a function; without print() the PDF is
  # empty. `finally` closes the device even if plotting fails.
  tryCatch(
    print(Seurat::SpatialPlot(sp, features = rownames(sp@assays$SCDC@data), ncol = 6)),
    finally = grDevices::dev.off()
  )
  sp
}
```
# __________________________
# Cell-cell communication
## Create CellChat
```{r}
# Build a CellChat object from a Seurat object and run the standard inference.
#
# Args:
#   seurat: Seurat object with an "RNA" assay and a `cluster` metadata column,
#           which is used as the cell-group label.
#
# Uses the human ligand-receptor database (CellChatDB.human).
#
# Returns: the CellChat object after communication inference and aggregation.
creat_cellchat <- function(seurat) {
  set.seed(329)
  data.input <- GetAssayData(seurat, assay = "RNA", slot = "data")
  # extract cell type labels
  labels <- seurat$cluster
  meta <- data.frame(group = labels, row.names = names(labels))

  cellchat <- createCellChat(object = data.input, meta = meta, group.by = "group")
  cellchat@DB <- CellChatDB.human
  cellchat <- subsetData(cellchat)
  cellchat <- identifyOverExpressedGenes(cellchat, thresh.p = 0.1)
  cellchat <- identifyOverExpressedInteractions(cellchat)
  cellchat <- computeCommunProb(cellchat)
  # Filter out small cell groups before summarising at pathway level, so the
  # pathway-level probabilities are built from the filtered network as well.
  cellchat <- filterCommunication(cellchat, min.cells = 5)
  cellchat <- computeCommunProbPathway(cellchat)
  cellchat <- aggregateNet(cellchat)
  cellchat
}
```
## cellphoneDB functions
```{r pre-run}
#gets counts for cellphoneDB from your seurat object
# Expression table for CellphoneDB, taken from the RNA assay of `ser`.
#
# Args:
#   ser:  Seurat object with an RNA assay.
#   meta: not used for the result; kept so the signature matches
#         cellphonedb_meta().
#
# Returns: data.frame built from the RNA assay matrix, all cells included.
#   NOTE(review): `[` on a Seurat Assay presumably returns the `data` slot
#   (normalised values) rather than raw counts - confirm.
#
# The result is typically saved with
#   write.table(counts, file = ..., quote = F, col.names = T, row.names = T, sep = "\t")
cellphonedb_counts <- function(ser, meta) {
  rna <- ser@assays$RNA
  as.data.frame(as.matrix(rna[, seq_len(ncol(rna))]))
}
#gets metadata for cellphoneDB from your seurat object
# Cell annotation table for CellphoneDB.
#
# Args:
#   ser:  Seurat object.
#   meta: metadata column (or identity vector) to use as the cell type.
#
# Returns: data.frame with columns `Cell` (row names of the object's
#   meta.data) and `cell_type` (the identities after setting them to `meta`).
#
# The result is typically saved with
#   write.table(metadata, file = ..., quote = F, col.names = T, row.names = F, sep = "\t")
cellphonedb_meta <- function(ser, meta) {
  Idents(ser) <- meta
  data.frame(Cell = rownames(ser@meta.data), cell_type = Idents(ser))
}
# usage
#both combined
#hnc_til1 <- hnc[,hnc$tissue == 'TIL']
#hnc_til1 <- hnc_til1[,hnc_til1$global.cluster4 %in% c('CD4', 'CD8', 'Treg', 'cDC2_CD1C', 'DC3_LAMP3', 'cDC2_CD33', 'cDC1_CLEC9A')]
#hnc_til1$global.cluster5 <- Idents(hnc_til1)
#counts <- cellphonedb_counts(hnc_til1, 'global.cluster5')
#meta <- cellphonedb_meta(hnc_til1, 'global.cluster5')
#write.table(counts, file = paste('/Volumes/hdlab/Projects/HNC_SPORE/CellphoneDB/05042022_HNC_HPV+vsHPV-toTsubsets/', 'cillo_til_counts.txt', sep = ''), quote = F, col.names = T, row.names = T, sep = "\t")
#write.table(meta, file = paste('/Volumes/hdlab/Projects/HNC_SPORE/CellphoneDB/05042022_HNC_HPV+vsHPV-toTsubsets/', "/cillo_til_metadata.txt", sep = ''), quote = F, col.names = T, row.names = F, sep = "\t")
```
```{r cpdb summary (does not use receptor a/b annotation)}
cpdb_summary <- function(path = "", senders = "all", receivers = "all", drop = "none"){
outs <- list()
expected_files <- c("significant_means.txt", "pvalues.txt")
if(sum(expected_files %in% list.files(path)) !=2){
message(paste("missing file(s):", expected_files[!expected_files %in% list.files(path)]))
break
}
# file read in (sm for significant means, pv for pvalues)
sm <- read.table(paste(path, "/", expected_files[1], sep = ""), check.names = FALSE, header = TRUE, sep = "\t")
pv <- read.table(paste(path, "/", expected_files[2], sep = ""), check.names = FALSE, header = TRUE, sep = "\t")