diff --git "a/docs/2024-05/scRNA_seq\346\225\260\346\215\256\345\210\206\346\236\220_seurat_v5___\345\270\270\347\224\250\345\221\275\344\273\244.md" "b/docs/2024-05/scRNA_seq\346\225\260\346\215\256\345\210\206\346\236\220_seurat_v5___\345\270\270\347\224\250\345\221\275\344\273\244.md" deleted file mode 100644 index f0801fa7..00000000 --- "a/docs/2024-05/scRNA_seq\346\225\260\346\215\256\345\210\206\346\236\220_seurat_v5___\345\270\270\347\224\250\345\221\275\344\273\244.md" +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: "scRNA-seq数据分析(seurat v5)——常用命令" -date: 2024-05-16T01:07:17Z -draft: ["false"] -tags: [ - "fetched", - "被炸熟的虾" -] -categories: ["Acdemic"] ---- -scRNA-seq数据分析(seurat v5)——常用命令 by 被炸熟的虾 ------- -
哈喽小伙伴们,seurat官网整理了一些常用命令,方便我们调用。一起来看看吧!
Standard Seurat workflow(Seurat标准工作流程)
Seurat Object Data Access(Seurat对象数据访问)
Subsetting and merging(子集和合并)
Pseudobulk analysis(Pseudobulk分析)
Visualization in Seurat(Seurat的可视化)
Multi-Assay Features(多检测特性)
Additional resources(其他资源)
# Standard Seurat workflow on `pbmc`: normalize, pick variable features,
# scale, run PCA, build the neighbor graph, cluster, embed with UMAP, plot.
pbmc <- NormalizeData(pbmc)
pbmc <- FindVariableFeatures(pbmc)
pbmc <- ScaleData(pbmc)
pbmc <- RunPCA(pbmc)
pbmc <- FindNeighbors(pbmc, dims = 1:30)
pbmc <- FindClusters(pbmc)
pbmc <- RunUMAP(pbmc, dims = 1:30)
DimPlot(pbmc, reduction = "umap")
SCtransform 版本
# Same workflow, with SCTransform standing in for the
# NormalizeData / FindVariableFeatures / ScaleData steps.
pbmc <- SCTransform(pbmc)
pbmc <- RunPCA(pbmc)
pbmc <- FindNeighbors(pbmc, dims = 1:30)
pbmc <- FindClusters(pbmc)
pbmc <- RunUMAP(pbmc, dims = 1:30)
DimPlot(pbmc, reduction = "umap")
# Several commands can be chained together with the %>% pipe
pbmc <- pbmc %>%
  SCTransform() %>%
  RunPCA() %>%
  FindNeighbors(dims = 1:30) %>%
  FindClusters() %>%
  RunUMAP(dims = 1:30)
1. 细胞、基因和layer名称
# Several equivalent ways to get cell names, feature names, and their totals
# Cell names
colnames(pbmc)
Cells(pbmc)
# Feature (gene) names
Features(pbmc)
rownames(pbmc)
# Number of cells / features
num_cells <- ncol(pbmc)
num_features <- nrow(pbmc)
# List the layers of the object
Layers(pbmc)
# List the assays of a multimodal object (here `cbmc`)
Assays(cbmc)
# Assay-specific features (genes / ADT)
Features(cbmc[["RNA"]])
Features(cbmc[["ADT"]])
# Variable feature names
VariableFeatures(pbmc)
# Set variable features; `var.gene.names` is not defined in this snippet and
# must be supplied by the reader
VariableFeatures(cbmc) <- var.gene.names
# Set variable features for one specific assay
VariableFeatures(cbmc[["ADT"]]) <- var.gene.names
2. 确定分类标签
# Setting and retrieving cell identities
# Set the identity classes to an existing metadata column
Idents(object = pbmc) <- "seurat_annotations"
# View cell identities and get a summary table
Idents(pbmc)
table(Idents(pbmc))
# Set the identity of all cells to "CD4 T cells"
Idents(pbmc) <- "CD4 T cells"
# Set the identity for a selected group of cells only
pbmc.cells <- Cells(pbmc)
Idents(object = pbmc, cells = pbmc.cells[1:10]) <- "CD4 T cells"
# Get the cell identity classes
Idents(object = pbmc)
levels(x = pbmc)
# Store the cell identity classes in metadata
# NOTE(review): the two lines below both write to "old.ident" and look like
# alternatives; StashIdent may be deprecated in current Seurat - confirm
pbmc[["old.ident"]] <- Idents(object = pbmc)
pbmc <- StashIdent(object = pbmc, save.name = "old.ident")
# Rename an identity class
pbmc <- RenameIdents(object = pbmc, `CD4 T cells` = "T Helper cells")
3. 细胞metadata
# View the metadata data frame, stored in object@meta.data
pbmc[[]]
# Retrieve specific values from the metadata
pbmc$nCount_RNA
pbmc[[c("percent.mito", "nFeature_RNA")]]
# Add metadata, see ?AddMetaData
# Here: one random label ("g1" or "g2") per cell, stored as column `groups`
random_group_labels <- sample(x = c("g1", "g2"), size = ncol(x = pbmc), replace = TRUE)
pbmc$groups <- random_group_labels
4. 表达数据(在 Seurat v5 中作为layers存储)
# Retrieve data from an expression matrix: the RNA counts matrix
pbmc[["RNA"]]$counts
# Alternative accessor function with the same result
LayerData(pbmc, assay = "RNA", layer = "counts")
# GetAssayData from Seurat v4 is still supported
GetAssayData(object = pbmc, assay = "RNA", slot = "counts")
# ADT counts matrix (multimodal object)
cbmc[["ADT"]]$counts
# Set expression data; `new.data` is assumed to be a new expression matrix
# supplied by the reader (it is not defined in this snippet)
pbmc[["RNA"]]$counts <- new.data
# Alternative setter function with the same result
LayerData(pbmc, assay = "RNA", layer = "counts") <- new.data
# SetAssayData from Seurat v4 is still supported
# (the closing parenthesis was missing, which made this line unparseable)
pbmc <- SetAssayData(object = pbmc, slot = "counts", new.data = new.data)
5. 降维
# Get cell embeddings and feature loadings
# Cell embeddings are stored in pbmc[["pca"]]@cell.embeddings
Embeddings(pbmc, reduction = "pca")
# Feature loadings are stored in pbmc[["pca"]]@feature.loadings
Loadings(pbmc, reduction = "pca")
# Create a custom dimensional reduction; the loadings matrix is optional.
# `new.embeddings` / `new.loadings` must be supplied by the reader.
# The key is written as alphanumeric characters followed by one underscore,
# the form Seurat expects, so it is not rewritten with a warning.
new_reduction <- CreateDimReducObject(
  embeddings = new.embeddings,
  loadings = new.loadings,
  key = "custompca_"
)
pbmc[["custom_pca"]] <- new_reduction
6. FetchData (获取数据)
# FetchData can access anything from expression matrices, cell embeddings,
# or metadata
# Use FetchData to pull a single variable or a small group of variables
FetchData(object = pbmc, vars = c("PC_1", "nFeature_RNA", "MS4A1"), layer = "counts")
三、子集和合并
1. Seurat对象的子集
# Subset a Seurat object by identity class, see ?subset
subset(x = pbmc, idents = "B")
# invert = TRUE keeps every class except the listed ones
subset(x = pbmc, idents = c("Naive CD4 T", "CD8 T"), invert = TRUE)
# Subset on the expression level of a gene/feature
subset(x = pbmc, subset = MS4A1 > 2.5)
# Subset on a combination of criteria
subset(x = pbmc, subset = MS4A1 > 2.5 & PC_1 > 5)
subset(x = pbmc, subset = MS4A1 > 2.5, idents = "B")
# Subset on a value in the object metadata
subset(x = pbmc, subset = groups == "g1")
# Downsample the number of cells per identity class
subset(x = pbmc, downsample = 100)
2. 分层
# In Seurat v5 an assay can be split into layers directly: all expression
# data stays in one object, but the samples are separated into layers.
# After splitting, the object can go straight into the integration workflow.
ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim)
Layers(ifnb)
# If desired, for example after integration, the layers can be joined again
ifnb <- JoinLayers(ifnb)
3. 拆分对象
# An object can be split into a list of several objects based on metadata
# Here: create a list of two objects, one per value of `stim`
ifnb_list <- SplitObject(ifnb, split.by = "stim")
ifnb_list$CTRL
ifnb_list$STIM
4. 合并对象(没有integration)
合并对象是进行integration的前提。如果不继续integration,请在合并后重新加入layers。
#合并两个Seurat对象
# Merge two Seurat objects
merged_obj <- merge(x = ifnb_list$CTRL, y = ifnb_list$STIM)
# Not integrating, so rejoin the layers. JoinLayers() on a Seurat object
# returns the whole object, so assign it back to the object itself rather
# than into the "RNA" assay slot.
merged_obj <- JoinLayers(merged_obj)
# Merge more than two Seurat objects
merge(x = pbmc1, y = list(pbmc2, pbmc3))
5. 合并对象(有integration)
更多信息请参阅integration介绍:https://satijalab.org/seurat/articles/integration_introduction
# Merge, run the standard preprocessing on the merged object, then integrate
merged_obj <- merge(x = ifnb_list$CTRL, y = ifnb_list$STIM)
merged_obj <- NormalizeData(merged_obj)
merged_obj <- FindVariableFeatures(merged_obj)
merged_obj <- ScaleData(merged_obj)
merged_obj <- RunPCA(merged_obj)
# Integrate the merged object itself; the snippet previously passed `obj`,
# which is never defined here.
merged_obj <- IntegrateLayers(
  object = merged_obj,
  method = RPCAIntegration,
  orig.reduction = "pca",
  new.reduction = "integrated.rpca",
  verbose = FALSE
)
# Now that integration is complete, rejoin the layers. JoinLayers() on a
# Seurat object returns the whole object, so assign it back to the object.
merged_obj <- JoinLayers(merged_obj)
为了从高通量单细胞mRNA数据中增加基因数量和基因表达相关性,我们从同一细胞群中的多个细胞中收集数据,制作假细胞(Pseudocell)用于网络解释。看来这个Pseudocell概念是为了弥补稀疏矩阵在计算相关性上的缺陷,毕竟零值太多,影响相关性的计算。
基于多个类别将细胞分组在一起。有关如何将donor_id列添加到metadata的信息,请参阅DE教程(https://satijalab.org/seurat/articles/de_vignette)。
# Pseudobulk cells grouped by cell type only
bulk <- AggregateExpression(ifnb, group.by = "seurat_annotations", return.seurat = TRUE)
Cells(bulk)
# Pseudobulk cells grouped by stimulation condition and cell type
bulk <- AggregateExpression(ifnb, group.by = c("stim", "seurat_annotations"), return.seurat = TRUE)
Cells(bulk)
# Pseudobulk cells grouped by stimulation condition, cell type, and donor
# (requires a `donor_id` metadata column, which this snippet does not create)
bulk <- AggregateExpression(ifnb, group.by = c("stim", "seurat_annotations", "donor_id"), return.seurat = TRUE)
Cells(bulk)
Seurat 有一个庞大的、基于 ggplot2 的绘图库。默认情况下,所有绘图函数都将返回一个 ggplot2 绘图,从而允许使用 ggplot2 轻松自定义。
# Dimensional reduction plot
DimPlot(object = pbmc, reduction = "pca")
# Dimensional reduction plot with cells colored by a quantitative feature;
# defaults to UMAP if available
FeaturePlot(object = pbmc, features = "MS4A1")
# Scatter plot across single cells
FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "PC_1")
FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "CD3D")
# Scatter plot across individual features for two cells; replaces CellPlot
CellScatter(object = pbmc, cell1 = "AGTCTACTAGGGTG", cell2 = "CACAGATGGTTTCT")
VariableFeaturePlot(object = pbmc)
# Violin and ridge plots
VlnPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"))
# Spell the argument out as `features` instead of relying on partial
# argument matching of `feature`
RidgePlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"))
# Heatmaps (visualize scale.data)
DimHeatmap(object = pbmc, reduction = "pca", cells = 200)
# Standard workflow; `heatmap_markers` must be supplied by the reader
pbmc <- ScaleData(pbmc, features = heatmap_markers)
DoHeatmap(object = pbmc, features = heatmap_markers)
# sctransform workflow
pbmc <- GetResidual(pbmc, features = heatmap_markers)
DoHeatmap(object = pbmc, features = heatmap_markers)
# Heatmap with at most 100 cells per identity class.
# `cells` expects cell names, so take them from the downsampled object with
# Cells(); the closing parenthesis was also missing from the original line.
DoHeatmap(
  pbmc,
  heatmap_markers,
  cells = Cells(subset(pbmc, downsample = 100))
)
# New things to try! Note that plotting functions now return ggplot2 objects,
# so themes, titles, and other options can be added to them
VlnPlot(object = pbmc, features = "MS4A1", split.by = "groups")
DotPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"), split.by = "groups")
FeaturePlot(object = pbmc, features = c("MS4A1", "CD79A"), blend = TRUE)
DimPlot(object = pbmc) + DarkTheme()
DimPlot(object = pbmc) + labs(title = "2,700 PBMCs clustered using Seurat and viewed\non a two-dimensional UMAP")
Seurat 提供了许多预构建的主题,可以添加到 ggplot2 绘图中以便快速自定义。
主题 | 功能 |
---|---|
DarkTheme | 设置带有白色文本的黑色背景 |
FontSize | 设置绘图中各种元素的字体大小 |
NoAxes | 删除轴和轴文本 |
NoLegend | 删除所有图例元素 |
RestoreLegend | 删除后恢复图例 |
RotatedAxis | 旋转 x 轴标签 |
# Plotting helper functions work with ggplot2-based scatter plots,
# such as DimPlot, FeaturePlot, CellScatter, and FeatureScatter
plot <- DimPlot(object = pbmc) + NoLegend()
# HoverLocator replaces the former `do.hover` argument.
# It can also show extra data through the `information` argument,
# designed to work smoothly with FetchData
HoverLocator(plot = plot, information = FetchData(object = pbmc, vars = c("ident", "PC_1", "nFeature_RNA")))
# FeatureLocator replaces the former `do.identify`
select.cells <- FeatureLocator(plot = plot)
# Label points on a ggplot object
LabelPoints(plot = plot, points = TopCells(object = pbmc[["pca"]]), repel = TRUE)
使用Seurat,您可以轻松地在单细胞水平的不同assay之间切换(例如来自CITE-seq的ADT计数,或整合/批次校正后的数据)。大多数函数现在都接受assay参数,但您可以设置默认assay以避免重复指定。
# Build a multimodal object; `cbmc.rna` and `cbmc.adt` are count matrices
# that must be supplied by the reader
cbmc <- CreateSeuratObject(counts = cbmc.rna)
# Add the ADT data as a second assay
cbmc[["ADT"]] <- CreateAssayObject(counts = cbmc.adt)
# Run analyses by specifying the assay to use.
# NormalizeData returns the updated object, so assign it back; otherwise the
# normalized data is discarded.
cbmc <- NormalizeData(object = cbmc, assay = "RNA")
# The argument is `normalization.method`; a bare `method =` does not match it
cbmc <- NormalizeData(
  object = cbmc,
  assay = "ADT",
  normalization.method = "CLR"
)
# Retrieve and set the default assay
DefaultAssay(object = cbmc)
DefaultAssay(object = cbmc) <- "ADT"
DefaultAssay(object = cbmc)
# Pull feature expression from both assays by using keys
# (the "rna_" / "adt_" prefixes select the assay)
FetchData(object = cbmc, vars = c("rna_CD3E", "adt_CD3"))
# Plot data from multiple assays using keys
FeatureScatter(object = cbmc, feature1 = "rna_CD3E", feature2 = "adt_CD3")
对 Seurat v5 中数据存储的一些技术更改特别感兴趣的用户可以浏览以下资源: