From da9d735a9ecc60c8224fa856bdfbe20c485b9a93 Mon Sep 17 00:00:00 2001 From: ixxmu Date: Sat, 9 Sep 2023 15:32:20 +0000 Subject: [PATCH] =?UTF-8?q?=E8=B7=9F=E7=9D=80Nature=20Communications?= =?UTF-8?q?=E5=AD=A6=E4=BD=9C=E5=9B=BE=EF=BC=9AR=E8=AF=AD=E8=A8=80ggplot2?= =?UTF-8?q?=E7=AE=B1=E7=BA=BF=E5=9B=BE/=E6=8A=96=E5=8A=A8=E6=95=A3?= =?UTF-8?q?=E7=82=B9=E5=9B=BE=E5=B1=95=E7=A4=BA=E5=A4=9A=E7=89=A9=E7=A7=8D?= =?UTF-8?q?=E5=9F=BA=E5=9B=A0=E7=BB=84=E5=A4=A7=E5=B0=8F=E5=92=8CTE?= =?UTF-8?q?=E5=90=AB=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...\217\345\222\214TE\345\220\253\351\207\217.md" | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 "docs/2023-09/\350\267\237\347\235\200Nature_Communications\345\255\246\344\275\234\345\233\276_R\350\257\255\350\250\200ggplot2\347\256\261\347\272\277\345\233\276_\346\212\226\345\212\250\346\225\243\347\202\271\345\233\276\345\261\225\347\244\272\345\244\232\347\211\251\347\247\215\345\237\272\345\233\240\347\273\204\345\244\247\345\260\217\345\222\214TE\345\220\253\351\207\217.md" diff --git "a/docs/2023-09/\350\267\237\347\235\200Nature_Communications\345\255\246\344\275\234\345\233\276_R\350\257\255\350\250\200ggplot2\347\256\261\347\272\277\345\233\276_\346\212\226\345\212\250\346\225\243\347\202\271\345\233\276\345\261\225\347\244\272\345\244\232\347\211\251\347\247\215\345\237\272\345\233\240\347\273\204\345\244\247\345\260\217\345\222\214TE\345\220\253\351\207\217.md" "b/docs/2023-09/\350\267\237\347\235\200Nature_Communications\345\255\246\344\275\234\345\233\276_R\350\257\255\350\250\200ggplot2\347\256\261\347\272\277\345\233\276_\346\212\226\345\212\250\346\225\243\347\202\271\345\233\276\345\261\225\347\244\272\345\244\232\347\211\251\347\247\215\345\237\272\345\233\240\347\273\204\345\244\247\345\260\217\345\222\214TE\345\220\253\351\207\217.md" new file mode 100644 index 00000000..6cb8755b --- /dev/null +++ 
"b/docs/2023-09/\350\267\237\347\235\200Nature_Communications\345\255\246\344\275\234\345\233\276_R\350\257\255\350\250\200ggplot2\347\256\261\347\272\277\345\233\276_\346\212\226\345\212\250\346\225\243\347\202\271\345\233\276\345\261\225\347\244\272\345\244\232\347\211\251\347\247\215\345\237\272\345\233\240\347\273\204\345\244\247\345\260\217\345\222\214TE\345\220\253\351\207\217.md" @@ -0,0 +1,15 @@ +--- +title: "跟着Nature Communications学作图:R语言ggplot2箱线图/抖动散点图展示多物种基因组大小和TE含量" +date: 2023-09-09T15:31:24Z +draft: ["false"] +tags: [ + "fetched", + "小明的数据分析笔记本" +] +categories: ["Acdemic"] +--- +跟着Nature Communications学作图:R语言ggplot2箱线图/抖动散点图展示多物种基因组大小和TE含量 by 小明的数据分析笔记本 +------ +

论文

Large-scale genome sequencing of mycorrhizal fungi provides insights into the early evolution of symbiotic traits

https://www.nature.com/articles/s41467-020-18795-w

s41467-020-18795-w.pdf

这个是有读者在公众号后台留言问到的

我把论文找来看了一下,论文对应的图大部分都有数据,我们可以试着复现其中的图,先从最简单的开始。论文中的Figure2是箱线图加抖动散点图,论文的配色也很好看,可以保留作为自己的配色备选

部分示例数据截图

image.png

首先是读取数据

library(tidyverse)

# Read the comma-separated source data for the figure into `dat`.
dat <- read_delim(
  file = "data/20230909/Source Data/Source_Data_figure_1a.csv",
  delim = ","
)

# List the column names that were read.
colnames(dat)

# Count how many rows belong to each value of the Ecology column.
dat %>%
  pull(Ecology) %>%
  table()

左侧的图展示基因组大小,代码如下

# Left panel: genome size per ecological group (box plot + jittered points).
#
# Colours are keyed by group name rather than by position, so a group that
# happens to be absent from the data cannot shift the colours of the others.
# The order of the names also fixes the order of the groups on the y axis.
# Defined here so that this snippet is self-contained.
ecology_colors <- c(
  "Wood decayer"           = "#f1a2c9",
  "Endophyte"              = "#b6b3b3",
  "Arbuscular mycorrhizae" = "#a8e3ea",
  "Orchid mycorrhizae"     = "#fde05f",
  "Ericoid mycorrhizae"    = "#f49b40",
  "Pathogen"               = "#7ac84e",
  "Saprotroph"             = "#73a1cb",
  "Ectomycorrhizae"        = "#e15e53"
)

# The plot is stored in `p1` because the final composition step refers to it
# by that name.
p1 <- dat %>%
  # Exclude the two groups that are not part of this figure.
  filter(Ecology != "Yeast" & Ecology != "Parasite") %>%
  mutate(Ecology = factor(Ecology, levels = names(ecology_colors))) %>%
  ggplot(aes(x = Genome.size, y = Ecology)) +
  # Every observation is already drawn by geom_jitter(); hiding the box-plot
  # outliers avoids plotting those points twice.
  geom_boxplot(color = "gray", outlier.shape = NA) +
  geom_jitter(
    aes(color = Ecology),
    size = 5,
    show.legend = FALSE,
    alpha = 0.5
  ) +
  scale_color_manual(values = ecology_colors) +
  # Tick labels are divided by 1e6 to match the "Mbp" title (assumes
  # Genome.size is given in bp -- confirm against the data). Note that
  # observations outside `limits` are dropped by ggplot2 with a warning.
  scale_x_continuous(
    limits = c(0, 150000000),
    labels = function(x) x / 1000000
  ) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(x = NULL, y = NULL, title = "Genomes (Mbp)")

# Display the panel.
p1
image.png

右侧的图代码基本一样

# Right panel: repeat-element coverage per ecological group (box plot +
# jittered points). The y-axis labels are blanked in the theme below.
#
# Colours are keyed by group name rather than by position, so a group that
# happens to be absent from the data cannot shift the colours of the others.
# The order of the names also fixes the order of the groups on the y axis.
# Defined here so that this snippet is self-contained.
ecology_colors <- c(
  "Wood decayer"           = "#f1a2c9",
  "Endophyte"              = "#b6b3b3",
  "Arbuscular mycorrhizae" = "#a8e3ea",
  "Orchid mycorrhizae"     = "#fde05f",
  "Ericoid mycorrhizae"    = "#f49b40",
  "Pathogen"               = "#7ac84e",
  "Saprotroph"             = "#73a1cb",
  "Ectomycorrhizae"        = "#e15e53"
)

# The plot is stored in `p2` because the final composition step refers to it
# by that name.
p2 <- dat %>%
  # Exclude the two groups that are not part of this figure.
  filter(Ecology != "Yeast" & Ecology != "Parasite") %>%
  mutate(Ecology = factor(Ecology, levels = names(ecology_colors))) %>%
  ggplot(aes(x = TE.CoverageTotal, y = Ecology)) +
  # Every observation is already drawn by geom_jitter(); hiding the box-plot
  # outliers avoids plotting those points twice.
  geom_boxplot(color = "gray", outlier.shape = NA) +
  geom_jitter(
    aes(color = Ecology),
    size = 5,
    show.legend = FALSE,
    alpha = 0.5
  ) +
  scale_color_manual(values = ecology_colors) +
  # Axis spans 0-100 to match the "(%)" title. Observations outside `limits`
  # are dropped by ggplot2 with a warning.
  scale_x_continuous(limits = c(0, 100)) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL, title = "Repeat element coverage (%)")

# Display the panel.
p2
image.png

最后是拼图

# patchwork supplies the `+` operator used below to combine ggplot objects.
library(patchwork)

# Combine the two panels into one figure; `p1` and `p2` must already hold
# the two plots.
p1 + p2
image.png

示例数据可以到论文中下载,代码可以在推文中复制,或者给推文打赏一元获取我整理好的数据和代码

欢迎大家关注我的公众号

小明的数据分析笔记本

小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!


+
+原文链接