analysis.Rmd

---
title: "Revised Analysis of phenological shifts in Germany"
output:
  html_notebook: default
  pdf_document: default
---


```{r setup, include=FALSE}
# chunk defaults: hide the code and centre all figures
knitr::opts_chunk$set(echo = FALSE, fig.align = "center")

# session options:
#   * dplyr.summarise.inform = FALSE silences the summarise() grouping message
#   * stringsAsFactors = FALSE turns off the strings-as-factors behaviour
options(stringsAsFactors       = FALSE,
        dplyr.summarise.inform = FALSE)

library("data.table")
library("rgbif")
library("raster")
library("broom.mixed")
library("lme4")
library("cowplot")
library("magick")

# tidyverse stuff last to avoid masking these functions
library("magrittr")
library("forcats")
library("stringr")
library("ggplot2")
library("tidyr")
library("dplyr")
library("tidyselect")

## Functions ----------------------------------------------

# Load the project's helper functions into the session.
# NOTE(review): helpers that are called in this notebook but not defined in it
#   (e.g. ypos(), sig_letters()) presumably come from this file - confirm.
source("scripts/functions.R")

# Control variables -------------------------------------------------

## Script control -----------------------------------------

# A script rendering this notebook can define `override_control` to decide
#   whether the settings below are applied; without it they always are.
use_notebook_control <- if (exists("override_control")) {
  override_control
} else {
  TRUE
}

if (use_notebook_control) {
  
  # name of analysis
  # prerequisite: its scripts are available under scripts/analysis_dname/
  analysis_dname <- "full_analysis_data/"
  
  # overall test run?
  # works on a dataset with a reduced number of species, so everything is
  #   faster
  test_run <- FALSE
  
  
  ## Data getting:
  
  # retrieve climate data from DWD even if it is already present?
  force.clim.get      <- FALSE
  
  # retrieve plant trait data from bioFlor even if it is already present?
  force.traits.get    <- FALSE
  
  # force running of the occurrence getting script?
  force.occ.get       <- FALSE
  
  # which of the downloaded files to delete:
  #   - "all": delete both .zip files and extracted occ .txt files
  #   - "zip": delete only .zip files, keep .txt
  #   - "txt": delete only .txt files, keep .zip
  #   - "non": keep all files in download folder
  delete.occ.download <- "all"
  
  # force pruning of data and addition of climate and elevation?
  force.occ.prune     <- FALSE
  
  
  ## Modeling
  
  # force running of the model generation script?
  force.models           <- FALSE
  
  # run the model script independently? If so:
  # * models are taken from the testing model formula file
  # * no checks are performed whether the models are appropriate for the main
  #   analysis
  # * the results are not integrated into the analysis
  run.models.ind         <- test_run
  
  # treatment of predictor variables (not currently used)
  center_preds           <- FALSE
  scale_preds            <- FALSE
  
  # save a plot of random regression slopes over the data?
  # adds running time, increasing with the number of data points
  plot_rnd_slopes        <- FALSE
  
  # save diagnostics plots?
  # takes a very long time on the full dataset
  plot_diagnostics       <- FALSE
  
  # save faceted versions of the diagnostic plots?
  plot_diagnostics_facet <- FALSE
  
  # number of restart attempts for a model that failed to converge
  n_restart               <- 2
  
  
  ## Plot saving
  
  # save plots to disk?
  save_plots  <- TRUE
  
  # save tables (as .docx)?
  save_tables <- TRUE
  
}

## Path names ---------------------------------------------

# analysis specific directories for data and scripts
data_dir   <- paste0("data/",    analysis_dname)
script_dir <- paste0("scripts/", analysis_dname)

# occurrence data and random effect files; test runs use the "_test" variants
if (test_run) {
  dat_occ_file        <- paste0(data_dir, "occurrences_full_pruned_clim_elev_test.csv")
  dat_slope_temp_file <- paste0(data_dir, "rnd_eff_temp_test.csv")
  dat_slope_year_file <- paste0(data_dir, "rnd_eff_year_test.csv")
} else {
  dat_occ_file        <- paste0(data_dir, "occurrences_full_pruned_clim_elev.csv")
  dat_slope_temp_file <- paste0(data_dir, "rnd_eff_temp.csv")
  dat_slope_year_file <- paste0(data_dir, "rnd_eff_year.csv")
}


### Model formula stuff 

# directory with the model formulas of this analysis
mod_form_base <- "static_data/model_formulas/"
mod_form_dir  <- paste0(mod_form_base, analysis_dname)

# fall back to the default formulas if no analysis specific ones exist
if (!dir.exists(mod_form_dir)) {
  mod_form_dir <- paste0(mod_form_base, "default/")
}

# test runs and independent model runs read the testing formula file
if (test_run || run.models.ind) {
  mod_form_file <- paste0(mod_form_dir, "model_formulas_test.txt")
} else {
  mod_form_file <- paste0(mod_form_dir, "model_formulas.txt")
}

## Set graphics parameters --------------------------------

# colours: raw data points, static lines, axis elements, annotations
col.pt         <- "black"
col.stc.line   <- "gray40"
col.ax         <- "gray40"
col.annot      <- "gray31"

# colours: low and high end of density gradients
col.grad.low   <- "gray"
col.grad.high  <- "black"

# alpha: raw data points, raw data lines, ribbons
alpha.pt       <- 0.5
alpha.ln       <- 0.2
alpha.ribbon   <- 0.3

# sizes: data lines and annotation text
size.dat.line  <- 2
size.annot.txt <- 3

## define custom plotting theme ---------------------------

# Plot theme of the notebook: theme_minimal() with the panel grid removed.
# Arguments in ... are handed on to theme() and so can set further theme
#   elements (e.g. legend.position).
theme_shifts <- function (...) {
  
  theme_overrides <- theme(panel.grid = element_blank(), ...)
  
  theme_minimal() %+replace% theme_overrides
  
}

## define clearer group names -----------------------------

# lookup (old name = new name) from group names to common names
recode.vec <- c(Plants      = "Plants",
                Coleoptera  = "Beetles",
                Diptera     = "Flies",
                Hymenoptera = "Bees",
                Lepidoptera = "Butterflies/\nMoths")

# lookup for the interaction groups
recode.vec.int <- c(
  Hoverfly  = "Hoverfly - Plant",
  Bee       = "Bee - Plant",
  Butterfly = "Butterfly - Plant"
)

# lookup for the PollDep levels
recode.vec.PollDep <- c(
  Yes          = "Pollinator dependent",
  Intermediate = "Intermediate",
  No           = "Pollinator independent"
)

### set colour tables -------------------------------------------------------

# master colour table, one row per group label
# DO NOT CHANGE ORDER, only append (subsets of this table are taken by row
#   position)
col.grp <- data.frame(
  group  = c("Beetles", "Flies", "Bees", "Butterflies/\nMoths",
             "Insects overall", "Plants",
             "Hoverfly - Plant", "Bee - Plant", "Butterfly - Plant",
             "Overall",
             "Pollinator dependent", "Intermediate", "Pollinator independent"),
  colour = c("#9815db", "#f41d0f", "#ffa500", "#4744ff",
             "gold", "#008a00",
             "#f41d0f", "#ffa500", "#4744ff",
             "deepskyblue",
             "red", "#ffa500", "#008a00")
)

# the same information as a named vector (group label -> colour)
col.grp.vec <- setNames(col.grp$colour, col.grp$group)

# All tables below are row subsets of col.grp, selected by position.

# plant - pollinator comparisons
col.plapoll        <- col.grp[5:6, ]

# pollinator groups: static version (no exclusion) and working version
col.poll.stc       <- col.grp[1:4, ]
col.poll           <- col.grp[1:4, ]

# plants only
col.plant          <- col.grp[6, ]

# group comparisons (plant group added at the end)
col.group.stc      <- bind_rows(col.poll.stc, col.plant)
col.group          <- bind_rows(col.poll,     col.plant)

# group comparisons including overall
col.group2.stc     <- bind_rows(col.poll.stc, col.plapoll)
col.group2         <- bind_rows(col.poll,     col.plapoll)

# PollDep comparisons
col.PollDep        <- col.grp[11:13, ]

# interaction comparisons
col.int            <- col.grp[7:9, ]

# interaction colours carrying the standard (short) group names
col.int.stc        <- col.grp[7:9, ]
col.int.stc$group  <- c("Hoverfly", "Bee", "Butterfly")

# interaction comparison with the overall group
col.int2           <- col.grp[7:10, ]

# interaction group comparisons (plant group added at the end)
col.int3           <- bind_rows(col.int, col.plant)

# colours for id.grps named after the scientific scheme
# TODO: Apply colour scheme throughout notebook
col.group.sci <- setNames(
  col.grp$colour[c(1:4, 6)],
  c("Coleoptera", "Diptera", "Hymenoptera", "Lepidoptera", "Plants")
)

```


```{r Ensure additional data is present}

# Plant trait data ----------------------------------------

# The trait script is run when the trait table is missing on disk or when a
#   refresh is forced via force.traits.get.
run.traits.get <- !file.exists("static_data/bioflor_traits.csv")

# both flags are single logicals, so use the scalar operator
if (run.traits.get || force.traits.get) {
  source("scripts/get_bioflor_traits.R")
}

# Interaction data ----------------------------------------

# The interaction data is not fetched by this notebook; abort with download
#   instructions if the file is not in place.
if (!file.exists(paste0("static_data/plant_pollinator_interactions_for_",
                        "potential_networks_2018.csv"))) {
  # note the trailing space after "the": the pieces are glued with paste0
  stop(paste0("Interaction data not found, please download and place the ",
              "file in the static_data directory.\nDownload from ",
              "'https://doi.org/10.5285/6d8d5cb5-bd54-4da7-903a-15bd4bbd531b'."))
}

```


```{r get_occurrence_data, include=FALSE}

# For now this only checks whether the data and the random effects are present.

# occurrence data: (re-)build if the file is missing or if getting / pruning
#   is forced
if (any(!file.exists(dat_occ_file)) || force.occ.get || force.occ.prune) {
  
  if (test_run) {
    
    # this script automatically ensures full data is present
    source("scripts/get_occ_test_data.R")
    
  } else {
    
    source("scripts/get_occ_data_all_in_one.R")
    
  }
}

# models: (re-)run if a random effect file is missing, if modelling is forced
#   or if the models are to be run independently
if (any(!file.exists(dat_slope_temp_file, dat_slope_year_file)) ||
    force.models || run.models.ind) {
  source("scripts/run_models.R")
}

```


```{r load_occurrence_slope_data}

# plant trait data, with PollDep recoded to clearer names
plant_traits <- fread("static_data/bioflor_traits.csv",
                      na.strings = c("", "NA"))
plant_traits <- mutate(plant_traits,
                       PollDep = recode_factor(PollDep, !!! recode.vec.PollDep))


# occurrence data with the trait data attached
dat.occ <- fread(dat_occ_file, showProgress = FALSE) %>% 
  left_join(plant_traits, by = "species") %>%
  mutate(
    # trivial names for id.grp
    # (recode only takes name/value pairs, so !!! expansion necessary)
    id.grp  = recode_factor(id.grp, !!! recode.vec),
    
    # order factor levels so that PollDep is in order of increasing slope
    PollDep = fct_relevel(PollDep, recode.vec.PollDep)
  )


# slope data (random effects)
slopes_temp <- fread(dat_slope_temp_file)
slopes_year <- fread(dat_slope_year_file)


# re-scale data
# Slopes and their standard errors are divided by the standard deviation of
#   the respective predictor (temp or year) in the occurrence data.
# NOTE(review): scale_preds / center_preds are marked "not currently used" in
#   the control section, so these back-transformations have presumably not
#   been exercised recently - verify them before switching either on.
if (scale_preds) {
  sd_temp   <- sd(dat.occ$temp)
  
  slopes_temp <- slopes_temp %>% 
    mutate(slope         = slope         / sd_temp,
           slope_std_err = slope_std_err / sd_temp)
  
  
  sd_year   <- sd(dat.occ$year)
  
  slopes_year <- slopes_year %>% 
    mutate(slope         = slope         / sd_year,
           slope_std_err = slope_std_err / sd_year)
}

# shift intercepts by slope times the mean of the sd-scaled predictor
if (center_preds) {
  scale_mean_temp <- mean(dat.occ$temp / sd(dat.occ$temp))
  
  slopes_temp <- slopes_temp %>% 
    mutate(intercept = intercept - slope * scale_mean_temp)
  
  
  # year has to be scaled by its own sd; this used to divide by sd(temp),
  #   a copy-paste slip from the temperature case above
  scale_mean_year <- mean(dat.occ$year / sd(dat.occ$year))
  
  slopes_year <- slopes_year %>% 
    mutate(intercept = intercept - slope * scale_mean_year)
}

# taxonomic columns that are added to the slopes
col_vec <- c("kingdom", "phylum", "id.grp", "order",
             "family", "genus", "species")

# distinct taxonomic combinations found in the occurrence data
tax_df <- distinct(select(dat.occ, all_of(col_vec)))


# species sample sizes, with id.grp recoded to trivial names
# (recode only takes name/value pairs, so !!! expansion necessary)
species_n <- fread(paste0(data_dir, "n_species_pruned_sum.csv"))
species_n <- mutate(species_n,
                    id.grp = recode_factor(id.grp, !!! recode.vec))
  

# temperature slopes: add taxonomy, traits and species sample sizes
slopes_temp <- left_join(slopes_temp, tax_df,       by = "species")
slopes_temp <- left_join(slopes_temp, plant_traits, by = "species")
slopes_temp <- left_join(slopes_temp, species_n,    by = c("id.grp", "species"))


# year slopes: add taxonomy ...
slopes_year <- left_join(slopes_year, tax_df, by = "species")

# ... scale slopes and std_err from days per year to days per decade ...
slopes_year <- mutate(slopes_year,
                      slope         = slope         * 10,
                      slope_std_err = slope_std_err * 10)

# ... then add traits and species sample sizes
slopes_year <- left_join(slopes_year, plant_traits, by = "species")
slopes_year <- left_join(slopes_year, species_n,    by = c("id.grp", "species"))


# one table holding the slopes for both variables; columns that exist in both
#   inputs but are not join keys get the suffix "_temp" or "_year"
slopes_all <- left_join(
  slopes_temp,
  slopes_year,
  by     = c("kingdom", "phylum", "id.grp", "order", "family", "genus",
             "n",
             names(plant_traits)),
  suffix = c("_temp", "_year")
) %>% 
  
  # drop redundant cols
  select(-starts_with(c("group_", "main_var_")))

```


```{r show_record_sizes_by_group}

# record and species counts per group, with readable column names
dat_nrec <- fread(paste0(data_dir, "n_records_by_idgrp_pruned.csv"),
                  showProgress = FALSE) %>%
  
  # trivial names for id.grp
  #   (recode only takes name/value pairs, so !!! expansion necessary)
  mutate(id.grp = recode_factor(id.grp, !!! recode.vec)) %>% 
  
  rename(Group                        = id.grp,
         `n records`                  = n_rec,
         `n species`                  = n_species,
         `Min records per species`    = min_species_rec,
         `Max records per species`    = max_species_rec,
         `Median records per species` = median_species_rec)

# display the table
dat_nrec

```


```{r plot_record_distributions}

# number of records per year, one panel per group, counts on a log10 axis
record_dist_plot <- ggplot(dat.occ, aes(year, col = id.grp)) +
  
  geom_bar(size = 5, position = "dodge", width = 0.8, orientation = "x") +
  
  # each group gets its own count axis
  facet_wrap( ~ id.grp, nrow = 2, scales = "free_y") +
  
  scale_y_log10() +
  
  scale_color_manual(name       = "Group",
                     aesthetics = c("color", "fill"),
                     values     = col.grp.vec) +
  
  labs(x = "Year", y = "# Records") +
  
  theme_shifts(legend.position = "none")

record_dist_plot

```


```{r analyze_time_temp_association}

# temperature per year and (reconstructed) climate tile
dat.temp <- dat.occ %>% 
  select(year, temp, lat, long) %>% 
  
  # reconstruct climate tiles from lat and long: bin both in steps of 0.5
  mutate(lat  = cut(lat,
                    breaks = seq(min(floor(lat)),
                                 max(ceiling(lat)),
                                 0.5)),
         long = cut(long,
                    breaks = seq(min(floor(long)),
                                 max(ceiling(long)),
                                 0.5))) %>% 
  
  # keep every year / temp / tile combination once
  distinct() %>% 
  
  # turn the interval labels back into numbers: extract the bounds ...
  mutate(lat  = str_extract_all(lat,  "[0-9\\.]+"),
         long = str_extract_all(long, "[0-9\\.]+")) %>% 
  
  # ... and use their mean as the coordinate (vapply pins the result type)
  mutate(lat  = vapply(lat,  function(x) mean(as.numeric(x)), numeric(1)),
         long = vapply(long, function(x) mean(as.numeric(x)), numeric(1))) %>% 
  
  # with the summarise step commented out, this pair changes no values
  group_by(year) %>% 
  # summarize(temp = mean(temp)) %>% 
  ungroup()


# linear trend of temperature over the years
lm_time_temp     <- lm(data = dat.temp, temp ~ year)
lm_time_temp_sum <- summary(lm_time_temp)

r_sqare   <- lm_time_temp_sum[["adj.r.squared"]]
slope     <- coef(lm_time_temp_sum)[2,1]

# significance symbol of the slope; include.lowest = TRUE closes the last
#   interval so that a p-value of exactly 1 gets " " instead of NA
slope_sig <- coef(lm_time_temp_sum)[2,4] %>% cut(
    breaks = c(0, 0.001, 0.01, 0.05, 0.1, 1),
    labels = c("***", "**", "*", ".", " "),
    right  = FALSE,
    include.lowest = TRUE )

time_temp_plt <- ggplot(dat.temp, aes(year, temp)) +
  geom_point() + 
  
  # yearly means; fun.args has to be a list of arguments for fun.data
  stat_summary(geom = "line", fun.data = mean_se,
               fun.args = list(mult = 1.96)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  stat_summary(col = "red", fun.data = mean_se,
               fun.args = list(mult = 1.96)) +
  
  # model summary; annotate() draws the label once, whereas a geom_text()
  #   layer mapped onto dat.temp would draw it once per data row
  annotate("text",
           x = mean(unique(dat.temp$year)),
           y = ypos(dat.temp$temp, frac = 0.1),
           label = paste(
             paste("R² =", round(r_sqare, 2)),
             paste("slope =", round(slope * 10, 2), "[\u00B0C / decade]"),
             slope_sig,
             sep = "\n"),
           colour = col.annot) +
  labs(x = "Year",
       y = "Temperature [\u00B0C] (\u00B1 95% CI)") +
  
  # leave room for the annotation
  ylim(NA, ypos(dat.temp$temp, frac = 0.2)) +
  theme_shifts(legend.position = "none")


time_temp_plt + 
  labs(title = "Change in temperature at record location")

```


```{r plot_shifts}

# Test runs only: per group, plot the raw records (doy over year) for each
#   species together with the species' regression line over time.
if (test_run) {
  for (id.grp.var in unique(dat.occ$id.grp)) {
    
    dat.occ.plt <- dat.occ %>% filter(id.grp == id.grp.var)
    slopes.plt  <- slopes_year %>% filter(id.grp == id.grp.var)
    
    print(
    ggplot() +
      geom_point(data = dat.occ.plt,
                 aes(year, doy, col = id.grp)) +
      
      # slopes_year holds slopes in days per decade (they were multiplied by
      #   10 after loading), while the x axis is in years: convert back to
      #   days per year so the line matches the data
      geom_abline(data = slopes.plt,
                  aes(intercept = intercept, slope = slope / 10),
                  col = col.stc.line) +
      facet_wrap(~ species) +
      scale_color_manual(name = "Group",
                         values = col.grp.vec) +
      theme_shifts()
    )
    
  }
}

```

## Analysis of differences between groups in shift

```{r boxplot_between_group_diffs}

# species slopes with temperature, by group
ggplot(slopes_temp, aes(id.grp, slope, fill = id.grp)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() + 
  scale_fill_manual(values = col.grp.vec) +
  labs(x = "Group", y = "Slope [days / \u00B0C]") +
  theme_shifts(legend.position = "none")

# species slopes over time, by group
ggplot(slopes_year, aes(id.grp, slope, fill = id.grp)) + 
  geom_hline(yintercept = 0) +
  geom_boxplot() + 
  scale_fill_manual(values = col.grp.vec) +
  labs(x = "Group", y = "Slope [days / decade]") +
  theme_shifts(legend.position = "none")

```


```{r analyse_between_group_diffs}

# One-way ANOVA of the species slopes by group, followed by Tukey's HSD; only
#   the pairwise comparisons with an adjusted p-value below 0.05 are shown.

cat("Differences in slope with temperature:\n\n")

# (weighting by 1 / slope_std_err ^ 2 was considered but is not used)
aov_temp <- aov(slope ~ id.grp, data = slopes_temp)
summary(aov_temp)

cat("\n Significant pairwise differences: \n")

ph_aov_temp <- TukeyHSD(aov_temp)

# drop = FALSE keeps the matrix layout (and with it the name of the
#   comparison) when exactly one pair is significant
ph_aov_temp$id.grp[ph_aov_temp$id.grp[, "p adj"] < 0.05, , drop = FALSE]

cat("\n\n")
cat("----------------------------------------------------------\n\n")
cat("Differences in slope over time:\n\n")

# (weighting by 1 / slope_std_err ^ 2 was considered but is not used)
aov_year <- aov(slope ~ id.grp, data = slopes_year)
summary(aov_year)

cat("\n Significant pairwise differences: \n")

ph_aov_year <- TukeyHSD(aov_year)
ph_aov_year$id.grp[ph_aov_year$id.grp[, "p adj"] < 0.05, , drop = FALSE]

```


## Summary of Shifts  

```{r summarize_slopes}

# Per group summary of the species slopes: mean, sd, standard error and 95% CI
#   of the mean, t-test against zero, Pearson correlation of slope with log10
#   sample size, and the fraction of negative slopes. P-values are fdr
#   adjusted across groups and the post-hoc letters are attached.

slopes_temp_sum <- slopes_temp %>% 
  group_by(id.grp) %>% 
  summarise(
    n_spec              = length(slope),
    slope_mean          = mean(slope),
    slope_sd            = sd(slope),
    # alternative: slope_sem = sqrt(sum(slope_std_err ^ 2)) / length(slope)
    slope_sem           = slope_sd / sqrt(n_spec),
    slope_ci_min        = slope_mean - slope_sem * 1.96,
    slope_ci_max        = slope_mean + slope_sem * 1.96,
    p_diff_zero         = t.test(slope)$p.value,
    slope_n_corr        = cor(slope, log10(n), method = "pearson"),
    slope_n_corr_p      = cor.test(slope, log10(n),
                                   method = "pearson")$p.value,
    slope_frac_negative = sum(slope < 0) / length(slope),
    var                 = "Temperature"
  ) %>% 
  mutate(
    # adjust diff zero & slope_n_corr pval for multiple testing
    p_diff_zero_adj    = p.adjust(p_diff_zero,    method = "fdr"),
    slope_n_corr_p_adj = p.adjust(slope_n_corr_p, method = "fdr"),
    
    # add significance letters
    pairw_grp          = sig_letters(ph_aov_temp$id.grp, order = recode.vec)
  )

slopes_year_sum <- slopes_year %>% 
  group_by(id.grp) %>% 
  summarise(
    n_spec              = length(slope),
    slope_mean          = mean(slope),
    slope_sd            = sd(slope),
    # alternative: slope_sem = sqrt(sum(slope_std_err ^ 2)) / length(slope)
    slope_sem           = slope_sd / sqrt(n_spec),
    slope_ci_min        = slope_mean - slope_sem * 1.96,
    slope_ci_max        = slope_mean + slope_sem * 1.96,
    p_diff_zero         = t.test(slope)$p.value,
    slope_n_corr        = cor(slope, log10(n), method = "pearson"),
    slope_n_corr_p      = cor.test(slope, log10(n),
                                   method = "pearson")$p.value,
    slope_frac_negative = sum(slope < 0) / length(slope),
    var                 = "Year"
  ) %>% 
  mutate(
    # adjust diff zero & slope_n_corr pval for multiple testing
    p_diff_zero_adj    = p.adjust(p_diff_zero,    method = "fdr"),
    slope_n_corr_p_adj = p.adjust(slope_n_corr_p, method = "fdr"),
    
    # add significance letters
    pairw_grp          = sig_letters(ph_aov_year$id.grp, order = recode.vec)
  )

# both summaries side by side, columns suffixed by variable
slopes_all_sum <- left_join(slopes_temp_sum, slopes_year_sum,
                            by     = c("id.grp"),
                            suffix = c("_temp", "_year")) %>% 
  
  # drop redundant columns
  select(-starts_with("var_"))

# display the combined summary
slopes_all_sum

```


```{r correlate_species_n_with_slope}

# overall correlation of slope with log10 transformed sample size
slope_n_temp_corr <- cor.test(log10(slopes_all$n), slopes_all$slope_temp, 
                              method = "pearson")
slope_n_year_corr <- cor.test(log10(slopes_all$n), slopes_all$slope_year, 
                              method = "pearson")

# summary strings for the plot subtitles
n_temp_corr_str <- paste("r =", round(slope_n_temp_corr$estimate, 2),
                         ", ",
                         "p =", round(slope_n_temp_corr$p.value,  2))

n_year_corr_str <- paste("r =", round(slope_n_year_corr$estimate, 2),
                         ", ",
                         "p =", round(slope_n_year_corr$p.value,  2))


# shift with temperature vs species sample size, one panel per group
slope_n_corr_temp_plt <- ggplot(slopes_all,
                                aes(n, slope_temp, col = id.grp)) +
  
  geom_point(alpha = alpha.pt) + 
  
  # (a linear smooth over the points is currently left out)
  
  # per group correlation, centred on the log10 x axis
  geom_text(data = slopes_all_sum,
            aes(x     = 10 ^ mean(log10(range(slopes_all$n))),
                y     = ypos(slopes_all$slope_temp),
                label = paste0("r = ", round(slope_n_corr_temp,        2),
                               ",\n",
                               "p = ", round(slope_n_corr_p_adj_temp,  2))),
            col   = col.annot,
            hjust = 0.5,
            vjust = 0.5,
            size  = 2.5) +
  
  geom_hline(yintercept = 0) +
  
  facet_wrap(~ id.grp, nrow = 1) +
  
  scale_x_log10() +
  
  scale_color_manual(name       = "Group",
                     aesthetics = c("color", "fill"),
                     values     = col.grp.vec) +
  
  labs(x        = "Species n", 
       y        = "Shift with temperature [days / \u00B0C]",
       title    = "Shift vs sample size",
       subtitle = paste0("Overall:", n_temp_corr_str)) +
  
  theme_shifts(axis.text.x     = element_text(angle = 45, hjust = 0.5),
               legend.position = "none")

slope_n_corr_temp_plt


# shift over time vs species sample size, one panel per group
slope_n_corr_year_plt <- ggplot(slopes_all,
                                aes(n, slope_year, col = id.grp)) + 
  
  geom_point(alpha = alpha.pt) + 
  
  # (a linear smooth over the points is currently left out)
  
  # per group correlation, centred on the log10 x axis
  geom_text(data = slopes_all_sum,
            aes(x     = 10 ^ mean(log10(range(slopes_all$n))),
                y     = ypos(slopes_all$slope_year),
                label = paste0("r = ", round(slope_n_corr_year,        2),
                               ",\n",
                               "p = ", round(slope_n_corr_p_adj_year,  2))),
            col   = col.annot,
            hjust = 0.5,
            vjust = 0.5,
            size  = 2.5) +
  
  geom_hline(yintercept = 0) +
  
  facet_wrap(~ id.grp, nrow = 1) +
  
  scale_x_log10() +
  
  scale_color_manual(name       = "Group",
                     aesthetics = c("color", "fill"),
                     values     = col.grp.vec) +
  
  labs(x        = "Species n", 
       y        = "Shift over time [days / decade]",
       title    = "Shift vs sample size",
       subtitle = paste0("Overall:", n_year_corr_str)) +
  
  theme_shifts(axis.text.x     = element_text(angle = 45, hjust = 0.5),
               legend.position = "none")

slope_n_corr_year_plt

```


```{r plot_slopes_vs_species_forest}

# save plot for display and plotting
# Forest plots of the species slopes (point = slope, range = slope +- 1.96
#   standard errors), one panel per group. The plot objects are kept for later
#   use and displayed here with a title.

# shifts with temperature
slopes_spec_temp_for_plt <- ggplot(slopes_temp,
                                   aes(x   = reorder(species, -slope),
                                       y   = slope,
                                       col = id.grp)) +
  
  # species ordered by slope, drawn with their interval
  geom_pointrange(aes(ymin = slope - slope_std_err * 1.96,
                      ymax = slope + slope_std_err * 1.96),
                  alpha = alpha.pt) +
  
  geom_point() +
  
  # clearly denote the 0 line
  geom_hline(yintercept = 0) +
  
  # invert x and y axis for classic forest plot look
  coord_flip() + 
  
  # one facet per group, each with an independent species axis
  facet_wrap( ~ id.grp, scale = "free_y") +
  
  # add coloring
  scale_color_manual(name = "Group", values = col.grp.vec) +
  
  # add labels
  labs(y = "Shift with temperature [days / \u00B0C] (\u00B1 95% CI) ",
       x = "Species") +
  
  # no labels for individual species (too much information at once) and
  #   no legend
  theme_shifts(axis.text.y     = element_blank(),
               axis.ticks.y    = element_blank(),
               legend.position = "none")


# display plot with title
slopes_spec_temp_for_plt + 
  labs(title = "Shifts with temperature")


# shifts over time
slopes_spec_year_for_plt <- ggplot(slopes_year,
                                   aes(x   = reorder(species, -slope),
                                       y   = slope,
                                       col = id.grp)) +
  
  # species ordered by slope, drawn with their interval
  geom_pointrange(aes(ymin = slope - slope_std_err * 1.96,
                      ymax = slope + slope_std_err * 1.96),
                  alpha = alpha.pt) +
  
  geom_point() +
  
  # clearly denote the 0 line
  geom_hline(yintercept = 0) +
  
  # invert x and y axis for classic forest plot look
  coord_flip() + 
  
  # one facet per group, each with an independent species axis
  facet_wrap( ~ id.grp, scale = "free_y") +
  
  # add coloring
  scale_color_manual(name = "Group", values = col.grp.vec) +
  
  # add labels
  labs(y = "Shift over time [days / decade] (\u00B1 95% CI)",
       x = "Species") +
  
  # no labels for individual species (too much information at once) and
  #   no legend
  theme_shifts(axis.text.y     = element_blank(),
               axis.ticks.y    = element_blank(),
               legend.position = "none")


# display plot with title
slopes_spec_year_for_plt +
  labs(title = "Shifts over time")

```


```{r plot_slopes_vs_groups}

# for temp
# Group means of the species slopes with their confidence intervals and the
#   letters of the pairwise comparison. The raw species slopes are currently
#   not drawn.

# for temp
slopes_grp_temp_mean_plt <- ggplot(slopes_temp_sum, aes(x = id.grp)) +
  
  # error bars
  geom_errorbar(aes(ymin = slope_ci_min,
                    ymax = slope_ci_max,
                    col  = id.grp)) +
  
  # mean data
  geom_point(aes(y = slope_mean, col = id.grp)) + 
  
  # pairwise differences
  geom_text(aes(y     = ypos(slope_ci_max, frac = 0.5),
                label = pairw_grp),
            col = col.annot) +
  
  # line denoting 0
  geom_hline(yintercept = 0, col = col.stc.line) +
  
  # add coloring
  scale_color_manual(name = "Group", values = col.grp.vec) +
  
  # add labels
  labs(x = "Group",
       y = "Temperature sensitivity\n[days / \u00B0C]") +
  
  # suppress legend
  theme_shifts(legend.position = "none")


slopes_grp_temp_mean_plt + 
  labs(title    = "Between group differences in shift",
       subtitle = "with temperature")


# for year
slopes_grp_year_mean_plt <- ggplot(slopes_year_sum, aes(x = id.grp)) +
  
  # error bars
  geom_errorbar(aes(ymin = slope_ci_min,
                    ymax = slope_ci_max,
                    col  = id.grp)) +
  
  # mean data
  geom_point(aes(y = slope_mean, col = id.grp)) + 
  
  # pairwise differences
  geom_text(aes(y     = ypos(slope_ci_max, frac = 0.7),
                label = pairw_grp),
            col = col.annot) +
  
  # line denoting 0
  geom_hline(yintercept = 0, col = col.stc.line) +
  
  # add coloring
  scale_color_manual(name = "Group", values = col.grp.vec) +
  
  # add labels
  labs(x = "Group",
       y = "Temporal shift\n[days / decade]") +
  
  # suppress legend
  theme_shifts(legend.position = "none")

slopes_grp_year_mean_plt + 
  labs(title    = "Between group differences in shift",
       subtitle = "over time")

```


## Analysis of correlation of time and temperature shifts

```{r analyse_year_temp_corr}

# perform correlation test of time and temp slopes for each group and
# store results in df (one row per group)
corr_slopes <- lapply(
  unique(slopes_all$id.grp),
  function(id.grp_var, data = slopes_all) {
    
    slopes_grp <- filter(data, id.grp == id.grp_var)
    
    # cor.test drops incomplete pairs and needs at least three complete ones,
    #   so count the complete pairs rather than the rows
    n_pairs <- sum(complete.cases(slopes_grp$slope_year,
                                  slopes_grp$slope_temp))
    
    if (n_pairs > 2) {
      
      test <- cor.test(slopes_grp$slope_year,
                       slopes_grp$slope_temp)
      
      res <- data.frame(id.grp = id.grp_var,
                        cor    = test[["estimate"]][["cor"]],
                        t      = test[["statistic"]],
                        df     = test[["parameter"]][["df"]],
                        pval   = test[["p.value"]])
      
      return(res)
      
    } else {
      
      # if we have not enough observations, return the closest thing
      #   to NAs
      res <- data.frame(id.grp = id.grp_var,
                        cor    = NA,
                        t      = "-",
                        df     = "-",
                        pval   = NA)
      
      return(res)
      
    }
    
  })

# bind_rows was acting up, so do.call(rbind) it is
corr_slopes <- do.call(rbind, corr_slopes) %>%
  
  # adjust pvals for multiple testing
  mutate(pval_fdr = p.adjust(pval, method="fdr")) %>% 
  
  # format pvalues into significance symbols; include.lowest = TRUE closes the
  #   last interval, so a p-value of exactly 1 gets " " and is not mistaken
  #   for a missing test below
  # NOTE(review): the symbols are based on the unadjusted pval, not on
  #   pval_fdr - confirm that this is intended
  mutate(pval.sig = cut(
    pval,
    breaks = c(0, 0.001, 0.01, 0.05, 0.1, 1),
    labels = c("***", "**", "*", ".", " "),
    right  = FALSE,
    include.lowest = TRUE )) %>%
  
  # special case of NAs (groups without a test)
  mutate(pval.sig = str_replace_na(pval.sig, "-")) %>%
  
  # also add group sizes for plotting (slightly hacky)
  left_join(select(slopes_all_sum, id.grp, n_spec_temp), by = "id.grp")


```


```{r plot_year_vs_temp_slopes_correlation}

# Temperature sensitivity against temporal shift: species slopes as points,
#   group means with their confidence intervals in both directions, and the
#   correlation coefficient of each group; one panel per group.
slopes_grp_all_corr_plt <- ggplot() +
  
  # add 0 lines
  geom_hline     (yintercept = 0, col = col.stc.line) + 
  geom_segment   (data = slopes_all,
                  aes(x = 0, y = -Inf,
                      xend = 0, yend = ypos(slope_temp)),
                  col = col.stc.line) +
  
  # (error bars for the raw species slopes are currently left out)
  
  # add raw data
  geom_point     (data = slopes_all,
                  aes(x = slope_year,
                      y = slope_temp),
                  col   = col.pt,
                  alpha = alpha.pt) + 
  
  # add year errorbar; the caps of a horizontal error bar are sized by
  #   `height` (not `width`), so this is what has to be 0 to match the
  #   cap-less vertical error bar below
  geom_errorbarh (data = slopes_all_sum,
                  aes(y    = slope_mean_temp,
                      xmin = slope_ci_min_year,
                      xmax = slope_ci_max_year,
                      col  = id.grp
                      ),
                  height = 0) +
  
  # add temp errorbar
  geom_errorbar  (data = slopes_all_sum,
                  aes(x    = slope_mean_year,
                      ymin = slope_ci_min_temp,
                      ymax = slope_ci_max_temp,
                      col  = id.grp),
                  width = 0)  +
  
  # add group means
  geom_point     (data = slopes_all_sum,
                  aes(x    = slope_mean_year,
                      y    = slope_mean_temp,
                      col  = id.grp)) +
  
  # add the correlation coefficient of each group
  geom_text      (data = corr_slopes,
                  
                  # set text at the middle of all time slopes and slightly 
                  #   above the max of all temp slopes
                  aes(x = mean(c(min(slopes_all$slope_year),
                                 max(slopes_all$slope_year))),
                      y = ypos(slopes_all$slope_temp, frac = 0.5),
                      
                      label = str_glue("~italic(r) == {round(cor, 2)}")),
                  col  = col.annot,
                  size = size.annot.txt,
                  parse = TRUE) +
  
  # adjust ylim to fit text
  ylim(c(min(slopes_all$slope_temp), max(slopes_all$slope_temp) * 2)) +
  
  # split the plot based on the group
  facet_wrap( ~ id.grp) + 
  
  # improve axis labels 
  labs(x = "Temporal shift [days / decade]",
       y = "Temperature sensitivity \n[days / \u00B0C]") +
  
  # add coloring
  scale_color_manual(name   = "Group",
                     values = col.grp.vec) +
  
  # disable the legend
  theme_shifts(legend.position = "none")


slopes_grp_all_corr_plt +
  labs(title = "Correlation of shifts over time with shifts with temperature")

```

## Analysis of interaction asynchrony
  
Here, asynchrony is defined as the difference between the peak day of year
(doy) of flowering and the peak doy of pollinator flight in a given year or at
a given temperature. The peak doys are approximated by the values predicted by
the linear models for the corresponding plant and pollinator. The shift in
asynchrony is then given by the change in the difference between the predicted
values over time and with temperature.

```{r summarize_interactions}

# Interaction table: one row per plant-pollinator interaction read from the
# static interaction file.
int_data <- paste0("static_data/",
                   "plant_pollinator_interactions_for_potential_",
                   "networks_2018.csv") %>%
  read.csv() %>%

  # shorter column names
  rename(poll = POLLINATOR_NAME, plant = PLANT_NAME, group = Group) %>%

  # replace a synonymous species name
  mutate(poll = recode(poll, "Inachis io" = "Aglais io")) %>%

  # keep only interactions for which both partners have occurrence data
  filter(plant %in% dat.occ$species,
         poll  %in% dat.occ$species) %>%

  # attach the plant traits to each interaction
  left_join(plant_traits, by = c("plant" = "species")) %>%

  # drop interactions of pollinator independent plants
  # (filter() also discards rows where PollDep is NA)
  filter(PollDep != "No") %>%

  # give the groups clearer names
  # (recode_factor() needs name/value pairs, hence the !!! splicing)
  mutate(group = recode_factor(group, !!! recode.vec.int))


# number of distinct pollinators and plants per group
int_data_sum <- int_data %>%
  group_by(group) %>%
  summarise(across(c(poll, plant), uniqueN))

int_data_sum

```


```{r compute_asynchrony}

# Per-interaction asynchrony estimates: for every row of int_data, look up the
# fitted slopes and intercepts of both partners in slopes_all and take the
# plant minus pollinator difference, for temperature as well as for year.
int_slopes_asyn <- int_data %>%
  apply(1, function(int_row) {

    # combined standard error of a difference: root of the summed squares
    se_of_diff <- function(se_plant, se_poll) {
      sqrt(se_plant ^ 2 + se_poll ^ 2)
    }

    # model estimates of the two interaction partners
    est_plant <- filter(slopes_all, species == int_row[["plant"]])
    est_poll  <- filter(slopes_all, species == int_row[["poll"]])

    data.frame(

      # who interacts with whom
      plant = int_row[["plant"]],
      poll  = int_row[["poll"]],
      group = int_row[["group"]],

      # temperature models: slopes of both partners
      slope_temp_plant = est_plant$slope_temp,
      slope_temp_poll  = est_poll$slope_temp,

      # temperature models: intercepts of both partners
      intcp_temp_plant = est_plant$intercept_temp,
      intcp_temp_poll  = est_poll$intercept_temp,

      # temperature models: slope difference and its standard error
      slope_temp_asyn         = est_plant$slope_temp - est_poll$slope_temp,
      slope_temp_asyn_std_err = se_of_diff(est_plant$slope_std_err_temp,
                                           est_poll$slope_std_err_temp),

      # temperature models: intercept difference and its standard error
      intcp_temp_asyn         = est_plant$intercept_temp -
        est_poll$intercept_temp,
      intcp_temp_asyn_std_err = se_of_diff(est_plant$intercept_std_err_temp,
                                           est_poll$intercept_std_err_temp),

      # year models: slopes of both partners
      slope_year_plant = est_plant$slope_year,
      slope_year_poll  = est_poll$slope_year,

      # year models: intercepts of both partners
      intcp_year_plant = est_plant$intercept_year,
      intcp_year_poll  = est_poll$intercept_year,

      # year models: slope difference and its standard error
      slope_year_asyn         = est_plant$slope_year - est_poll$slope_year,
      slope_year_asyn_std_err = se_of_diff(est_plant$slope_std_err_year,
                                           est_poll$slope_std_err_year),

      # year models: intercept difference and its standard error
      intcp_year_asyn         = est_plant$intercept_year -
        est_poll$intercept_year,
      intcp_year_asyn_std_err = se_of_diff(est_plant$intercept_std_err_year,
                                           est_poll$intercept_std_err_year)
    )
  }) %>%

  # stack the per-interaction data frames
  bind_rows() %>%

  # group arrives as character here, so restore the factor ordering
  mutate(group = fct_relevel(group, recode.vec.int))

```


```{r boxplot_between_group_asynchrony_diffs}

# boxplot of the asynchrony shift with temperature for every group
int_slopes_asyn %>%
  ggplot(aes(x = group, y = slope_temp_asyn, fill = group)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  scale_fill_manual(values = col.grp.vec) +
  labs(x = "Group",
       y = "Asynchrony shift [days / \u00B0C]") +
  theme_shifts(legend.position = "none")

# boxplot of the asynchrony shift over time for every group
int_slopes_asyn %>%
  ggplot(aes(x = group, y = slope_year_asyn, fill = group)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  scale_fill_manual(values = col.grp.vec) +
  labs(x = "Group",
       y = "Asynchrony shift [days / decade]") +
  theme_shifts(legend.position = "none")

```


```{r analyse_between_group_asynchrony_diffs}

# One-way ANOVAs testing whether the asynchrony shifts differ between groups,
# each followed by Tukey HSD post-hoc comparisons. The post-hoc objects
# (ph_aov_asyn_temp, ph_aov_asyn_year) are reused by later chunks.

cat("Differences in slope of asynchrony with temperature:\n\n")

# unweighted model
# (weighting by 1 / slope_temp_asyn_std_err ^ 2 is currently disabled)
aov_asyn_temp <- aov(slope_temp_asyn ~ group, data = int_slopes_asyn)
summary(aov_asyn_temp)

cat("\n Significant pairwise differences: \n")

ph_aov_asyn_temp <- TukeyHSD(aov_asyn_temp)

# show comparisons with an adjusted p-value below 0.1
# drop = FALSE keeps the matrix layout, and with it the name of the
#   comparison, when only a single row passes the threshold
ph_aov_asyn_temp$group[ph_aov_asyn_temp$group[, "p adj"] < 0.1, , drop = FALSE]

cat("\n\n")
cat("----------------------------------------------------------\n\n")
cat("Differences in slope of asynchrony over time:\n\n")

# unweighted model
# (weighting by 1 / slope_year_asyn_std_err ^ 2 is currently disabled)
aov_asyn_year <- aov(slope_year_asyn ~ group, data = int_slopes_asyn)
summary(aov_asyn_year)

cat("\n Significant pairwise differences: \n")

ph_aov_asyn_year <- TukeyHSD(aov_asyn_year)

# same display rule as for the temperature model
ph_aov_asyn_year$group[ph_aov_asyn_year$group[, "p adj"] < 0.1, , drop = FALSE]

```


```{r summarize_slope_interactions}

# Group-level summary of the asynchrony shifts: sample sizes, mean, sd,
# standard error of the mean, 95 % CI (mean +- 1.96 * SEM), fraction of
# negative shifts and a one-sample t-test of the mean against zero, for the
# temperature and the year models. SEMs are taken from the spread between
# interactions; the per-interaction standard errors are not propagated.
int_slopes_asyn_sum <- int_slopes_asyn %>%
  group_by(group) %>%
  summarise(n_int                         = length(slope_temp_plant),
            n_poll                        = length(unique(poll)),
            n_plant                       = length(unique(plant)),

            # summarize temp asyn
            slope_temp_asyn_mean          = mean(slope_temp_asyn),
            slope_temp_asyn_sd            = sd(slope_temp_asyn),
            slope_temp_asyn_sem           = slope_temp_asyn_sd /
              sqrt(n_int),
            slope_temp_asyn_ci_min        = slope_temp_asyn_mean -
              slope_temp_asyn_sem * 1.96,
            slope_temp_asyn_ci_max        = slope_temp_asyn_mean +
              slope_temp_asyn_sem * 1.96,
            slope_temp_asyn_frac_negative = sum(slope_temp_asyn < 0) /
              length(slope_temp_asyn),

            # test if the average temp slope differs from 0
            p_diff_zero_temp              = t.test(slope_temp_asyn)$p.value,

            # summarize intercept
            intcp_temp_asyn_mean          = mean(intcp_temp_asyn),
            intcp_temp_asyn_sem           = sd(intcp_temp_asyn) /
              sqrt(n_int),


            # summarize year asyn
            slope_year_asyn_mean          = mean(slope_year_asyn),
            slope_year_asyn_sd            = sd(slope_year_asyn),
            slope_year_asyn_sem           = slope_year_asyn_sd /
              sqrt(n_int),
            slope_year_asyn_ci_min        = slope_year_asyn_mean -
              slope_year_asyn_sem * 1.96,
            slope_year_asyn_ci_max        = slope_year_asyn_mean +
              slope_year_asyn_sem * 1.96,
            slope_year_asyn_frac_negative = sum(slope_year_asyn < 0) /
              length(slope_year_asyn),

            # test if the average year slope differs from 0
            p_diff_zero_year              = t.test(slope_year_asyn)$p.value,

            # summarize intercept
            intcp_year_asyn_mean          = mean(intcp_year_asyn),
            intcp_year_asyn_sem           = sd(intcp_year_asyn) /
              sqrt(n_int)
  ) %>%

  # add pairwise difference labels derived from the Tukey post-hoc tests
  mutate(pairw_grp_temp_asyn = sig_letters(ph_aov_asyn_temp$group,
                                           recode.vec.int),
         pairw_grp_year_asyn = sig_letters(ph_aov_asyn_year$group,
                                           recode.vec.int)) %>%

  # adjust the p-values of the tests against 0 for multiple testing; the
  #   adjustment runs over the group rows of each p-value column
  # a lambda is used because passing extra arguments through across()'s
  #   `...` is deprecated since dplyr 1.1.0
  mutate(across(starts_with("p_diff_zero"),
                ~ p.adjust(.x, method = "fdr"),
                .names = "{.col}_adj"))


int_slopes_asyn_sum

```


```{r plot_slope_interactions}

# Asynchrony shift with temperature: raw interactions plus group mean and CI
asyn_int_temp_mean_plt <- ggplot() +

  # every interaction as a jittered background point
  geom_point(data     = int_slopes_asyn,
             mapping  = aes(x = group, y = slope_temp_asyn),
             colour   = col.pt,
             alpha    = alpha.pt,
             position = position_jitter()) +

  # confidence interval around the group mean
  geom_errorbar(data    = int_slopes_asyn_sum,
                mapping = aes(x      = group,
                              ymin   = slope_temp_asyn_ci_min,
                              ymax   = slope_temp_asyn_ci_max,
                              colour = group)) +

  # group mean
  geom_point(data    = int_slopes_asyn_sum,
             mapping = aes(x      = group,
                           y      = slope_temp_asyn_mean,
                           colour = group)) +

  # pairwise difference labels
  geom_text(data    = int_slopes_asyn_sum,
            mapping = aes(x     = group,
                          y     = ypos(int_slopes_asyn$slope_temp_asyn),
                          label = pairw_grp_temp_asyn),
            colour  = col.annot) +

  # reference line at 0
  geom_hline(yintercept = 0, colour = col.stc.line) +

  # axis labels
  labs(x = "Group",
       y = "Asynchrony shift [days / \u00B0C] (\u00B1 95% CI)") +

  # group colours
  scale_color_manual(name = "Group", values = col.grp.vec) +

  # no legend
  theme_shifts(legend.position = "none")

# display with title and subtitle
asyn_int_temp_mean_plt +
  labs(title    = "Interaction asynchrony shifts",
       subtitle = "with temperature")


# Asynchrony shift over time: raw interactions plus group mean and CI
asyn_int_year_mean_plt <- ggplot() +

  # every interaction as a jittered background point
  geom_point(data     = int_slopes_asyn,
             mapping  = aes(x = group, y = slope_year_asyn),
             colour   = col.pt,
             alpha    = alpha.pt,
             position = position_jitter()) +

  # confidence interval around the group mean
  geom_errorbar(data    = int_slopes_asyn_sum,
                mapping = aes(x      = group,
                              ymin   = slope_year_asyn_ci_min,
                              ymax   = slope_year_asyn_ci_max,
                              colour = group)) +

  # group mean
  geom_point(data    = int_slopes_asyn_sum,
             mapping = aes(x      = group,
                           y      = slope_year_asyn_mean,
                           colour = group)) +

  # pairwise difference labels
  geom_text(data    = int_slopes_asyn_sum,
            mapping = aes(x     = group,
                          y     = ypos(int_slopes_asyn$slope_year_asyn),
                          label = pairw_grp_year_asyn),
            colour  = col.annot) +

  # reference line at 0
  geom_hline(yintercept = 0, colour = col.stc.line) +

  # axis labels
  labs(x = "Group",
       y = "Asynchrony shift [days / decade] (\u00B1 95% CI)") +

  # group colours
  scale_color_manual(name = "Group", values = col.grp.vec) +

  # no legend
  theme_shifts(legend.position = "none")


# display with title and subtitle
asyn_int_year_mean_plt +
  labs(title    = "Interaction asynchrony shifts",
       subtitle = "over time")

```


```{r plot_year_vs_temp_asyn_slopes_correlation}

# Asynchrony shift over time (x) against asynchrony shift with temperature
# (y): one point per interaction, overlaid with group means and their CIs,
# one facet per group.
asyn_int_all_corr_plt <- ggplot() +

  # reference lines through the origin
  geom_hline(yintercept = 0, colour = col.stc.line) +
  geom_vline(xintercept = 0, colour = col.stc.line) +

  # disabled: error bars for the raw data
  # geom_errorbar  (data = slopes_all,
  #                 aes(x    = slope_year,
  #                     ymin = slope_temp - slope_std_err_temp,
  #                     ymax = slope_temp + slope_std_err_temp),
  #                 col = col.pt) +
  #
  # geom_errorbarh (data = slopes_all,
  #                 aes(y    = slope_temp,
  #                     xmin = slope_year - slope_std_err_year,
  #                     xmax = slope_year + slope_std_err_year),
  #                 col = col.pt) +

  # one point per interaction
  geom_point(data    = int_slopes_asyn,
             mapping = aes(x = slope_year_asyn, y = slope_temp_asyn),
             colour  = col.pt,
             alpha   = alpha.pt) +

  # horizontal bar: CI of the mean shift over time
  geom_errorbarh(data    = int_slopes_asyn_sum,
                 mapping = aes(y      = slope_temp_asyn_mean,
                               xmin   = slope_year_asyn_ci_min,
                               xmax   = slope_year_asyn_ci_max,
                               colour = group)) +

  # vertical bar: CI of the mean shift with temperature
  geom_errorbar(data    = int_slopes_asyn_sum,
                mapping = aes(x      = slope_year_asyn_mean,
                              ymin   = slope_temp_asyn_ci_min,
                              ymax   = slope_temp_asyn_ci_max,
                              colour = group)) +

  # group means
  geom_point(data    = int_slopes_asyn_sum,
             mapping = aes(x      = slope_year_asyn_mean,
                           y      = slope_temp_asyn_mean,
                           colour = group)) +

  # one panel per group, all in a single row
  facet_wrap(~ group, nrow = 1) +

  # axis labels
  labs(x = "Asynchrony shift over time [days / decade] (\u00B1 95% CI)",
       y = "Asynchrony temperature shift [days / \u00B0C] (\u00B1 95% CI)") +

  # group colours
  scale_color_manual(name = "Group", values = col.grp.vec) +

  # no legend
  theme_shifts(legend.position = "none")

# display with a title
asyn_int_all_corr_plt +
  labs(title = paste("Correlation of asynchrony shifts over time",
                     "and with temperature"))

```


```{r simulate_interaction_diff_slopes}

# Predicted asynchrony of every interaction along the observed temperature and
# year ranges, plus per-group summaries of these predictions.
#
# The rows of int_slopes_asyn are expanded with vectorised dplyr verbs rather
# than with apply(): apply() turns the data frame into a character matrix,
# which passes the numeric columns through format() and thereby rounds the
# intercepts and slopes before they can be converted back with as.numeric().

# TODO: give error for estimated points

# temperatures (rounded to one decimal) and years present in the records
temp_vec <- unique(round(dat.occ$temp, 1))

year_vec <- unique(dat.occ$year)

# for each interaction, derive the difference between the plant and the
#   pollinator at a given temperature
sim_diff_temp <- int_slopes_asyn %>%

  # repeat every interaction once per temperature, keeping column types
  slice(rep(seq_len(n()), each = length(temp_vec))) %>%

  mutate(id   = paste(plant, poll),
         # temp_vec is cycled once per interaction
         temp = rep(temp_vec, length.out = n()),
         diff = intcp_temp_asyn + temp * slope_temp_asyn) %>%

  select(plant, poll, group, id, temp, diff) %>%

  # ensure factor ordering is still there
  mutate(group = fct_relevel(group, recode.vec.int))


# same for a given year
sim_diff_year <- int_slopes_asyn %>%

  # repeat every interaction once per year, keeping column types
  slice(rep(seq_len(n()), each = length(year_vec))) %>%

  mutate(id   = paste(plant, poll),
         # year_vec is cycled once per interaction
         year = rep(year_vec, length.out = n()),
         # scale year by 0.1 to match slopes being in days / decade
         diff = intcp_year_asyn + (year * 0.1) * slope_year_asyn) %>%

  select(plant, poll, group, id, year, diff) %>%

  # ensure factor ordering is still there
  mutate(group = fct_relevel(group, recode.vec.int))


# summarize per temp differences over groups
sim_diff_temp_sum <- sim_diff_temp %>%

  group_by(group, temp) %>%

  summarize(diff_n            = length(diff),
            diff_mean         = mean(diff),
            diff_sem          = sqrt( var(diff) / length(diff)),
            diff_ci_min       = diff_mean - 1.96 * diff_sem,
            diff_ci_max       = diff_mean + 1.96 * diff_sem,
            # share of interactions with a positive plant - pollinator
            #   difference
            perc_poll_earlier = (sum(diff > 0) / length(diff)) * 100)


# summarize per year differences over groups
sim_diff_year_sum <- sim_diff_year %>%

  group_by(group, year) %>%

  summarize(diff_n            = length(diff),
            diff_mean         = mean(diff),
            diff_sem          = sqrt( var(diff) / length(diff)),
            diff_ci_min       = diff_mean - 1.96 * diff_sem,
            diff_ci_max       = diff_mean + 1.96 * diff_sem,
            # share of interactions with a positive plant - pollinator
            #   difference
            perc_poll_earlier = (sum(diff > 0) / length(diff)) * 100)


```


```{r plot_diff_slopes}

# Predicted asynchrony against temperature, one facet per group
asyn_grp_temp_scatter_plt <- ggplot() +

  # one line per interaction
  geom_line(data    = sim_diff_temp,
            mapping = aes(x = temp, y = diff, group = id),
            colour  = col.pt,
            alpha   = alpha.ln) +

  # disabled: density contours of the per-interaction data
  # geom_density2d(data = sim_diff_temp,
  #                aes(temp, diff),
  #                col = col.stc.line) +

  # ribbon: confidence interval of the group average
  geom_ribbon(data    = sim_diff_temp_sum,
              mapping = aes(x    = temp,
                            ymin = diff_ci_min,
                            ymax = diff_ci_max,
                            fill = group),
              alpha   = alpha.ribbon) +

  # line: group average
  geom_line(data    = sim_diff_temp_sum,
            mapping = aes(x = temp, y = diff_mean, colour = group)) +

  # reference line at 0
  geom_hline(yintercept = 0, colour = col.stc.line) +

  # one panel per group
  facet_wrap(~ group) +

  # axis labels
  labs(x = "Temperature [\u00B0C]",
       y = "Asynchrony [days] (\u00B1 95% CI)") +

  # group colours, used for line colour and ribbon fill alike
  scale_color_manual(name       = "Group",
                     values     = col.grp.vec,
                     aesthetics = c("col", "fill")) +

  # no legend
  theme_shifts(legend.position = "none")


# display with title and subtitle
asyn_grp_temp_scatter_plt +
  labs(title    = "Asynchrony in interactions",
       subtitle = "with temperature")


# Predicted asynchrony against year, one facet per group
asyn_grp_year_scatter_plt <- ggplot() +

  # one line per interaction
  geom_line(data    = sim_diff_year,
            mapping = aes(x = year, y = diff, group = id),
            colour  = col.pt,
            alpha   = alpha.ln) +

  # disabled: density contours of the per-interaction data
  # geom_density2d(data = sim_diff_year,
  #                aes(year, diff),
  #                col = col.stc.line) +

  # ribbon: confidence interval of the group average
  geom_ribbon(data    = sim_diff_year_sum,
              mapping = aes(x    = year,
                            ymin = diff_ci_min,
                            ymax = diff_ci_max,
                            fill = group),
              alpha   = alpha.ribbon) +

  # line: group average
  geom_line(data    = sim_diff_year_sum,
            mapping = aes(x = year, y = diff_mean, colour = group)) +

  # reference line at 0
  geom_hline(yintercept = 0, colour = col.stc.line) +

  # one panel per group
  facet_wrap(~ group) +

  # axis labels
  labs(x = "Year",
       y = "Asynchrony [days] (\u00B1 95% CI)") +

  # group colours, used for line colour and ribbon fill alike
  scale_color_manual(name       = "Group",
                     values     = col.grp.vec,
                     aesthetics = c("col", "fill")) +

  # no legend; x axis labels rotated by 45 degrees
  theme_shifts(legend.position = "none",
               axis.text.x     = element_text(angle = 45, hjust = 0.5))


# display with title and subtitle
asyn_grp_year_scatter_plt +
  labs(title    = "Asynchrony in interactions",
       subtitle = "over time")

```


```{r plot_poll_early}

# Share of interactions with a positive plant - pollinator difference,
# plotted against temperature with one facet per group
earlyfrac_grp_temp_line_plt <- sim_diff_temp_sum %>%
  ggplot(aes(x = temp, y = perc_poll_earlier, colour = group)) +

  # group data as a line
  geom_line(size = size.dat.line) +

  # reference line at 50 %
  geom_hline(yintercept = 50, colour = col.stc.line) +

  # always show the full percentage range
  ylim(c(0, 100)) +

  # one panel per group
  facet_wrap(~ group) +

  # axis labels
  labs(x = "Temperature [\u00B0C]",
       y = paste("Fraction of interactions with pollinators",
                 "earlier than plants [%]",
                 sep = "\n")) +

  # group colours
  scale_color_manual(name       = "Group",
                     values     = col.grp.vec,
                     aesthetics = c("col", "fill")) +

  # no legend
  theme_shifts(legend.position = "none")


# display with title and subtitle
earlyfrac_grp_temp_line_plt +
  labs(title    = "Change in earlier partner",
       subtitle = "with temperature")


# Share of interactions with a positive plant - pollinator difference,
# plotted against year with one facet per group
earlyfrac_grp_year_line_plt <- sim_diff_year_sum %>%
  ggplot(aes(x = year, y = perc_poll_earlier, colour = group)) +

  # group data as a line
  geom_line(size = size.dat.line) +

  # reference line at 50 %
  geom_hline(yintercept = 50, colour = col.stc.line) +

  # always show the full percentage range
  ylim(c(0, 100)) +

  # one panel per group
  facet_wrap(~ group) +

  # axis labels
  labs(x = "Year",
       y = paste("Fraction of interactions with pollinators",
                 "earlier than plants [%]",
                 sep = "\n")) +

  # group colours
  scale_color_manual(name       = "Group",
                     values     = col.grp.vec,
                     aesthetics = c("col", "fill")) +

  # no legend; x axis labels rotated by 45 degrees
  theme_shifts(legend.position = "none",
               axis.text.x     = element_text(angle = 45, hjust = 0.5))


# display with title and subtitle
earlyfrac_grp_year_line_plt +
  labs(title    = "Change in earlier partner",
       subtitle = "over time")

```

## Analysis of influence of plant traits

```{r get_plant_traits_slope}
# keep only species with a known pollinator dependence (PollDep not NA)
#   (per the original note, this also removes the insects)
slopes_temp_polldep <- drop_na(slopes_temp, PollDep)

slopes_year_polldep <- drop_na(slopes_year, PollDep)

# columns shared by both slope tables that serve as join keys
join_vec <- unique(c("group", names(tax_df), names(plant_traits)))

# temperature and year slopes side by side; remaining columns that occur in
#   both tables get the suffixes _temp and _year
slopes_all_polldep <- slopes_temp_polldep %>%
  left_join(slopes_year_polldep,
            by     = join_vec,
            suffix = c("_temp", "_year")) %>%
  select(-contains("main_var"))
```


```{r boxplots_plant_traits_slope}

# boxplot of the temperature slopes per level of pollinator dependence
slopes_temp_polldep %>%
  ggplot(aes(x = PollDep, y = slope, fill = PollDep)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  scale_fill_manual(values = col.grp.vec) +
  labs(x = "Group",
       y = "Shift [days / \u00B0C]") +
  theme_shifts(legend.position = "none")

# boxplot of the year slopes per level of pollinator dependence
slopes_year_polldep %>%
  ggplot(aes(x = PollDep, y = slope, fill = PollDep)) +
  geom_hline(yintercept = 0) +
  geom_boxplot() +
  scale_fill_manual(values = col.grp.vec) +
  labs(x = "Group",
       y = "Shift [days / decade]") +
  theme_shifts(legend.position = "none")

```


```{r analyse_plant_traits_slope}

# One-way ANOVAs testing whether plant slopes differ between the levels of
# pollinator dependence, each followed by Tukey HSD post-hoc comparisons.
# The post-hoc objects (ph_aov_polldep_temp, ph_aov_polldep_year) are reused
# by later chunks.

# cat() already separates its arguments with a blank, so the first string
#   carries no trailing space
cat("Differences in slope with temperature between levels of pollinator",
    "dependence:\n\n")

# unweighted model (weighting by 1 / slope_std_err ^ 2 is currently disabled)
aov_polldep_temp <- aov(slope ~ PollDep, data = slopes_temp_polldep)
summary(aov_polldep_temp)

cat("\n Significant pairwise differences: \n")

# all pairwise comparisons are printed, not only the significant ones
ph_aov_polldep_temp <- TukeyHSD(aov_polldep_temp)
ph_aov_polldep_temp

cat("\n\n")
cat("----------------------------------------------------------\n\n")
cat("Differences in slope over time between levels of pollinator",
    "dependence:\n\n")

# unweighted model (weighting by 1 / slope_std_err ^ 2 is currently disabled)
aov_polldep_year <- aov(slope ~ PollDep, data = slopes_year_polldep)
summary(aov_polldep_year)

cat("\n Significant pairwise differences: \n")

# all pairwise comparisons are printed, not only the significant ones
ph_aov_polldep_year <- TukeyHSD(aov_polldep_year)
ph_aov_polldep_year

```


```{r sum_plant_traits_slopes}

# Summaries per level of pollinator dependence: number of species, mean, sd,
# SEM (sd / sqrt(n)), 95 % CI (mean +- 1.96 * SEM) and fraction of negative
# slopes. The SEM reflects the spread between species; the per-species
# standard errors are not propagated.

# temperature slopes
slopes_temp_polldep_sum <- slopes_temp_polldep %>%
  group_by(PollDep) %>%
  summarise(
    n_spec              = n(),
    slope_mean          = mean(slope),
    slope_sd            = sd(slope),
    slope_sem           = slope_sd / sqrt(n_spec),
    slope_ci_min        = slope_mean - slope_sem * 1.96,
    slope_ci_max        = slope_mean + slope_sem * 1.96,
    slope_frac_negative = sum(slope < 0) / n_spec,
    var                 = "Temperature"
  ) %>%

  # labels for pairwise differences from the Tukey post-hoc test
  mutate(pairw_grp_polldep = sig_letters(ph_aov_polldep_temp$PollDep,
                                         recode.vec.PollDep))


# year slopes
slopes_year_polldep_sum <- slopes_year_polldep %>%
  group_by(PollDep) %>%
  summarise(
    n_spec              = n(),
    slope_mean          = mean(slope),
    slope_sd            = sd(slope),
    slope_sem           = slope_sd / sqrt(n_spec),
    slope_ci_min        = slope_mean - slope_sem * 1.96,
    slope_ci_max        = slope_mean + slope_sem * 1.96,
    slope_frac_negative = sum(slope < 0) / n_spec,
    var                 = "Year"
  ) %>%

  # labels for pairwise differences from the Tukey post-hoc test
  mutate(pairw_grp_polldep = sig_letters(ph_aov_polldep_year$PollDep,
                                         recode.vec.PollDep))

# both summaries side by side, with suffixes _temp and _year
slopes_all_polldep_sum <- slopes_temp_polldep_sum %>%
  left_join(slopes_year_polldep_sum,
            by     = c("PollDep"),
            suffix = c("_temp", "_year")) %>%

  # the var_* columns only repeat the suffix information
  select(-starts_with("var_"))

slopes_all_polldep_sum


```


```{r plot_plant_traits_slope}

# Temperature slopes by pollinator dependence: raw species, mean and CI
polldep_grp_temp_mean_plt <- ggplot() +

  # every species as a jittered background point
  geom_point(data     = slopes_temp_polldep,
             mapping  = aes(x = PollDep, y = slope),
             colour   = col.pt,
             alpha    = alpha.pt,
             position = position_jitter()) +

  # confidence interval around the mean
  geom_errorbar(data    = slopes_temp_polldep_sum,
                mapping = aes(x      = PollDep,
                              ymin   = slope_ci_min,
                              ymax   = slope_ci_max,
                              colour = PollDep)) +

  # mean
  geom_point(data    = slopes_temp_polldep_sum,
             mapping = aes(x = PollDep, y = slope_mean, colour = PollDep)) +

  # pairwise difference labels
  geom_text(data    = slopes_temp_polldep_sum,
            mapping = aes(x     = PollDep,
                          y     = ypos(slopes_temp_polldep$slope),
                          label = pairw_grp_polldep),
            colour  = col.annot) +

  # reference line at 0
  geom_hline(yintercept = 0, colour = col.stc.line) +

  # axis labels
  labs(x = "Group",
       y = "Slope [days / \u00B0C] (\u00B1 95% CI)") +

  # colours
  scale_color_manual(name = "Pollinator Dependence", values = col.grp.vec) +

  # no legend
  theme_shifts(legend.position = "none")


# display with title and subtitle
polldep_grp_temp_mean_plt +
  labs(title    = "Between group differences in shift",
       subtitle = "with temperature")


# Year slopes by pollinator dependence: raw species, mean and CI
polldep_grp_year_mean_plt <- ggplot() +

  # every species as a jittered background point
  geom_point(data     = slopes_year_polldep,
             mapping  = aes(x = PollDep, y = slope),
             colour   = col.pt,
             alpha    = alpha.pt,
             position = position_jitter()) +

  # confidence interval around the mean
  geom_errorbar(data    = slopes_year_polldep_sum,
                mapping = aes(x      = PollDep,
                              ymin   = slope_ci_min,
                              ymax   = slope_ci_max,
                              colour = PollDep)) +

  # mean
  geom_point(data    = slopes_year_polldep_sum,
             mapping = aes(x = PollDep, y = slope_mean, colour = PollDep)) +

  # pairwise difference labels
  geom_text(data    = slopes_year_polldep_sum,
            mapping = aes(x     = PollDep,
                          y     = ypos(slopes_year_polldep$slope),
                          label = pairw_grp_polldep),
            colour  = col.annot) +

  # reference line at 0
  geom_hline(yintercept = 0, colour = col.stc.line) +

  # axis labels
  labs(x = "Group",
       y = "Slope [days / decade] (\u00B1 95% CI)") +

  # colours
  scale_color_manual(name = "Pollinator Dependence", values = col.grp.vec) +

  # no legend
  theme_shifts(legend.position = "none")


# display with title and subtitle
polldep_grp_year_mean_plt +
  labs(title    = "Between group differences in shift",
       subtitle = "over time")


# Shift over time (x) against shift with temperature (y) for plants: one point
# per species, overlaid with means and CIs, one facet per level of pollinator
# dependence.
polldep_grp_corr_plt <- ggplot() +

  # reference lines through the origin
  geom_hline(yintercept = 0, colour = col.stc.line) +
  geom_vline(xintercept = 0, colour = col.stc.line) +

  # one point per species
  geom_point(data    = slopes_all_polldep,
             mapping = aes(x = slope_year, y = slope_temp),
             colour  = col.pt,
             alpha   = alpha.pt) +

  # horizontal bar: CI of the mean shift over time
  geom_errorbarh(data    = slopes_all_polldep_sum,
                 mapping = aes(y      = slope_mean_temp,
                               xmin   = slope_ci_min_year,
                               xmax   = slope_ci_max_year,
                               colour = PollDep)) +

  # vertical bar: CI of the mean shift with temperature
  geom_errorbar(data    = slopes_all_polldep_sum,
                mapping = aes(x      = slope_mean_year,
                              ymin   = slope_ci_min_temp,
                              ymax   = slope_ci_max_temp,
                              colour = PollDep)) +

  # means
  geom_point(data    = slopes_all_polldep_sum,
             mapping = aes(x      = slope_mean_year,
                           y      = slope_mean_temp,
                           colour = PollDep)) +

  # one panel per level of pollinator dependence, all in a single row
  facet_wrap(~ PollDep, nrow = 1) +

  # axis labels
  labs(x = "Shift over time [days / decade] (\u00B1 95% CI)",
       y = "Temperature shift [days / \u00B0C] (\u00B1 95% CI)") +

  # colours
  scale_color_manual(name = "Pollinator dependence", values = col.grp.vec) +

  # no legend
  theme_shifts(legend.position = "none")

# display with title and subtitle
polldep_grp_corr_plt +
  labs(title    = paste("Correlation of plant shifts over time",
                        "and with temperature"),
       subtitle = "by level of pollinator dependence")

```


```{r save_plots_to_disk}

if (save_plots) {

  # all figures of this analysis are written below plots/<analysis_dname>
  plot_dir <- paste0("plots/", analysis_dname)

  dir.check(plot_dir)

  # Records by year ---------------------------------------

  save_plot(filename = paste0(plot_dir, "record_numbers_time.png"),
            plot     = record_dist_plot,
            bg       = "transparent")

  # Temperature by year -----------------------------------

  save_plot(filename = paste0(plot_dir, "temp_over_time.png"),
            plot     = time_temp_plt,
            bg       = "transparent")


  # Slope forest plots ------------------------------------

  # for saving, both forest plots are re-faceted into a single row of
  # id.grps with free y scales
  slopes_spec_year_for_plt <- slopes_spec_year_for_plt +
    facet_wrap(facets = ~ id.grp, scales = "free_y", nrow = 1)

  slopes_spec_temp_for_plt <- slopes_spec_temp_for_plt +
    facet_wrap(facets = ~ id.grp, scales = "free_y", nrow = 1)

  # stack the two plots: temperature (A) on top, year (B) below
  slopes_spec_all_for_pic <- plot_grid(
    plotlist = list(slopes_spec_temp_for_plt,
                    slopes_spec_year_for_plt),
    nrow     = 2,
    labels   = c("A", "B")
  )

  save_plot(filename = paste0(plot_dir, "forest_plot_slopes_all.png"),
            plot     = slopes_spec_all_for_pic,
            nrow     = 2,
            bg       = "transparent")

  # Correlation plot --------------------------------------

  # TODO: fix flags of ci whiskers

  # both mean plots lose their x axis text and title; the year plot gets a
  # larger top margin than the temperature plot (70 vs. 10 bigpts)
  slopes_grp_year_mean_plt <- slopes_grp_year_mean_plt +
    theme(axis.title.x = element_blank(),
          axis.text.x  = element_blank(),
          plot.margin  = unit(c(70, 10, 10, 10), "bigpts"))

  slopes_grp_temp_mean_plt <- slopes_grp_temp_mean_plt +
    theme(axis.title.x = element_blank(),
          axis.text.x  = element_blank(),
          plot.margin  = unit(c(10, 10, 10, 10), "bigpts"))

  # keep all id.grp facets of the correlation plot in one row
  slopes_grp_all_corr_plt <- slopes_grp_all_corr_plt +
    facet_wrap(facets = ~ id.grp, nrow = 1)

  # draw the year plot onto a canvas and put one pictogram per id.grp on
  # top of it; all pictograms share height and scale, only x differs
  slopes_grp_year_mean_pic <- ggdraw() +
    draw_plot(slopes_grp_year_mean_plt) +
    Map(
      function(img_file, x_pos) {
        draw_image(image = img_file,
                   x = x_pos, y = 0.86,
                   scale = 0.25, hjust = 0.5, vjust = 0.5)
      },
      c("assets/Plant.png",
        "assets/Beetle.png",
        "assets/Hoverfly.png",
        "assets/Bee.png",
        "assets/Butterfly.png"),
      c(0.249, 0.408, 0.566, 0.727, 0.882)
    )

  # panels A and B first, then that pair on top of panel C
  slopes_all_mean_pic <- plot_grid(
    plotlist = list(slopes_grp_year_mean_pic,
                    slopes_grp_temp_mean_plt),
    ncol     = 1,
    labels   = c("A", "B")
  )

  slopes_all_mean_corr_pic <- plot_grid(
    plotlist = list(slopes_all_mean_pic,
                    slopes_grp_all_corr_plt),
    ncol     = 1,
    labels   = c(NA, "C")
  )

  # save the combined figure as pdf and as png
  invisible(lapply(
    c("slopes_correlation.pdf", "slopes_correlation.png"),
    function(fname) {
      save_plot(filename = paste0(plot_dir, fname),
                plot     = slopes_all_mean_corr_pic,
                nrow     = 3,
                bg       = "transparent")
    }
  ))


  # Asynchrony plots --------------------------------------

  # modify plots for saving: the scatter plot loses its facet strip labels,
  # the mean plot loses its x axis and gets a larger top margin
  asyn_grp_year_scatter_plt <- asyn_grp_year_scatter_plt +
    theme(strip.text = element_blank())

  asyn_int_year_mean_plt <- asyn_int_year_mean_plt +
    theme(axis.title.x = element_blank(),
          axis.text.x  = element_blank(),
          plot.margin  = unit(c(50, 10, 10, 10), "bigpts"))

  # combine the labelled plots on a canvas and add the pictograms on top
  asyn_pic <- ggdraw() +
    draw_plot(plot_grid(plotlist = list(asyn_int_year_mean_plt,
                                        asyn_grp_year_scatter_plt),
                        ncol     = 1,
                        labels   = c("A", "B"))) +
    Map(
      function(img_file, x_pos) {
        draw_image(image = img_file,
                   x = x_pos, y = 0.95,
                   scale = 0.25, hjust = 0.5, vjust = 0.5)
      },
      c("assets/Hoverfly_and_Plant.png",
        "assets/Bee_and_Plant.png",
        "assets/Butterfly_and_Plant.png"),
      c(0.265, 0.542, 0.820)
    )

  # save the combined figure as pdf and as png
  invisible(lapply(
    c("interactions_all.pdf", "interactions_all.png"),
    function(fname) {
      save_plot(filename = paste0(plot_dir, fname),
                plot     = asyn_pic,
                nrow     = 2,
                bg       = "transparent")
    }
  ))

  # additionally save the asynchrony scatter plot for temperature on its own
  # (supplementary figure)
  save_plot(filename = paste0(plot_dir, "asyn_grp_temp_scatter_plt.png"),
            plot     = asyn_grp_temp_scatter_plt,
            bg       = "transparent")


  # PollDep plots -----------------------------------------

  polldep_pic <- plot_grid(
    plotlist = list(polldep_grp_year_mean_plt,
                    polldep_grp_temp_mean_plt),
    ncol     = 1,
    labels   = c("A", "B")
  )

  save_plot(filename = paste0(plot_dir, "polldep_diffs.png"),
            plot     = polldep_pic,
            nrow     = 2,
            bg       = "transparent")


  # Slope - sample size correlation plots -----------------

  save_plot(filename = paste0(plot_dir, "slope_n_corr_temp.png"),
            plot     = slope_n_corr_temp_plt,
            base_asp = 2,
            bg       = "transparent")

  # the year plot is re-faceted into a single row just for the saved file
  save_plot(filename = paste0(plot_dir, "slope_n_corr_year.png"),
            plot     = slope_n_corr_year_plt +
              facet_wrap(facets = ~ id.grp, nrow = 1),
            base_asp = 2,
            bg       = "transparent")


}

```


```{r save_tables}

# Export the summary tables (record numbers + slopes, pollinator dependence,
# asynchrony) as one .docx file each below tables/<analysis_dname>.
# Only runs when save_tables is TRUE.
if (save_tables) {

  library("flextable")
  library("officer")

  tab_dir <- paste0("tables/", analysis_dname)

  dir.check(tab_dir)

  # set default section params (landscape pages for every exported document)
  def_sect_props <- prop_section(page_size = page_size(orient = "landscape"))

  # Record numbers and slopes ------------------------------------------

  # set up export docx file
  doc_file <- read_docx() %>%
    body_set_default_section(def_sect_props)

  # get data and adapt for table
  n_rec <- fread(paste0(data_dir, "n_records_by_idgrp_pruned.csv"),
                 showProgress = FALSE) %>%

    # recode id.grp to use trivial names
    # (recode only takes name/value pairs, so !!! expansion necessary)
    mutate(id.grp = recode_factor(id.grp, !!! recode.vec)) %>%

    # colnames made of zero width spaces are unique but show up as nothing
    #  in word; the visible labels for them come from the added header row
    select(
      "\u200B"             = id.grp,
      "\u200B\u200B"       = n_species,
      "\u200B\u200B\u200B" = n_rec,
      "Min"                = min_species_rec,
      "Median"             = median_species_rec,
      "Max"                = max_species_rec)


  slopes_sum <- slopes_all_sum %>%

    # round all numbers to 2 digits
    # (lambda form, passing extra args via across(...) is deprecated)
    mutate(across(where(is.numeric), ~ round(.x, 2))) %>%

    # express fractions of negative slopes as percentages
    mutate(across(matches("frac_negative"), ~ paste0(.x * 100, "%"))) %>%

    # combine mean and sem into one "mean ± sem" string
    mutate(slope_mean_year          = paste(slope_mean_year,
                                            slope_sem_year,
                                            sep = " \u00B1 "),
           slope_mean_temp          = paste(slope_mean_temp,
                                            slope_sem_temp,
                                            sep = " \u00B1 ")) %>%

    # colnames are just 0 width spaces in order to have something unique there
    #  that would show up as nothing in word
    select("\u200B"                          = id.grp,
           "Days / decade"                   = slope_mean_year,
           "p slope \u2260 0"                = p_diff_zero_adj_year,
           "% Advancing"                     = slope_frac_negative_year,
           "Days / 1 °C"                     = slope_mean_temp,
           "p slope \u2260 0\u200B"          = p_diff_zero_adj_temp,
           "% Advancing\u200B"               = slope_frac_negative_temp)


  # both tables share the (zero width space named) group column as key
  slopes_n_rec <- left_join(n_rec, slopes_sum, by = "\u200B")


  slopes_n_rec_tab <- slopes_n_rec %>%

    flextable() %>%

    # colwidths have to add up to the 12 columns of the joined table
    add_header_row(values = c("Group", "# Species", "# Records",
                              "# Records / Species", "Temporal shifts",
                              "Temperature sensitivity"),
                   colwidths = c(1, 1, 1, 3, 3, 3)) %>%

    theme_booktabs(bold_header = TRUE)

  # add table to the docx file and save it
  doc_file <- body_add_flextable(doc_file, slopes_n_rec_tab)
  print(doc_file, target = paste0(tab_dir, "slopes_nrec_tab.docx"))


  # PollDep table -------------------------------------------

  # set up export docx file
  doc_file <- read_docx() %>%
    body_set_default_section(def_sect_props)

  polldep_tab <- slopes_all_polldep_sum %>%

    mutate(across(where(is.numeric), ~ round(.x, 2))) %>%
    mutate(across(matches("frac_negative"), ~ paste0(.x * 100, "%"))) %>%

    # combine mean and sem into one "mean ± sem" string
    mutate(slope_mean_year = paste(slope_mean_year,
                                   slope_sem_year,
                                   sep = " \u00B1 ")) %>%
    mutate(slope_mean_temp = paste(slope_mean_temp,
                                   slope_sem_temp,
                                   sep = " \u00B1 ")) %>%

    select(
      "\u200B"            = PollDep,
      "\u200B\u200B"      = n_spec_year,
      "Days / decade"     = slope_mean_year,
      "% Advancing"       = slope_frac_negative_year,
      "Days / 1 °C"       = slope_mean_temp,
      "% Advancing\u200B" = slope_frac_negative_temp) %>%

    flextable() %>%

    add_header_row(values = c("Insect dependence", "# Species", "Temporal shifts",
                              "Temperature sensitivity"),
                   colwidths = c(1, 1, 2, 2)) %>%

    theme_booktabs(bold_header = TRUE)


  # add table to the docx file and save it
  doc_file <- body_add_flextable(doc_file, polldep_tab)
  print(doc_file, target = paste0(tab_dir, "polldep_tab.docx"))


  # Asyn table ----------------------------------------------

  # set up export docx file
  doc_file <- read_docx() %>%
    body_set_default_section(def_sect_props)

  asyn_tab <- int_slopes_asyn_sum %>%

    mutate(across(where(is.numeric), ~ round(.x, 2))) %>%

    # combine mean and sem into one "mean ± sem" string
    mutate(slope_year_asyn_mean = paste(slope_year_asyn_mean,
                                        slope_year_asyn_sem,
                                        sep = " \u00B1 ")) %>%
    mutate(slope_temp_asyn_mean = paste(slope_temp_asyn_mean,
                                        slope_temp_asyn_sem,
                                        sep = " \u00B1 ")) %>%

    # column order has to match the header row added below: each p value
    #  directly follows the slope of the same predictor, so the year p value
    #  ends up under "Temporal shifts" and the temperature p value under
    #  "Temperature sensitivity"
    select(
      "\u200B"                 = group,
      "\u200B\u200B"           = n_int,
      "Plant"                  = n_plant,
      "Pollinator"             = n_poll,
      "Days / decade"          = slope_year_asyn_mean,
      "p slope \u2260 0"       = p_diff_zero_year,
      "Days / 1 °C"            = slope_temp_asyn_mean,
      "p slope \u2260 0\u200B" = p_diff_zero_temp) %>%

    flextable() %>%

    add_header_row(values = c("Interaction Group",
                              "# Pairwise interactions",
                              "# Species",
                              "Temporal shifts",
                              "Temperature sensitivity"),
                   colwidths = c(1, 1, 2, 2, 2)) %>%

    theme_booktabs(bold_header = TRUE)

  # add table to the docx file and save it
  # (doc_file keeps holding the document, not the return value of print())
  doc_file <- body_add_flextable(doc_file, asyn_tab)
  print(doc_file, target = paste0(tab_dir, "asyn_tab.docx"))

}
```