Skip to content

Commit

Permalink
added R scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
jakobrehmann committed Feb 1, 2024
1 parent 8550737 commit 5ce3b3f
Show file tree
Hide file tree
Showing 10 changed files with 690 additions and 0 deletions.
83 changes: 83 additions & 0 deletions src/main/R/BaseSuscepvsPandemicSize.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
library(tidyverse)

enter_path_here <- "" #Here one needs to enter the path to the folder of the modeling results
setwd(enter_path_here)

# Reading in of files, the user needs to change line 9 according to the network they are interested in
files <- list.files(path=enter_path_here, pattern="*recovered", full.names=FALSE, recursive=FALSE)
files <- files[files != paste0(enter_path_here, "_info.csv")]

dataSetFull <- data.frame()

for (i in 1:(length(files))){
filesReduced <- files[i]
for (file in filesReduced){
dataSetNew <- read.csv(file)
dataSetNew$ID <- seq.int(nrow(dataSetNew))
dataSetNew$Network <- str_split(file, "-")[[1]][[1]]
dataSetNew$BaseSuscep <- str_split(file, "-")[[1]][[2]]
dataSetNew$Strategy <- str_split(file, "-")[[1]][[3]]
if (length(str_split(file, "-")[[1]]) == 5) {
dataSetNew$lag <- str_split(file, "-")[[1]][[5]]
} else {
dataSetNew$lag <- 0
}
dataSetNew <- pivot_longer(dataSetNew, cols = seed1:seed100)
dataSetFull <- rbind(dataSetFull, dataSetNew)
}
}

#dataSetFull <- read.csv("/Users/sydney/Desktop/10000nodesdata/dataSetFullBaseSuscepvsPandemicsize.csv")

dataSetGrouped <- dataSetFull
noTimeSteps <- max(dataSetGrouped$ID)
dataSetGrouped <- dataSetGrouped %>% filter(ID == noTimeSteps) %>%
filter(value > 1) %>%
group_by(BaseSuscep, ID, Strategy, lag, Network) %>%
summarise(mean = mean(value), sd = sd(value))
dataSetGrouped <- dataSetGrouped %>% ungroup()


# ggplot(dataSetGrouped, aes(x = Strategy, y = mean)) +
# geom_point() +
# ylab("Pandemic size (# of recovered, after 200 steps") +
# xlab("Considered szenario") +
# theme_minimal() +
# facet_wrap(~BaseSuscep) +
# ggtitle(network_top) +
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#ggsave(paste0(str_split(file, "-")[[1]][[1]], "ScenarioVsPandemicSize.pdf"), dpi = 500, w = 9, h = 6)

dataSetGrouped$BaseSuscep <- as.double(dataSetGrouped$BaseSuscep)

dataSetGrouped <- dataSetGrouped %>% filter(Strategy != "local2") %>% filter(Strategy != "global_local1_and_2") %>% filter(Strategy != "global_local1")

for (network in unique(dataSetGrouped$Network)) {
for (consideredlag in unique(dataSetGrouped$lag)) {
ggplot(dataSetGrouped %>% filter(Network == network) %>% filter(lag == consideredlag), aes(x = BaseSuscep, y = mean)) +
geom_line(aes(color = Strategy), size = 4.5) +
scale_color_brewer(palette = "Dark2") +
ylab("Pandemic Size") +
xlab("Infection Probability \n per Single Contact") +
#scale_y_log10(breaks = c(1, 10, 100, 1000,10000), labels = c(1, 10, 100, 1000,10000)) +
theme_minimal() +
#ggtitle(network_top) +
theme(text = element_text(size = 60)) +
scale_x_continuous(breaks = seq(from = 0.1, to = 0.3, by = 0.05)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.position = "right", legend.title = element_blank()) +
theme(axis.ticks.x = element_line(),
axis.ticks.y = element_line(),
axis.ticks.length = unit(5, "pt")) +
theme(axis.title.x = element_text(margin = margin(t = 10, r = 20, b = 0, l = 10)))

if (length(str_split(file, "-")[[1]]) == 5) {
ggsave(paste0(as.character(network), "-", as.character(consideredlag), "-BaseSuscepVsPandemicSize.pdf"), dpi = 500, w = 12.5, h = 9.5)
} else {
ggsave(paste0(as.character(network), "-Susceptible-BaseSuscepVsPandemicSize.pdf"), dpi = 500, w = 12.5, h = 9.5)
}

}
}


141 changes: 141 additions & 0 deletions src/main/R/BaseSuscepvsPeak.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
library(tidyverse)

enter_path_here <- "" #Here one needs to enter the path to the folder of the modeling results
setwd(enter_path_here)

# Reading in of necessary modeling results
files <- list.files(path=enter_path_here, pattern="*infectious", full.names=FALSE, recursive=FALSE)
files <- files[files != paste0(enter_path_here, "_info.csv")]

dataSetFull <- data.frame()
for(i in 1:(length(files))){
filesReduced <- files[i]

for(file in filesReduced){
dataSetNew <- read.csv(file)
dataSetNew$ID <- seq.int(nrow(dataSetNew))
dataSetNew$Network <- str_split(file, "-")[[1]][[1]]
dataSetNew$BaseSuscep <- str_split(file, "-")[[1]][[2]]
dataSetNew$Strategy <- str_split(file, "-")[[1]][[3]]
if (length(str_split(file, "-")[[1]]) == 5) {
dataSetNew$lag <- str_split(file, "-")[[1]][[5]]
} else {
dataSetNew$lag <- 0
}
dataSetNew <- pivot_longer(dataSetNew, cols = seed1:seed100)

#Removing the runs where the infection dies out directly after the initial seed
removed <- dataSetNew %>% group_by(name) %>% summarise(cumsuminfected = (max(value)))
removed <- removed %>% filter(cumsuminfected == 1)
test <- removed$name
dataSetNew <- dataSetNew %>% filter(!name %in% test)

dataSetFull <- rbind(dataSetFull, dataSetNew)
}
}

#Preparing the data for plotting
dataSetGrouped <- dataSetFull %>%
group_by(BaseSuscep, Strategy, ID, lag, Network) %>%
summarise(value = mean(value))
dataSetGrouped <- dataSetGrouped %>%
group_by(BaseSuscep, Strategy, lag, Network) %>%
summarise(maxInfected = max(value), timePointMax = which.max(value))

# ggplot(dataSetGrouped, aes(x = Strategy, y = maxInfected)) +
# geom_point() +
# xlab("weight") +
# theme_minimal() +
# facet_wrap(~ BaseSuscep) +
# #ggtitle(network_top) +
# xlab("Considered Scenario") +
# ylab("Peak height") +
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#ggsave(paste0(str_split(file, "-")[[1]][[1]], "ScenarioVsMaxInfected.pdf"), dpi = 500, w = 9, h = 6)

# ggplot(dataSetGrouped, aes(x = Strategy, y = timePointMax)) +
# geom_point() +
# xlab("weight") +
# theme_minimal() +
# facet_wrap(~ BaseSuscep) +
# #ggtitle(network_top) +
# xlab("Considered Scenario") +
# ylab("Timing of peak") +
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

#ggsave(paste0(str_split(file, "-")[[1]][[1]], "ScenarioVsPeakTiming.pdf"), dpi = 500, w = 9, h = 6)

# dataSetGrouped <- dataSetFull %>%
# group_by(BaseSuscep, DiseaseState, name) %>%
# summarise(maxInfected = max(value), sd = sd(value)) %>%
# filter(maxInfected > 5)
# #
# ggplot(dataSetGrouped, aes(x = Strategy, y = maxInfected)) +
# geom_boxplot() +
# facet_wrap(~BaseSuscep) +
# theme_minimal()

dataSetGrouped$BaseSuscep <- as.numeric(dataSetGrouped$BaseSuscep)

dataSetGrouped <- dataSetGrouped %>% filter(Strategy != "local2") %>% filter(Strategy != "global_local1_and_2") %>% filter(Strategy != "global_local1")

#Creating and saving a plot that displays the infection probability per single contact vs. the peak height
for(network in unique(dataSetGrouped$Network)){
for(consideredlag in unique(dataSetGrouped$lag)) {
ggplot(dataSetGrouped %>% filter(Network == network) %>% filter(lag == consideredlag), aes(x = BaseSuscep, y = maxInfected)) +
geom_line(aes(color = Strategy), size = 4.5) +
scale_color_brewer(palette = "Dark2") +
ylab("Peak Height") +
#scale_y_log10() +
xlab("Infection Probability \n per Single Contact") +
theme_minimal() +
#ggtitle(network_top) +
scale_x_continuous(breaks = seq(from = 0.1, to = 0.3, by = 0.05)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
theme(text = element_text(size = 60)) +
theme(legend.position = "bottom", legend.title = element_blank()) +
theme(axis.ticks.x = element_line(),
axis.ticks.y = element_line(),
axis.ticks.length = unit(5, "pt")) +
theme(axis.title.x = element_text(margin = margin(t = 10, r = 20, b = 0, l = 10))) +
guides(fill = FALSE, color = FALSE)
#guides(color=guide_legend(nrow=2, byrow=TRUE))

if (length(str_split(file, "-")[[1]]) == 5){
ggsave(paste0(as.character(network), "-", as.character(consideredlag), "-BaseSuscepVsMaxInfected.pdf"), dpi = 500, w = 9.5, h = 9.5)
}else {
ggsave(paste0(as.character(network), "-BaseSuscepVsMaxInfected.pdf"), dpi = 500, w = 9.5, h = 9.5)
}
}
}

#Creating and saving a plot that displays the infection probability per single contact vs. the time step of the peak
for(network in unique(dataSetGrouped$Network)){
for(consideredlag in unique(dataSetGrouped$lag)){
ggplot(dataSetGrouped %>% filter(Network == network) %>% filter(lag == consideredlag), aes(x = BaseSuscep, y = timePointMax)) +
geom_line(aes(color = Strategy), size = 4.5) +
scale_color_brewer(palette = "Dark2") +
ylab("Time Step \n of Peak") +
xlab("Infection Probability \nper Single Contact") +
theme_minimal() +
# scale_y_log10() +
#ggtitle(network_top) +
theme(text = element_text(size = 57)) +
scale_x_continuous(breaks = seq(from = 0.1, to = 0.3, by = 0.05)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
theme(legend.position = "bottom", legend.title = element_blank()) +
theme(axis.ticks.x = element_line(),
axis.ticks.y = element_line(),
axis.ticks.length = unit(5, "pt")) +
theme(axis.title.x = element_text(margin = margin(t = 10, r = 70, b = 0, l = 10))) +
guides(fill = FALSE, color = FALSE)
#guides(color=guide_legend(nrow=2, byrow=TRUE))

if (length(str_split(file, "-")[[1]]) == 5){
ggsave(paste0(as.character(network), "-", as.character(consideredlag), "-BaseSuscepVsPeakTiming.pdf"), dpi = 500, w = 9.5, h = 9.5)
} else {
ggsave(paste0(as.character(network), "-BaseSuscepVsPeakTiming.pdf"), dpi = 500, w = 9.5, h = 9.5)
}

}
}
37 changes: 37 additions & 0 deletions src/main/R/DensityPlot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
enter_path_here <- "/Users/sydney/Dropbox/JAKOB-SYDNEY/2023-12-04T141138-0.5"
setwd(enter_path_here)

files <- list.files(path=enter_path_here, pattern="*.csv", full.names=FALSE, recursive=FALSE)
files <- files[files != "_info.csv"]
files <- files[!grepl("smallworld", files)]

for(i in 1:(length(files)/4)){
lowerCounter <- 1+(i-1)*4
upperCounter <- 4*i
filesReduced <- files[lowerCounter:upperCounter]

dataSetFull <- data.frame()

for(file in filesReduced){
dataSetNew <- read.csv(file)
dataSetNew$ID <- seq.int(nrow(dataSetNew))
dataSetNew$DiseaseState <- str_split(file, "-")[[1]][[4]]
dataSetNew <- pivot_longer(dataSetNew, cols = seed1:seed100)
dataSetFull <- rbind(dataSetFull, dataSetNew)
}

noTimeSteps <- max(dataSetFull$ID)
dataSetFull <- dataSetFull %>% filter(DiseaseState == "recovered.csv") %>% filter(ID == noTimeSteps)


plot <- ggplot(dataSetFull, aes(x=value)) +
geom_density(color="darkblue", fill="lightblue", alpha = 0.2) +
theme_minimal() +
xlab("Final size") +
ylab("Density")


file_name <- paste0(str_split(file, "-")[[1]][[1]], "-DensityPlot.pdf")
ggsave(file_name, plot, dpi = 500, w = 9, h = 4.5)
}

103 changes: 103 additions & 0 deletions src/main/R/ICurves.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
library(tidyverse)

enter_path_here <- "" #Here one needs to enter the path to the folder of the modeling results
setwd(enter_path_here)

#Reading in of the necessary data
files <- list.files(path=enter_path_here, pattern="*infectious-0.csv", full.names=FALSE, recursive=FALSE) #Right now, the curves are produced for a 0-day lag. The pattern needs to be adapted if the user is interested in alternative lags.
files <- files[files != "_info.csv"]
files <- files[!str_detect(files, "infectionChance")]
#pattern <- c("-0.1-", "-0.2-", "-0.3-")
pattern <- c("-0.3-")
files <- files[grepl(paste(pattern, collapse = "|"), files)]

dataSetFull <- data.frame()

for(file in files){
dataSetNew <- read.csv(file)
dataSetNew$ID <- seq.int(nrow(dataSetNew))
dataSetNew$Network <- str_split(file, "-")[[1]][[1]]
dataSetNew$BaseProb <- str_split(file, "-")[[1]][[2]]
dataSetNew$DiseaseState <- str_split(file, "-")[[1]][[3]]
#dataSetNew$DiseaseState <- str_remove(dataSetNew$DiseaseState, ".csv")
if (length(str_split(file, "-")[[1]]) == 5) {
dataSetNew$lag <- str_split(file, "-")[[1]][[5]]
} else {
dataSetNew$lag <- 0
}
#dataSetNew$DiseaseState <- substr(dataSetNew$DiseaseState,1,nchar(dataSetNew$DiseaseState)-4)
dataSetNew <- pivot_longer(dataSetNew, cols = seed1:seed100)

#Removing the runs where the infection dies out directly after the initial seed
removed <- dataSetNew %>% group_by(name) %>% summarise(cumsuminfected = (max(value)))
removed <- removed %>% filter(cumsuminfected == 1)
test <- removed$name
dataSetNew <- dataSetNew %>% filter(!name %in% test)
dataSetFull <- rbind(dataSetFull, dataSetNew)
}

# The following line of codes plot the infectious curve for each seed
# This tests if the different seeds are converging
# for(network in unique(dataSetFull$Network)){
# for(baseprob in unique(dataSetFull$BaseProb)){
# for(strategy in unique(dataSetFull$DiseaseState)){
# for(lag in unique(dataSetFull$lag)){
# plot <- ggplot(dataSetFull %>% filter(Network == network) %>% filter(BaseProb == baseprob) %>% filter(DiseaseState == strategy), aes(x=ID, y = value)) +
# xlab("Time step") +
# ylab("Number of infectious agents") +
# geom_line(aes(color = name), alpha = 0.2) +
# theme_minimal() +
# theme(legend.position = "bottom", legend.title = element_blank()) +
# guides(fill=FALSE, color=FALSE) +
# theme(text = element_text(size = 45)) +
# theme(axis.ticks.x = element_line(),
# axis.ticks.y = element_line(),
# axis.ticks.length = unit(5, "pt"))

# file_name <- paste0(as.character(network),"-", as.character(dataSetGrouped$lag), "-", as.character(baseprob), "-", as.character(strategy), "-ICurvesDifferentSeeds.pdf")
# ggsave(file_name, plot, dpi = 500, w = 15, h = 10)
# }
# }
# }
# }

#Data pprep for plotting
dataSetGrouped <- dataSetFull %>% group_by(Network, DiseaseState, ID, lag, BaseProb) %>% summarise(mean = mean(value), sd = sd(value))
dataSetGrouped <- dataSetGrouped %>% ungroup()
dataSetGrouped <- dataSetGrouped %>% mutate(ymin = case_when(mean-sd > 0 ~ mean-sd,
.default = 0)) %>%
mutate(lag = case_when(lag == "0.csv" ~ 0,
lag == "1.csv" ~ 1,
lag == "2.csv" ~ 2,))

dataSetGrouped <- dataSetGrouped %>% mutate(BaseProbLabel = case_when(
BaseProb == 0.3 ~ "Infection probability = 0.3",
BaseProb == 0.2 ~ "Infection probability = 0.2",
BaseProb == 0.1 ~ "Infection probability = 0.1"))
dataSetGrouped$BaseProbLabel <- as.factor(dataSetGrouped$BaseProbLabel)
dataSetGrouped$BaseProbLabel <- ordered (dataSetGrouped$BaseProbLabel, levels = c("Infection probability = 0.3", "Infection probability = 0.2","Infection probability = 0.1"))


#Plots are created and saved for different networks
for(network in unique(dataSetGrouped$Network)){
plot <- ggplot(dataSetGrouped %>% filter(DiseaseState != "local2") %>% filter(DiseaseState != "global_local1_and_2") %>% filter(DiseaseState != "global_local1") %>% filter(Network == network), aes(x=ID, y = mean)) +
geom_line(aes(color = DiseaseState), size = 4.5) +
xlab("Time Step") +
ylab("No. of \n Infectious Agents") +
#scale_y_log10(breaks = c(1, 10, 100, 1000,10000), labels = c(1, 10, 100, 1000,10000)) +
xlim(c(0, 110)) +
theme_minimal() +
theme(legend.position = "none", legend.title = element_blank()) +
theme(axis.ticks.x = element_line(),
axis.ticks.y = element_line(),
axis.ticks.length = unit(5, "pt")) +
scale_color_brewer(palette = "Dark2") +
theme(text = element_text(size = 60)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
theme(axis.title.x = element_text(margin = margin(t = 10, r = 20, b = 0, l = 10)))
# theme(axis.title.y = element_text(margin = margin(t = 200, r = 0, b = 0, l = 20)))

file_name <- paste0(as.character(network),"-", unique(dataSetGrouped$lag), "lag-ICurves.pdf")
ggsave(file_name, plot, dpi = 500, w = 9.5, h = 9.5)
}

Loading

0 comments on commit 5ce3b3f

Please sign in to comment.