Forecasts.Rmd

---
title: "Cases and Deaths"
date: "`r format(Sys.time(), '%A %b %d, %Y %r')`"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)
```

```{r, message=FALSE, warning=FALSE, include=FALSE}
library(CoV19)
library(ggplot2)
library(gridExtra)
```

Oct 2020: I removed the forecasts, which were based on fitting a log-normal shape curve. That approach worked for epidemics that were contained but not for the sputtering continuous spread observed in many US states.

<!-- These are forecast by fitting a log-normal shape curve the new positive cases and estimating deaths using the 7-lag CFR.  The 95% confidence intervals on the forecasts are shown.  -->

<!-- These are just toy forecasts to try to understand the behavior of statistical forecasts using only a log-normal fit to new cases.  This is somewhat similar to the strategy used in the [UW model](https://covid19.healthdata.org/) except they fit to deaths and I am fitting to a 7-day average of reported positives (I had been using 3-day previously but switched to 7-day on May 1). -->

<!-- These are just experiments to understand the nature of these kind of fits.  Don't put any stock whatsoever into these 'forecasts'. If the new cases are still going up exponentially, then the model has no information about where the peak might be and the model cannot fit. In that case, I show the new cases only. -->

<!-- If the model fits, then the blue dots are the smoothed (7-day average) new cases to which the model was fit. -->

```{r thenormfuncs}
# estimate mu, sigma and scaling
f2<-function(x, theta) {
  m<-theta[1]; s<-theta[2]; a<-theta[3];
  a*exp(-0.5*((x-m)/s)^2)
}
f2<-function(x, theta) {
  m<-theta[1]; s<-theta[2]; a<-theta[3];
  x <- x-m
  a*exp(-0.5*(x/s)^2)
}
# fix m
f3<-function(x, theta, m) {
  s<-theta[1]; a<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
# fix s
f4<-function(x, theta, s) {
  m<-theta[1]; a<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
f1<-function(x, theta, a) {
  m<-theta[1]; s<-theta[2];
  a*exp(-0.5*((x-m)/s)^2)
}
```

```{r fitf1}

fitf1<- function(subx, m=5, a=1, s=0.08){
  # m is actual m not log(day)
  f1<-function(x, theta, a) {
    m<-theta[1]; s<-theta[2];
    ifelse(s<0,-1,1)*a*exp(-0.5*((x-m)/s)^2)
  }
  f1<-function(x, theta, a) {
    m<-theta[1]; s<-theta[2];
    x <- x-m
    ifelse(s<0,-1,1)*a*exp(-0.5*(x/s)^2)
  }
  for(m in c(m, seq(.5,2,.1)*m)){
    fit1<-try(nls(y~f1(x,c(m,s),a), data=subx, start=list(m=m, s=s), control = list(maxiter = 500), algorithm="port", lower=c(0,0.03)), silent=TRUE)
    if(!inherits(fit1, "try-error")) break
  }
  return(fit1)
}

signx <- function(x) return(ifelse(x<0,-1,1))

getcis <- function(fit.best, subx, m=20, s=0.2, sig=0.05, level=0.05){
  if(coef(fit.best)[3] > 20) return(matrix(NA,3,2))
  m <- coef(fit.best)[1]
  s <- coef(fit.best)[2]
  sig <- max(summary(fit.best)$sigma, sig)
  err.best <- residuals(fit.best)
  n <- length(err.best)
  val=c()
  aval <- seq(.5, 10*coef(fit.best)[3],0.1)
  for(a in aval){
    fit <- fitf1(subx, a=a, m=m, s=s)
    err <- residuals(fit)
    rss=sum(err^2, na.rm=TRUE)
    Fval <- (rss-sum(err.best^2, na.rm=TRUE))/sig^2
    tauval <- signx(a-coef(fit.best)[3])*sqrt(Fval)
    if(!is.na(tauval) && tauval>3 && tauval<5) break #catch bad fits with < 5
    if(is.na(tauval)) next
    val <- rbind(val, c(rss=rss, tau=tauval,
                        coef(fit),a=a))
  }
  val <- val[val[,"tau.a"]> -5 & val[,"tau.a"]< 5,]
  mina <- max(which(val[,"tau.a"]<qt(level/2,n-3)),1)
  maxa <- min(which(val[,"tau.a"]>qt(1-level/2,n-3)),nrow(val))
  ret <- cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")])
  if(ret[1,2]<m) ret[,2] <- NA
  if(ret[1,1]>m) ret[,2] <- NA
  return(cbind(val[mina,c("m", "s", "a")], val[maxa,c("m", "s", "a")]))
}
```

```{r getfit}
# Get the fit
getfit <- function(x, posmin=100, maxh=200, fun="f2", s.fix=0.08, sig=0.05, fitit=TRUE, fil=7, sub=1){
  x$new.cases.smooth <- stats::filter(x$new.cases, rep(1/fil,fil))
  x$new.cases.smooth <- c(rep(NA,floor(fil/2)),zoo::rollapply(x$new.cases, fil, mean, na.rm=TRUE), rep(NA,floor(fil/2)))
  day1 <- min(which(x$positive>posmin))
  x$day <- 1:nrow(x) - day1 + 1
  x$x <- log(x$day)
  
  subx <- subset(x, positive>posmin)[,c("date", "positive", "region", "new.cases", "new.cases.smooth", "day", "x")]
  subx <- na.omit(subx)
  subx$x <- log(subx$day)
  subx$y <- subx$new.cases.smooth*subx$day
  maxy <- max(subx$y, na.rm=TRUE)
  subx$y <- subx$y/maxy
  subrange <- seq(nrow(subx) %% sub, nrow(subx),sub)
  subrange <- subrange[subrange!=0]
  subx <- subx[subrange,]
  
  if(!fitit) return(list(x=x, subx=subx))
  
  for(s in c(0.05,.1,1)){
    for(m in seq(10,200,25)){
      if(fun=="f2") fit<-try(nls(y~f2(x,c(m,s,a)), data=subx, start=list(m=log(m), s=s, a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE)
      if(fun=="f4") fit<-try(nls(y~f4(x,c(m,a), s.fix), data=subx, start=list(m=log(m), a=max(subx$y, na.rm=TRUE)), control = list(maxiter = 500)), silent=TRUE)
      if(!inherits(fit, "try-error")) break()
    }
    if(!inherits(fit, "try-error")) break()
  }
  if(inherits(fit, "try-error")) return(list(x=x, pred=NULL, fit=fit))
  tmp <- predict(fit, newdata=x)*maxy/x$day
  nNAs <- nrow(x)-length(tmp)
  # subx cuts off period where positive < posmin
  x$fitted <-c(rep(NA, nNAs), tmp)
  
  pred <- data.frame(
    day=max(x$day)+1:maxh,
    date=max(x$date)+1:maxh)
  pred$x <- log(pred$day)
  pred$pred <- predict(fit, newdata=pred)*maxy/pred$day
  
  cm1=NA; cm2=NA
  confval1 <- try(confint(fit, level=0.95), silent=TRUE)
  if(inherits(confval1, "try-error") || any(is.na(confval1)) ){ 
    confval <- try(getcis(fit.best=fit, subx=subx, level=0.05, sig=sig))
    if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,1])))) confval[,1] <- confval1[,1]
    if(!inherits(confval1, "try-error") && !inherits(confval, "try-error") && !any(any(is.na(confval1[,2])))) confval[,2] <- confval1[,2]
  }else{ confval <- confval1 }
  
  if(!inherits(confval, "try-error")){
    if(fun=="f2"){
      cm1 <- sum(f2(log(pred$day), confval[,1])*maxy/pred$day)
      cm2 <- sum(f2(log(pred$day), confval[,2])*maxy/pred$day)
      pred$pred.low <- f2(log(pred$day), confval[,1])*maxy/pred$day
      pred$pred.high <- f2(log(pred$day), confval[,2])*maxy/pred$day
      x$fitted.low <- f2(x$x, confval[,1])*maxy/x$day
      x$fitted.high <- f2(x$x, confval[,2])*maxy/x$day
    }else{
      cm1 <- sum(f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day)
      cm2 <- sum(f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day)
      pred$pred.low <- f4(log(pred$day), confval[,1], s.fix)*maxy/pred$day
      pred$pred.high <- f4(log(pred$day), confval[,2], s.fix)*maxy/pred$day
      x$fitted.low <- f4(x$x, confval[,1], s.fix)*maxy/x$day
      x$fitted.high <- f4(x$x, confval[,2], s.fix)*maxy/x$day
    }
    
  }
  
  return(list(x=x, subx=subx, pred=pred, fit=fit, confval=confval, cm=sort(c(cm1, cm2))))
}
```

```{r myfun}
myfun <- function(data, reg, regname="", posmin=100, maxh=200, mindate="2020-02-15", maxdate="2020-12-01", bad.reg=NULL, fun="f2", s.fix=0.08, sig=0.05, fitit=FALSE, fil=7, sub=4){
  mindate <- as.Date(mindate)
  maxdate <- as.Date(maxdate)
  library(dplyr)
  b <- data %>% 
      subset(region %in% reg) %>%
      dplyr::group_by(date) %>%
      dplyr::summarize_if(is.numeric, function(x){ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))})
    # This will not have the region column, so we add that back on
  if(missing(regname)) regname <- paste(reg, collapse="+")
  b$region <- regname
  b <- b[!is.na(b$positive),]

  #b<-subset(data, region==reg)
  if(max(b$positive, na.rm=TRUE)<500) return()
  day1 <- min(which(b$positive>posmin))
  
  b$new.cases <- c(NA, diff(b$positive))
  b$new.deaths <- c(NA, diff(b$death))
  b$new.cases[b$new.cases<0] <- NA
  if(identical(reg, "Hubei China")) b$new.cases[b$new.cases>10000]=NA #hack 
  #if(identical(reg, "France")) b$new.cases[b$new.cases>10000]=NA #hack
  #if(identical(reg, "Hungary")) b$new.cases[b$new.cases>200]=NA #hack
  cfr <- mean(b$death[(nrow(b)-5):nrow(b)]/b$positive[(nrow(b)-5-7):(nrow(b)-7)], na.rm=TRUE)
  
  fit <- getfit(b, posmin=posmin, maxh=maxh,
                fun=fun, s.fix=s.fix, sig=sig, fitit=fitit,
                fil=fil, sub=sub)
  if(!fitit || inherits(fit$fit, "try-error") || all(is.na(fit$confval))){
    b <- fit$x
    bm <- 1.05*max(b$new.cases, na.rm=TRUE)
    popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6)
    cfr <- mean(b$death[(nrow(b)-5):nrow(b)]/b$positive[(nrow(b)-5-7):(nrow(b)-7)], na.rm=TRUE)
    b2 <- subset(b, months(b$date) == "April")
    cfr.apr <- sum(b2$new.deaths[7:nrow(b2)],na.rm=TRUE)/sum(b2$new.cases[1:(nrow(b2)-7)], na.rm=TRUE)
    b2 <- subset(b, months(b$date) == "September")
    cfr.sep <- sum(b2$new.deaths[7:nrow(b2)],na.rm=TRUE)/sum(b2$new.cases[1:(nrow(b2)-7)], na.rm=TRUE)
    b2 <- subset(b, months(b$date) == "October")
    cfr.oct <- sum(b2$new.deaths[7:nrow(b2)],na.rm=TRUE)/sum(b2$new.cases[1:(nrow(b2)-7)], na.rm=TRUE)
    b2 <- subset(b, months(b$date) == "November")
    cfr.nov <- sum(b2$new.deaths[7:nrow(b2)],na.rm=TRUE)/sum(b2$new.cases[1:(nrow(b2)-7)], na.rm=TRUE)
    p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point() +
      ggtitle(paste0(regname, "\n", "CFR Apr = ",
                    round(100*cfr.apr,digits=2), "  Sep = ",
                    round(100*cfr.sep,digits=2), "  Oct = ",
                    round(100*cfr.oct,digits=2), "  Nov = ",
                    round(100*cfr.nov,digits=2))) +
      scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", 
                   date_labels="%b")
    #y=bm
    p <- p + annotate("text", x=mindate+1, y=Inf, label=paste(
      "\n\nDeaths to date\n", max(b$death, na.rm=TRUE), " ", 
      round(max(b$death, na.rm=TRUE)/(popmill*1000), digits=2), "per thousand"), hjust=0)
    return(list(p=p, cfr=cfr))
  }
  
  # updated with fitted
  b <- fit$x
  pred <- fit$pred
  bm <- 1.05*max(b$new.cases.smooth, pred$pred, pred$pred.high, na.rm=TRUE)
  
  p <- ggplot(b, aes(x = date, y = new.cases.smooth)) + geom_point(col="grey") + 
    geom_point(data=fit$subx, col="blue") +
    geom_line(aes(x=date, y=fitted), data=b) +
    geom_line(aes(x=date, y=pred), data=pred, color="red") +
    ggtitle(regname) +
    scale_x_date(limits = c(mindate, maxdate), date_breaks="1 month", 
                 date_labels="%b")
  
  npos <- max(b$positive, na.rm=TRUE)+sum(pred$pred, na.rm=TRUE)
  ndeaths.p <- max(b$death, na.rm=TRUE)+
    sum(b$new.cases[(nrow(b)-7):nrow(b)], na.rm=TRUE)*cfr
  ndeaths <- ndeaths.p +
    sum(pred$pred, na.rm=TRUE)*cfr
  
  if(!inherits(fit$confval, "try-error") && !reg%in%bad.reg && !any(is.na(fit$confval))){
    #mode of log normal is 
    mode.lims <- sort(round(exp(fit$confval[1,]-fit$confval[2,]^2)))
    df <- data.frame(x=b$date[1]+day1+mode.lims-1, y=c(bm,bm))
    #p <- p + geom_line(data=df, aes(x=x, y=y), size=5, color="grey")
    if(fun=="f2") cfs <- coef(fit$fit)
    if(fun=="f4") cfs <- c(coef(fit$fit)[1], s.fix, coef(fit$fit)[2])
    popmill <- round(sum(popdata$population[match(unique(c(reg, regname)), popdata$name)], na.rm=TRUE)/1e6)
    p <- p +
      ggtitle(paste0(regname, "\nEst at Sept 1: Positives = ",
                    round(npos),
                    " Deaths = ", round(ndeaths), " (", 
                    round(ndeaths/(popmill*1000), digits=2), "/thousand)\n",
                    "Estimate range: Deaths low ", 
                    round(ndeaths.p+fit$cm[1]*cfr), 
                    " to Deaths high ", 
                    round(ndeaths.p+fit$cm[2]*cfr), "\n",
                    "CFR Est = ",
                    round(100*cfr,digits=2)))
    fit$pred$new.cases.smooth <- fit$pred$pred
    p <- p +
      geom_ribbon(data=fit$pred, aes(x=date, ymin = pred.low, ymax = pred.high), fill = "pink") +
      geom_line(data=fit$pred, aes(x=date, y=pred.low), linetype="dashed", color="red") +
      geom_line(data=fit$pred, aes(x=date, y=pred.high), linetype="dashed", color="red") + 
      geom_line(data=fit$pred, aes(x=date, y=pred), color="red")
    p <- p + annotate("text", x=mindate+1, y=Inf, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)," ", round(max(b$death, na.rm=TRUE)/(popmill*1000),digits=2), "per thousand"), hjust=0, vjust=1)
    
    
  }else{
    cfs <- coef(fit$fit)
    
    p <- p +
      ggtitle(paste(regname, "\nPoint estimates: Positives =", round(npos),
                    "Deaths =", round(ndeaths), "CFR =",
                    round(100*cfr,digits=2), "\n",
                    "s = ", round(cfs[2],3), "sig = ", round(summary(fit$fit)$sigma,3)))
    p <- p + annotate("text", x=mindate+1, y=bm, label=paste("\nDeaths to date\n", max(b$death, na.rm=TRUE)), hjust=0)
    
  }
  
  return(list(p=p, fit=fit$fit, confint=fit$confval, data=b, x=fit$x, subx=fit$subx, cfr=cfr))
}
```

## Lombardia

Lombardia is my case study area. Population size 10 million (2019).

```{r Lomb-forecast}
reg <- "Lombardia"
p <- myfun(italy, reg)
popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2)
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  p$p <- p$p+ scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
 plot(p$p)
}
```

# European Countries

```{r worldplotfun}
worldplot <- function(reg, posmin=100, maxh=150, fitit=FALSE){
popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2)
p <- myfun(world, reg, posmin=posmin, maxh=maxh, fitit=fitit)
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  p$p <- p$p+ scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
 plot(p$p)
}
}
```

## Scandinavia

```{r  Scandinavia-forecast}
for(i in c("Finland", "Norway", "Sweden")){
  worldplot(i)
}
```

```{r  denmark-forecast}
i="Denmark"
p <- myfun(world, i, posmin=100, maxh=150)
if(inherits(p, "list")){ 
  p$p <- p$p+geom_vline(xintercept = as.Date("2020-04-15")); 
  p$p <- p$p+annotate("text", x=as.Date("2020-04-15"), y=1, label="primary schools open", hjust=0)
  popmill <- round(sum(popdata$population[match(i, popdata$name)], na.rm=TRUE)/1e6, digits=2)
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2) +
    scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
  p$p 
  }
```

## British Isles

```{r  UK-forecast}
i="Ireland"
p <- myfun(world, i, posmin=100, maxh=150)
p$p <- p$p+geom_vline(xintercept = as.Date("2020-10-21")); 
p$p <- p$p+annotate("text", x=as.Date("2020-10-21"), y=1, label="fall 6-wk lockdown", hjust=0)
popmill <- round(sum(popdata$population[match(i, popdata$name)], na.rm=TRUE)/1e6, digits=2)
p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2) +
    scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
  p$p 

i <- "United Kingdom"
worldplot(i)
```

## Germany and Benelux

```{r  Germanic-forecast}
for(i in c("Belgium", "Netherlands", 
           "Luxembourg", 
           "Switzerland")){
  worldplot(i)
}
```

```{r  Germany-forecast}
i="Germany"
p <- myfun(world, i, posmin=100, maxh=150)
if(inherits(p, "list")){ 
  p$p <- p$p+geom_vline(xintercept = as.Date("2020-04-20")); 
  p$p <- p$p+annotate("text", x=as.Date("2020-04-20"), y=1, label="opening small shops", hjust=0)
  popmill <- round(sum(popdata$population[match(i, popdata$name)], na.rm=TRUE)/1e6, digits=2)
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2) +
    scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
  p$p 
  }
```

## Central Europe

```{r  CEurope-forecast}
for(i in c("Czechia", "Hungary", "Poland", "Slovakia")){
  worldplot(i)
}
```

```{r  Austria-forecast}
i="Austria"
p <- myfun(world, i, posmin=100, maxh=150)
if(inherits(p, "list")){ 
  p$p <- p$p+geom_vline(xintercept = as.Date("2020-04-14")); 
  p$p <- p$p+annotate("text", x=as.Date("2020-04-14"), y=1, label="opening small shops", hjust=0)
  popmill <- round(sum(popdata$population[match(i, popdata$name)], na.rm=TRUE)/1e6, digits=2)
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2) + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"))
  p$p 
  }
```

## Southern Europe

```{r  SEurope-forecast}
for(i in c("France", "Italy", "Portugal", "Spain")){
  worldplot(i)
}
```

## Balkans

```{r  Balkans-forecast}
for(i in c("Romania", "Bulgaria",
           "Greece", 
           "Serbia", "Montenegro", "Croatia", "Slovenia")){
  worldplot(i)
}
```

## Former USSR

```{r  Russia}
  worldplot("Russia")
```

```{r  USSR}
for(i in c("Ukraine", "Belarus")){
  worldplot(i)
}
```

```{r  Asia-forecast}
for(i in c("Japan", "India", "Pakistan")){
  worldplot(i)
}
```

## North America

```{r NAmer-forecast1}
for(i in c("US", "Canada", "Ontario Canada", "Quebec Canada", "British Columbia Canada", "Manitoba Canada")){
  worldplot(i)
}
```

```{r Mex-forecast}
for(i in c("Mexico")){
  worldplot(i, fitit=FALSE)
}
```

## South America

```{r SAmer-forecast}
for(i in c("Brazil", "Chile", "Argentina")){
  worldplot(i, posmin=100)
}
```


## US States


```{r stateplotfun}
stateplot <- function(reg, posmin=100, maxh=150, fitit=FALSE, data="states", ylims=c(0,1500)){
#regfull <- state.name[match(reg, state.abb)]
if(data=="states"){
  popmill <- round(sum(popdata$population[match(reg, popdata$name)], na.rm=TRUE)/1e6, digits=2)
  p <- myfun(states, reg, posmin=posmin, maxh=maxh, fitit=fitit)
}
if(data=="world"){
reg2 <- paste(c(state.name, "District of Columbia")[match(reg, c(state.abb, "DC"))], "US")
p <- myfun(world, reg2, regname=paste(reg, collapse="+"), posmin=posmin, maxh=maxh)
}
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"), limits=popmill*ylims)
 # plot(p$p)
  p$p
}
}
```

## New England

```{r states-forecast-new-england}
stateplot(c("ME","VT","NH","MA","RI","CT"))
```

## Middle Atlantic

```{r states-forecast-mid-atlantic}
p <- stateplot(c("NY","NJ","PA"))
p
```


## East North Central Midwest

```{r states-forecast-emid}
stateplot(c("OH", "IN", "IL", "WI", "MI"))
```

## West North Central Midwest

```{r states-forecast-wmid}
stateplot(c("MN", "MO", "ND", "SD", "IA", "NE", "KS"))
```

```{r states-forecast-wmid2}
p <- stateplot(c("ND", "SD")) 
p + geom_vline(xintercept=as.Date("2020-08-12"))
```

## South Atlantic North


```{r states-forecast-usouthatlantic}
stateplot(c("DC", "DE", "VA", "WV", "MD"))
```

## South Atlantic South


```{r states-forecast-southatlanticsouth}
stateplot(c("GA", "FL", "SC", "NC"))
```

## East South Central


```{r states-forecast-eastsouth}
stateplot(c("AL", "MS", "KY", "TN"), posmin=100, fitit=FALSE)
```

## West South Central

The early peak is the New Orleans outbreak.

```{r states-forecast-westsouth}
stateplot(c("TX", "OK", "AR","LA"))
```

## Mountain North

```{r states-forecast-mtnnorth}
stateplot(c("ID", "MT", "WY"), posmin=10)
```

## Mountain Southwest

```{r states-forecast-mtnsw}
stateplot(c("CO", "UT", "NV", "NM", "AZ"))
```


## West wo Southern CA

```{r states-forecast-westnosocal}
reg <- c("San Diego California US",
         "San Bernardino California US",
         "Riverside California US",
         "Orange California US",
         "Los Angeles California US",
         "Ventura California US",
         "Kern California US",
         "Santa Barbara California US",
         "San Luis Obispo California US")
cacounties <- unique(world$region[str_detect(world$region, "California")])
ncacounties <- cacounties[!cacounties %in% reg & 
                            !cacounties=="California US" & 
                            !cacounties=="Out of CA California US" &
                            !cacounties=="Unassigned California US"]
ncacounties <- as.character(ncacounties)
p <- myfun(world, c("Washington US", "Oregon US", ncacounties), regname="West minus SoCal", posmin=100, maxh=150)
popmill <- sum(statepop$POPESTIMATE2019[match(c("Washington", "Oregon"), statepop$NAME)], na.rm=TRUE)+0.4*statepop$POPESTIMATE2019[match(c("California"), statepop$NAME)]
popmill <- round(popmill/1e6, digits=2)
if(inherits(p, "list")){
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  p$p <- p$p+ scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"), limits=popmill*c(0,1500))
plot(p$p)
}
```

## Southern Californa

```{r states-forecast-southca}
reg <- c("San Diego California US",
         "San Bernardino California US",
         "Riverside California US",
         "Orange California US",
         "Los Angeles California US",
         "Ventura California US",
         "Kern California US",
         "Santa Barbara California US",
         "San Luis Obispo California US")
p <- myfun(world, reg, regname="Southern CA", posmin=10, maxh=150)
if(inherits(p, "list")){
popmill <- round(0.6*statepop$POPESTIMATE2019[match(c("California"), statepop$NAME)]/1e6, digits=2)
    p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"), limits=popmill*c(0,1500))
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  plot(p$p)
}
```


# Individual States

### Northeast

```{r states-forecast-northeast}
for(i in c("NY", "NJ", "MA", "CT", "DE", "PA", "MD", "DC")){
  p <- stateplot(i, posmin=100)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

```{r states-forecast-north}
for(i in c("ME", "NH", "VT")){
  p <- stateplot(i, posmin=100)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```


### Southeast

```{r states-forecast-southeast}
for(i in c("FL", "VA", "SC", "NC", "GA")){
  p <- stateplot(i, posmin=100)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

### South

```{r states-forecast-south}
for(i in c("TN", "KY", "AL", "MS", "AR", "LA")){
  p <- stateplot(i, posmin=100)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

### Midwest

```{r states-forecast-midwest}
for(i in c("OH", "MI", "IN", "IL")){
  p <- stateplot(i, posmin=100)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

### Center North

```{r states-forecast-cn}
for(i in c("WI", "MN", "SD")){
  p <- stateplot(i, posmin=10)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

```{r states-forecast-cn2}
for(i in c("ND", "IA", "NE", "MO")){
  p <- stateplot(i, posmin=10)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

### West coast


```{r states-forecast-westcoast}
for(i in c("WA", "CA", "OR")){
  p <- stateplot(i, posmin=100)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```


```{r states-forecast-socal}
reg <- c("San Diego California US",
         "San Bernardino California US",
         "Riverside California US",
         "Orange California US",
         "Los Angeles California US",
         "Ventura California US",
         "Kern California US",
         "Santa Barbara California US",
         "San Luis Obispo California US")
p <- myfun(world, reg, regname="Southern CA", posmin=10, maxh=150)
if(inherits(p, "list")){
  popmill <- round(0.6*statepop$POPESTIMATE2019[match(c("California"), statepop$NAME)]/1e6, digits=2)
    p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"), limits=popmill*c(0,1500))
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  plot(p$p)
}
```

```{r states-forecast-norcal}
cacounties <- unique(world$region[str_detect(world$region, "California")])
ncacounties <- cacounties[!cacounties %in% reg & 
                            !cacounties=="California US" & 
                            !cacounties=="Out of CA California US" &
                            !cacounties=="Unassigned California US"]
p <- myfun(world, ncacounties, regname="Northern CA", posmin=100, maxh=150)
if(inherits(p, "list")){
  popmill <- round(0.4*statepop$POPESTIMATE2019[match(c("California"), statepop$NAME)]/1e6, digits=2)
    p$p <- p$p + scale_y_continuous(sec.axis = sec_axis(~ ./popmill, name="new cases/million"), limits=popmill*c(0,1500))
  p$p <- p$p+annotate("text", x=as.Date("2020-09-01"), y=Inf, label=paste("population =", popmill, "million"), hjust=1, vjust=2)
  plot(p$p)
}
```

### South central

```{r states-forecast-southcentral}
for(i in c("TX", "OK", "KS")){
  p <- stateplot(i, posmin=10)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

### Four corners

```{r states-forecast-fourcorners}
for(i in c("CO", "AZ", "UT", "NM", "NV")){
  p <- stateplot(i, posmin=100)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```

### Rockies

```{r states-forecast-rockies}
for(i in c("ID", "MT", "WY")){
  p <- stateplot(i, posmin=10)
  ym <- 500*popdata$population[popdata$name==i]/1e6
  p <- p + geom_hline(yintercept=ym) + annotate("text", as.Date("2020-07-01"), y=1.05*ym, label="NY spring peak", color="blue")
  plot(p)
}
```