LLS Mac Results NB.Rmd

---
title: "Status drop"
output:
  html_document: default
  html_notebook: default
  pdf_document: default
---
```{r "setup", include=FALSE}
# Load knitr with library() so a missing package stops the knit immediately
# (require() only returns FALSE), then point every chunk at the project folder.
library(knitr)
opts_knit$set(root.dir = "~/Box Sync/skinner/projects_analyses/Project_status_drop")
```
```{r, include=FALSE}
library(haven)
library(psych)
#library(psycholing) AD: could not find the package  "package ‘psycholing’ is not available (for R version 3.3.2)"
library(MASS)
library(ggplot2)
library(lme4)
library(lmerTest)
library(pscl)
library(boot)
library(nortest)
library(MBESS)
library(sem)
library(Amelia)
library(ROCR)
library(stargazer)
library(multcomp)
library(lsmeans)
library(Hmisc)
library(mice)
library(doBy)
library(xtable)
library(corrplot)
library(readxl)
library(nnet)

#mac <- read.csv("~/Late Life Suicide/Mac Full.csv")
#mac <- subset(mac, select = -c(1), na.strings = c(''))
#mac <- subset(mac, na.strings = c(''))

#mac <- read.csv("~/Late Life Suicide/macfix.csv")
# for Alex only:
setwd("~/Box Sync/skinner/projects_analyses/Project_status_drop")
# Read the main data set. Blank cells are treated as missing here, at read
# time: `na.strings` is an argument of read.csv(); subset() silently ignores it.
mac <- read.csv("~/Box Sync/skinner/projects_analyses/Project_status_drop/macfix.csv",
                na.strings = c("", "NA"))
# Drop the first column.
# NOTE(review): presumably a row-index column written by an earlier export -- confirm.
mac <- subset(mac, select = -c(1))
#View(mac)

## data below not included -- too much missingness due to unentered data in new subjects
# new dataset with n = 196
# mac1 <- read_excel("~/Box Sync/skinner/projects_analyses/Project_status_drop/Macarthur and DOI updated 08-02-17.xlsx")
# View(mac1)
# 
# #check age in larger dataset
# summary(mn1 <- lm(mac1$`AGE AT SES`~ COMMENT, data = mac1))
# #equate on age
# boxplot(mac1$`AGE AT SES` ~ COMMENT, data = mac1, main = toupper("Raw Median Differences in Age"), font.main = 3, cex.main = 1.2, col = c("palevioletred4", "paleturquoise2", "seashell", "plum"), xlab = "Group", ylab = "Age", font.lab = 3, notch = TRUE, range = 0)
# mac1$oldcontrol <- ifelse(mac1$COMMENT == 'CONTROL' & mac1$`AGE AT SES` >80, 1, 0 )
# sum(mac1$oldcontrol)
# mac1 <- mac1[mac1$oldcontrol==0,]
# summary(mn1 <- lm(mac1$`AGE AT SES`~ COMMENT, data = mac1))
# anova(mn1)
# 
# mac1$age <- mac1$`AGE AT SES`
# mac1$SESdrop <- mac1$`MACARTHUR Q4B`
# mac1$SES <- mac1$`MACARTHUR Q4A`
# mac1$SEScurr <- mac1$SES - mac1$SESdrop
# 
# # reclassify depressed ideator as ideator
# 
# mac1$COMMENT[mac1$COMMENT=='DEPRESSION-IDEATOR'] <-  'IDEATOR'
# describe(mac1$`DOI LIFETIME`)
# # quick neg binomial glm
# 
# summary(SES <- glm(SESdrop ~ SES
#                    + COMMENT,
#               family = negative.binomial(theta = theta.SESdrop), data = mac1))
# 

# Original spreadsheet, read in for quality control of the recoded values.
macorig <- read_excel(
  path = "~/Box Sync/skinner/projects_analyses/Project_status_drop/Macarthur and DOI REVISED 11-21-16.xlsx",
  sheet = "MACARTHUR AND DOI SCORES"
)
# View(macorig1)
# n = 166

# Keep rows with age at SES assessment below 90.
macorig <- macorig[macorig[["AGE AT SES"]] < 90, ]

# Equate on age: flag controls older than 85 (1 = flagged, 0 = keep),
# print how many were flagged, then drop them.
macorig$oldcontrol <- as.numeric(
  macorig[["COMMENT"]] == "CONTROL" & macorig[["AGE AT SES"]] > 85
)
sum(macorig$oldcontrol)
macorig <- macorig[macorig$oldcontrol == 0, ]

# Summarise the data-change flag of the remaining rows.
describe(macorig[["MACARTHUR DATA CHANGES"]])

```


```{r, New Variables, Dummy Coding, and Subsets, include=FALSE}

# Status variables.
# SES1 is copied to the clearer name SEScurr; SES (called "highest SES" by the
# original author) is derived as SEScurr minus the reported drop.
mac$SEScurr <- mac$SES1
mac$SES <- mac$SEScurr - mac$SESdrop

# First exclude participants aged 90 or older.
mac <- mac[mac$age < 90, ]

# Equate on age: flag controls older than 85, print the count, drop them.
mac$oldcontrol <- ifelse(mac$COMMENT == "CONTROL" & mac$age > 85, 1, 0)
sum(mac$oldcontrol)
mac <- mac[mac$oldcontrol == 0, ]

# No-controls subset. Taken AFTER the age exclusions so that it contains the
# same participants as `mac`; previously it was created first and therefore
# kept the 90+ year-olds that are removed from `mac`.
# NOTE(review): relies on the depressed / ideators / attempters columns already
# being present in the CSV (they are only recomputed further down) -- confirm.
mac.nocontrols <- subset(mac, depressed == 1 | ideators == 1 | attempters == 1,
                         select = ID:SEScurr)

# Flag rows whose SES-drop value is marked "ORIGINAL" in the QC spreadsheet.
# NOTE(review): this pairs `mac` and `macorig` purely by row position --
# confirm both tables are in the same order after the exclusions.
mac$originals <- macorig[["MACARTHUR DATA CHANGES"]] == "ORIGINAL"


## Missing data
# Remove column 99 (the original comment identifies it as the yrsdep variable).
mac <- mac[, -99]

# Tabulate and plot the missingness patterns.
md.pattern(mac)
library(VIM)
mac_aggr <- aggr(
  mac,
  col = mdc(1:2),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(mac),
  cex.axis = .7,
  gap = 3,
  ylab = c("Proportion of missingness", "Missingness Pattern")
)

## Author's notes on what is imputed:
# education missing in 1 subject
# WTAR missing in 9/162 subjects (6%)
# REGRET missing on 11/162 subjects (7%)
# DOI missing on 2/162 subjects
# NOTE(review): impute() is presumably Hmisc::impute with its default fill
# function -- confirm which package wins on the search path.
mac[["WTAR"]] <- impute(mac[["WTARRAW"]])
mac[["edu"]] <- impute(mac[["edu"]])
mac[["regret"]] <- impute(mac[["REGRETSUBSCALE"]])
mac[["DOILIFETIME"]] <- impute(mac[["DOILIFETIME"]])
mac[["DOITENYEARS"]] <- impute(mac[["DOITENYEARS"]])
# Group-with-lethality label derived from the numeric GROUP12467 code.
# Codes without an entry below are left as they were (NA for a new column).
leth_codes <- c(HC = 1, DC = 2, I = 4, LL = 6, HL = 7)
for (leth_label in names(leth_codes)) {
  mac$grp_leth[mac$GROUP12467 == leth_codes[[leth_label]]] <- leth_label
}
mac$grp_leth <- as.factor(mac$grp_leth)

# Depression diagnosis: TRUE for every group code above 1.
mac$depDx <- mac$group > 1

# Treat the diagnosis indicators as categorical.
for (dx_col in c("SubstanceCurrent", "AnxietyCurrent",
                 "SubstanceLifetime", "AnxietyLifetime")) {
  mac[[dx_col]] <- as.factor(mac[[dx_col]])
}

# equate on education
#mac$oldcontrol <- ifelse(mac$COMMENT == 'CONTROL' & mac$edu >17, 1, 0 )
#sum(mac$oldcontrol)
#mac <- mac[mac$oldcontrol==0,]


# Descriptive table by group, also written to table1.htm.
t1 <- describeBy(mac, group = "COMMENT", mat = TRUE, type = 3, digits = 2)
stargazer(t1, type = "html", digits = 2, out = "table1.htm")


# More descriptives: age by group.
age.by.group <- lm(age ~ COMMENT, data = mac)
summary(age.by.group)
anova(age.by.group)

# Education by group, with Tukey contrasts and a compact-letter display.
ed.by.group <- lm(edu ~ COMMENT, data = mac)
summary(ed.by.group)
ed.tuk <- glht(ed.by.group, linfct = mcp(COMMENT = "Tukey"))
ed.tuk.cld <- cld(ed.tuk)
# Temporarily change the plot margins, then restore the previous settings.
old.par <- par(mai = c(1, 1, 1.25, 1), no.readonly = TRUE)
plot(ed.tuk.cld)
par(old.par)
anova(ed.by.group)
ed.ls <- lsmeans(ed.by.group, "COMMENT")
pairs(ed.ls)

# Sex by group.
sex.tbl <- table(mac[["SEX"]], mac[["COMMENT"]])
chisq.test(sex.tbl)

# Race by group.
race.tbl <- table(mac[["RACE"]], mac[["COMMENT"]])
chisq.test(race.tbl)

# DUMMY CODES
# Group indicators (1 = COMMENT equals the label, 0 = otherwise). With all
# three in a model the remaining group (controls) is the reference.
# A free-standing copy of each vector is kept alongside the column in `mac`.
depressed <- as.numeric(mac$COMMENT == "DEPRESSION")
ideators <- as.numeric(mac$COMMENT == "IDEATOR")
attempters <- as.numeric(mac$COMMENT == "ATTEMPTER")
mac[["depressed"]] <- depressed
mac[["ideators"]] <- ideators
mac[["attempters"]] <- attempters

# Same idea within the no-controls subset; there the depressed group is the
# reference.
dc.id <- as.numeric(mac.nocontrols$COMMENT == "IDEATOR")
dc.att <- as.numeric(mac.nocontrols$COMMENT == "ATTEMPTER")
mac.nocontrols[["dc.id"]] <- dc.id
mac.nocontrols[["dc.att"]] <- dc.att


# Gender indicator (1 = SEX is 'MALE').
male <- as.numeric(mac$SEX == "MALE")
mac[["male"]] <- male

# Standardized income: centred on the sample mean, scaled by the sample SD.
meancome <- mean(mac$INCOME)
sdIncome <- sd(mac$INCOME)
mac[["INCOMEcst"]] <- (mac$INCOME - meancome) / sdIncome

# Current SES

# SUBSETS BY GROUP (columns ID through INCOMEcst)
mac.att <- subset(mac, subset = attempters == 1, select = ID:INCOMEcst)
mac.id <- subset(mac, subset = ideators == 1, select = ID:INCOMEcst)
mac.dep <- subset(mac, subset = depressed == 1, select = ID:INCOMEcst)
mac.control <- subset(
  mac,
  subset = depressed == 0 & ideators == 0 & attempters == 0,
  select = ID:INCOMEcst
)


# OK, so age seems to be the only imbalanced demographic.  EQUATE groups.

#hist(mac$age)
#hist(mac.att$age)
#hist(mac.control$age)
#hist(mac.dep$age)
#hist(mac.id$age)

```


```{r Theta Calculation, include=FALSE}

# Maximum-likelihood estimates of the negative binomial theta used by the
# models below. The number of observations and the mean passed to theta.ml()
# are computed from the data actually used, instead of the previous hard-coded
# values (166, 117, 5.457831), which no longer match the sample after the
# age exclusions.

# Status drop, full sample.
SESdropCOL <- mac$SESdrop
mu <- mean(SESdropCOL, na.rm = TRUE)
SESdropCOL <- na.omit(SESdropCOL)
length(SESdropCOL)
theta.SESdrop <- theta.ml(SESdropCOL, mu, length(SESdropCOL), limit = 50,
                          eps = .Machine$double.eps^.25, trace = FALSE)
theta.SESdrop

# Status drop, no-controls subset.
SESdropCOL.nc <- mac.nocontrols$SESdrop
mu.nc <- mean(SESdropCOL.nc, na.rm = TRUE)
SESdrop.nc <- na.omit(SESdropCOL.nc)
length(SESdrop.nc)
theta.SESdrop.nc <- theta.ml(SESdrop.nc, mu.nc, length(SESdrop.nc), limit = 50,
                             eps = .Machine$double.eps^.25, trace = FALSE)
theta.SESdrop.nc

# SEScurr, full sample.
SEScurrCOL <- mac$SEScurr
mu.SEScurr <- mean(SEScurrCOL, na.rm = TRUE)
mu.SEScurr
SEScurrCOL <- na.omit(SEScurrCOL)
length(SEScurrCOL)
theta.SEScurr <- theta.ml(SEScurrCOL, mu.SEScurr, length(SEScurrCOL), limit = 50,
                          eps = .Machine$double.eps^.25, trace = FALSE)
theta.SEScurr

#mean(mac.nocontrols$SEScurr)
#SEScurrCOL.nc <- mac.nocontrols$SEScurr
#length(SEScurrCOL.nc)
#theta.SEScurr.nc <- theta.ml(SEScurrCOL.nc, mean(mac.nocontrols$SEScurr, na.omit), 117, limit = 100, eps = .Machine$double.eps^.25, trace = FALSE)  
#theta.SEScurr.nc

```

```{r Residualized Change, include=FALSE}

# RESIDUALIZED CHANGE
# Status drop residualized for SEScurr, from a negative binomial GLM.
# na.action = na.exclude makes residuals() return one value per input row
# (NA where a row was dropped), so the vector always lines up with the data
# frame it is stored in. The duplicated column assignments were removed.

# Full sample.
r.drop.model <- glm(SESdrop ~ SEScurr,
                    family = negative.binomial(theta = theta.SESdrop),
                    data = mac, na.action = na.exclude)
r.drop <- residuals(r.drop.model)
mac$r.drop <- r.drop

# No-controls subset.
r.drop.model.nc <- glm(SESdrop ~ SEScurr,
                       family = negative.binomial(theta = theta.SESdrop.nc),
                       data = mac.nocontrols, na.action = na.exclude)
r.drop.nc <- residuals(r.drop.model.nc)
mac.nocontrols$r.drop.nc <- r.drop.nc

#res.drop.nc <- glm(SESdrop ~ SEScurr, family = negative.binomial(theta = theta.SESdrop.nc), data = mac.nocontrols)
#residuals(res.drop.nc)
#resids.drop.nc = res.drop.nc$residuals.drop.nc
#mac.nocontrols$residuals.drop.nc = resids.drop.nc
#mac.nocontrols$residuals.drop.nc <- resids.drop.nc

```

```{r Diagnostics, echo=FALSE}

library(plyr)
# Per-group mean and SD. The columns are referenced by bare name so that
# summarise() evaluates them within each COMMENT group; `mac$SEScurr` would
# return the whole-sample value for every group.
ddply(mac, ~COMMENT, summarise,
      mean = mean(SEScurr, na.rm = TRUE), sd = sd(SEScurr, na.rm = TRUE))
ddply(mac, ~COMMENT, summarise,
      mean = mean(SESdrop, na.rm = TRUE), sd = sd(SESdrop, na.rm = TRUE))

#hist(mac$residuals, breaks = 20)
#hist(mac.nocontrols$residuals.nc, breaks = 20)
#hist(mac$residuals.lin, breaks = 20)

#ggplot(data = mac, aes(x = residuals, xlab = "SES", group = COMMENT, fill = COMMENT)) + 
    #geom_density(adjust = 1.5, alpha = 0.2)
#ggplot(data = mac, aes(x = residuals.lin, xlab = "SES", group = COMMENT, fill = COMMENT)) + 
    #geom_density(adjust = 1.5, alpha = 0.2)

# Distributions of the three status measures.
hist(mac$SESdrop, breaks = 10)
hist(mac$SES, breaks = 10)
hist(mac$SEScurr, breaks = 12)

# Density by group. The axis label is set with xlab(); `xlab` is not an
# aesthetic and has no effect inside aes().
ggplot(data = mac, aes(x = SES, group = COMMENT, fill = COMMENT)) +
  geom_density(adjust = 1.5, alpha = 0.2) +
  xlab("SES")
ggplot(data = mac, aes(x = SEScurr, group = COMMENT, fill = COMMENT)) +
  geom_density(adjust = 1.5, alpha = 0.2) +
  xlab("SEScurr")

# Notched box plots by group (whiskers extend to the extremes: range = 0).
boxplot(SES ~ COMMENT, data = mac,
        main = toupper("Raw Median Differences in Highest Status"),
        font.main = 3, cex.main = 1.2,
        col = c("palevioletred4", "paleturquoise2", "seashell", "plum"),
        xlab = "Group", ylab = "Status", font.lab = 3, notch = TRUE, range = 0)

# Title typo ("Curent") fixed; y label now matches the plotted variable.
boxplot(SEScurr ~ COMMENT, data = mac,
        main = toupper("Raw Median Differences in Current Status"),
        font.main = 3, cex.main = 1.2,
        col = c("palevioletred4", "paleturquoise2", "seashell", "plum"),
        xlab = "Group", ylab = "Current Status", font.lab = 3, notch = TRUE,
        range = 0)

boxplot(SESdrop ~ COMMENT, data = mac,
        main = toupper("Raw Median Differences in Drop"),
        font.main = 3, cex.main = 1.2,
        col = c("palevioletred4", "paleturquoise2", "seashell", "plum"),
        xlab = "Group", ylab = "Status Drop", font.lab = 3, notch = TRUE,
        range = 0)

# Variable-width box plots by group.
boxplot(SES ~ COMMENT, data = mac, main = "Raw Group Differences in Highest Status",
        xlab = "Group", ylab = "Raw Status", varwidth = TRUE, col = cm.colors(3))

# y label corrected: this plot shows SEScurr, titled "Current Status".
boxplot(SEScurr ~ COMMENT, data = mac, main = "Raw Group Differences in Current Status",
        xlab = "Group", ylab = "Current Status", varwidth = TRUE, col = cm.colors(3))

boxplot(SESdrop ~ COMMENT, data = mac, main = "Raw Group Differences in Status Drop",
        xlab = "Group", ylab = "Status Drop", varwidth = TRUE, col = cm.colors(3))

boxplot(INCOME ~ COMMENT, data = mac, main = "Raw Group Differences in Income",
        xlab = "Group", ylab = "Income", varwidth = TRUE, col = cm.colors(3))

boxplot(edu ~ COMMENT, data = mac, main = "Raw Group Differences in Education",
        xlab = "Group", ylab = "Education", varwidth = TRUE, col = cm.colors(3))

```

```{r Negative Binomial with Theta}

# Negative binomial models of status drop (theta fixed at the ML estimate).
# Comments now name the covariate each model actually uses, and the terms that
# were listed twice in SES2.c have been removed (R drops duplicated terms, so
# the fitted model is unchanged).
# NOTE(review): the model object `SES` shares its name with the data column
# SES; formulas still resolve SES to the column while it exists in `data`.

# Healthy as reference group ##################################################
# Covariates only (SES, age, sex, income).
SES.onlyControlVars <- glm(SESdrop ~ SES + age + SEX + INCOME,
                           family = negative.binomial(theta = theta.SESdrop),
                           data = mac)
summary(SES.onlyControlVars)

# Group indicators adjusting for SES only, no other covariates.
SES <- glm(SESdrop ~ SES + attempters + ideators + depressed,
           family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(SES)

# Group indicators adjusting for SEScurr, age, sex and income.
SES.c <- glm(SESdrop ~ SEScurr + age + SEX + INCOME
             + attempters + ideators + depressed,
             family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(SES.c)

# Group as the factor COMMENT, co-varying for SES.
mb1 <- glm(SESdrop ~ SES + age + INCOME + COMMENT,
           family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(mb1)
ls.mb1 <- lsmeans(mb1, "COMMENT")
plot(ls.mb1, comparisons = TRUE, alpha = 0.05)

# Group as the factor COMMENT, without SES.
mb <- glm(SESdrop ~ age + INCOME + COMMENT,
          family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(mb)
ls.mb <- lsmeans(mb, "COMMENT")
plot(ls.mb, comparisons = TRUE, alpha = 0.05)


# Depressed as reference group (no-controls subset) ###########################
# Covariates only.
SES2.onlyControlVars <- glm(SESdrop ~ SES + age + SEX + INCOME,
                            family = negative.binomial(theta = theta.SESdrop.nc),
                            data = mac.nocontrols)
summary(SES2.onlyControlVars)

# Group indicators adjusting for SES only.
SES2 <- glm(SESdrop ~ SES + dc.att + dc.id,
            family = negative.binomial(theta = theta.SESdrop.nc),
            data = mac.nocontrols)
summary(SES2)

# Group indicators plus covariates.
SES2.c <- glm(SESdrop ~ SES + age + SEX + INCOME + dc.att + dc.id,
              family = negative.binomial(theta = theta.SESdrop.nc),
              data = mac.nocontrols)
summary(SES2.c)

```

```{r SEScurr Interaction by Group}

  # Status drop with SEScurr allowed to differ by group (interaction terms),
  # adjusting for age, sex and income. Healthy controls are the reference.
SES.cInt <- glm(SESdrop ~ SEScurr + age + SEX + INCOME
                + attempters + ideators + depressed
                + SEScurr:attempters + SEScurr:ideators + SEScurr:depressed,
                family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(SES.cInt)

  # Same model in the no-controls subset (depressed as reference). Terms that
  # were written twice in the formula were removed; the fitted model is the same.
SES2.cInt <- glm(SESdrop ~ SEScurr + age + SEX + INCOME
                 + dc.att + dc.id
                 + SEScurr:dc.att + SEScurr:dc.id,
                 family = negative.binomial(theta = theta.SESdrop.nc),
                 data = mac.nocontrols)
summary(SES2.cInt)

```
```{r Alex analyses: prediction of drop, include = FALSE}
# Sanity check 1: predict SES drop from individual differences other than group.
dropDemo <- glm(SESdrop ~ age + SEX + INCOME + edu,
                family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(dropDemo)

# Same with SEScurr added (author's caveat: beware of confounding).
dropDemoSES <- glm(SESdrop ~ SEScurr + age + SEX + INCOME + edu,
                   family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(dropDemoSES)

anova(dropDemo, dropDemoSES)

# Add race on top of dropDemoSES. SEScurr is now included so that this model
# nests dropDemoSES and the comparison below isolates the effect of RACE;
# before, the two models differed in both SEScurr and RACE.
dropDemoSESrace <- glm(SESdrop ~ SEScurr + age + SEX + INCOME + edu + RACE,
                       family = negative.binomial(theta = theta.SESdrop), data = mac)
summary(dropDemoSESrace)
anova(dropDemoSES, dropDemoSESrace)
# NOTE(review): the earlier conclusion ("adding race increases deviance, forget
# race") came from the non-nested comparison -- re-check it with this output.

# Add the five NEO scales to the SEScurr + demographics model.
dropDemoSESneo <- glm(
  SESdrop ~ SEScurr + age + SEX + INCOME + edu + neurotic + extrav + open + agree + consc,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(dropDemoSESneo)
# (author's note: of course neuroticism)

# Add regret and maximizing (no SEScurr).
SES.demoREGRET <- glm(
  SESdrop ~ age + SEX + INCOME + edu + regret + MAXIMIZINGSUBSCALE,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SES.demoREGRET)

# Regret, maximizing and BIS total mean, with SEScurr.
SES.demoREGRETbis <- glm(
  SESdrop ~ SEScurr + age + SEX + INCOME + edu + regret + MAXIMIZINGSUBSCALE + BISTOTALMEAN,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SES.demoREGRETbis)


# With group: GROUP12467 is converted to a factor for the models below.
mac$GROUP12467 <- as.factor(mac$GROUP12467)
SESdrop.demoREGRETgrp <- glm(
  SESdrop ~ SEScurr + age + SEX + INCOME + edu + COMMENT + regret + MAXIMIZINGSUBSCALE,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SESdrop.demoREGRETgrp)

# Group split by lethality (GROUP12467), with regret and maximizing.
SESdrop.demoREGRETgrpleth <- glm(
  SESdrop ~ SEScurr + age + SEX + INCOME + edu + GROUP12467 + regret + MAXIMIZINGSUBSCALE,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SESdrop.demoREGRETgrpleth)
anova(SESdrop.demoREGRETgrpleth)

# Group split by lethality, without regret.
SESdrop.demogrpleth <- glm(
  SESdrop ~ SEScurr + age + SEX + INCOME + edu + GROUP12467,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SESdrop.demogrpleth)
anova(SESdrop.demogrpleth)
demo.group.leth.ls <- lsmeans(SESdrop.demogrpleth, "GROUP12467")
contrast(demo.group.leth.ls, method = "revpairwise", adjust = "tukey")
pairs(demo.group.leth.ls)

# Lethality of the most lethal attempt as a predictor.
SESdrop.leth <- glm(
  SESdrop ~ SEScurr + age + SEX + INCOME + edu + LETHALITYMOSTLETHAL,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SESdrop.leth)

# Group (COMMENT) without lethality and without regret, adjusting for SEScurr,
# demographics and current substance diagnosis.
SESdrop.curr.demo.subst.grp <- glm(
  SESdrop ~ SEScurr + COMMENT + age + INCOME + edu + SubstanceCurrent,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SESdrop.curr.demo.subst.grp)
anova(SESdrop.curr.demo.subst.grp)

# Same, adjusting for SES instead of SEScurr.
SESdrop.hi.demo.subst.grp <- glm(
  SESdrop ~ SES + COMMENT + age + INCOME + edu + SubstanceCurrent,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SESdrop.hi.demo.subst.grp)


# Same, with no status covariate.
SESdrop.demo.subst.grp <- glm(
  SESdrop ~ COMMENT + age + INCOME + edu + SubstanceCurrent,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SESdrop.demo.subst.grp)
#anova(SESdrop.grp)


# Least-squares means by group, Tukey-adjusted pairwise contrasts.
demo.group.ls <- lsmeans(SESdrop.demo.subst.grp, "COMMENT")
contrast(demo.group.ls, method = "pairwise", adjust = "tukey")
plot(demo.group.ls)

# Same approach with SES as the outcome.
# NOTE(review): this model reuses theta.SESdrop although the outcome is SES --
# confirm that is intended.
SEShi.demogrp <- glm(
  SES ~ SEScurr + age + SEX + INCOME + edu + COMMENT,
  family = negative.binomial(theta = theta.SESdrop), data = mac
)
summary(SEShi.demogrp)
anova(SEShi.demogrp)
hi.demo.group.ls <- lsmeans(SEShi.demogrp, "COMMENT")
contrast(hi.demo.group.ls, method = "pairwise", adjust = "tukey")
plot(hi.demo.group.ls)


# Does regret predict SEScurr? Linear models with and without group.
SEScurr.demoREGRETgrp <- lm(
  SEScurr ~ age + SEX + INCOME + edu + COMMENT + regret + MAXIMIZINGSUBSCALE,
  data = mac
)
summary(SEScurr.demoREGRETgrp)
anova(SEScurr.demoREGRETgrp)

SEScurr.demoREGRET <- lm(
  SEScurr ~ age + SEX + INCOME + edu + regret + MAXIMIZINGSUBSCALE,
  data = mac
)
summary(SEScurr.demoREGRET)

# Linear model of the `drop` column on demographics, SEScurr and regret.
drop.demoREGRET <- lm(
  drop ~ age + SEScurr + SEX + INCOME + edu + regret,
  data = mac
)
summary(drop.demoREGRET)
anova(drop.demoREGRET)

# Is attempters' highest status too high? Store the residuals of SES after
# regressing out age, sex and education.
SEShi.demo <- lm(SES ~ age + SEX + edu, data = mac)
summary(SEShi.demo)
mac$SEShiResidDemo <- residuals(SEShi.demo)

```

```{r Education and Income Differences}

# INCOME
# Healthy as reference group.
income <- lm(INCOME ~ attempters + ideators + depressed, data = mac)
summary(income)
# Controlling for age and sex.
income <- lm(INCOME ~ age + SEX + attempters + ideators + depressed, data = mac)
summary(income)
# Depressed as reference group (no-controls subset).
income <- lm(INCOME ~ dc.att + dc.id, data = mac.nocontrols)
summary(income)
# Controlling for age and sex.
income <- lm(INCOME ~ age + SEX + dc.att + dc.id, data = mac.nocontrols)
summary(income)

# Education
# Healthy as reference group.
education <- lm(edu ~ attempters + ideators + depressed, data = mac)
summary(education)
# Controlling for age and sex.
education <- lm(edu ~ age + SEX + attempters + ideators + depressed, data = mac)
summary(education)
# Depressed as reference group (no-controls subset).
education <- lm(edu ~ dc.att + dc.id, data = mac.nocontrols)
summary(education)
# Controlling for age and sex. The covariates were missing here, so this
# model used to be an exact repeat of the unadjusted one above.
education <- lm(edu ~ age + SEX + dc.att + dc.id, data = mac.nocontrols)
summary(education)

```

```{r Highest Status Differences}

# Linear models of the status measures. Each fit overwrites `highest`
# (or `current`); only the printed summaries are kept.
# NOTE(review): the objects named `highest` model SEScurr and the one named
# `current` models SES, while the long-format code elsewhere in this file
# labels SES as "high" and SEScurr as "current" -- confirm the naming.

# Healthy as reference group.
highest <- lm(SEScurr ~ attempters + ideators + depressed, data = mac)
summary(highest)
# Controlling for age, sex and education.
highest <- lm(SEScurr ~ age + SEX + edu + attempters + ideators + depressed, data = mac)
summary(highest)

## alex -- other predictors: NEO scales
highest <- lm(SEScurr ~ age + SEX + edu + neurotic + extrav + open + agree + consc, data = mac)
summary(highest)
# Regret and maximizing.
highest <- lm(SEScurr ~ age + SEX + edu + regret + MAXIMIZINGSUBSCALE, data = mac)
summary(highest)
# Author's check: the same predictors with SES as outcome (author predicted
# REGRET would not be significant here).
current <- lm(SES ~ age + SEX + edu + regret + MAXIMIZINGSUBSCALE, data = mac)
summary(current)
# SEScurr again, additionally adjusting for SES.
highest <- lm(SEScurr ~ SES + age + SEX + edu + regret + MAXIMIZINGSUBSCALE, data = mac)
summary(highest)

# Depressed as reference group (no-controls subset).
highest <- lm(SEScurr ~ dc.att + dc.id, data = mac.nocontrols)
summary(highest)
# Controlling for age and sex.
highest <- lm(SEScurr ~ age + SEX + dc.att + dc.id, data = mac.nocontrols)
summary(highest)

```
``` {r Alex LME analyses with two timepoints}

library(reshape)
#id.vars = names(m)
#grep("SES", id.vars)
#grep("SEScurr",id.vars)
#id.vars = id.vars[-grep("SES") & -grep("SEScurr"),]
# Reshape to long format: SES and SEScurr become rows distinguished by
# `variable`, with the measurement in `value`.
m <- melt(
  mac,
  na.rm = FALSE,
  measure.vars = c("SES", "SEScurr"),
  value.name = c("SES")
)
plot(m$variable, m$value)

# Copy the measurement to SES and label the timepoint.
m$SES <- m$value
m$time[m$variable == "SEScurr"] <- "current"
m$time[m$variable == "SES"] <- "high"

# Group-with-lethality label from the GROUP12467 code.
leth_codes <- c(HC = 1, DC = 2, I = 4, LL = 6, HL = 7)
for (leth_label in names(leth_codes)) {
  m$grp_leth[m$GROUP12467 == leth_codes[[leth_label]]] <- leth_label
}
m$grp_leth <- as.factor(m$grp_leth)

# Mixed models of SES over the two timepoints, random intercept per subject.
# The former `na.omit = TRUE` argument was dropped from m1: it is not an lmer()
# argument and was ignored, and rows with missing values are omitted by the
# default na.action anyway.

# Demographic predictors only, each interacting with timepoint.
m1 <- lme4::lmer(SES ~ time + edu*time + age*time + INCOMEcst*time + (1|ID), data = m)
summary(m1)
#stargazer(m1)

# Group only.
m2.0 <- lme4::lmer(SES ~ time + COMMENT*time + (1|ID), data = m)
summary(m2.0)


# Demographic predictors + group.
m2 <- lme4::lmer(SES ~ time + edu*time + age*time + INCOMEcst*time + COMMENT*time + (1|ID), data = m)
summary(m2)
car::Anova(m2)
anova(m2, m2.0)


# ... + current substance diagnosis.
m2.1 <- lme4::lmer(SES ~ time + edu*time + age*time + INCOMEcst*time + COMMENT*time + SubstanceCurrent*time + (1|ID), data = m)
summary(m2.1)
car::Anova(m2.1)
anova(m2, m2.1)

# Regression tables: plain version, then a labelled version.
stargazer(m1, m2.0, m2, m2.1, type = "html", out = "ses.htm")


stargazer(m1, m2.0, m2, m2.1, type = "html", digits = 1, single.row = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), report = 'vcs*',
          dep.var.labels = c("SES"),
          covariate.labels = c("Timepoint: highest vs. current", "Education",
                               "Age", "Standardized income",
                               "Timepoint*education", "Timepoint*age", "Timepoint*income",
                               "Healthy control (vs. attempter)",
                               "Depressed control (vs. attempter)",
                               "Ideator (vs. attempter)", "Current addiction",
                               "Timepoint*Healthy control", "Timepoint*Depressed control",
                               "Timepoint*Ideator", "Timepoint*Current addiction"),
          out = "ses_pretty.htm")


# Demographics, sex, race and group-with-lethality.
m3 <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + SEX*time
           + RACE*time + grp_leth*time + (1|ID), data = m)
summary(m3)
car::Anova(m3)
anova(m2, m3)

# Readable group labels, then move the first level to the end.
# NOTE(review): the resulting order depends on the existing level order of
# COMMENT -- confirm it yields controls, depressed, ideators, attempters.
m$g <- revalue(
  m$COMMENT,
  c(
    "CONTROL" = "Healthy controls",
    "DEPRESSION" = "Non-suicidal depressed",
    "IDEATOR" = "Suicide ideators",
    "ATTEMPTER" = "Suicide attempters"
  )
)
m$g <- factor(m$g, levels = levels(m$g)[c(2, 3, 4, 1)])

# Demographics + relabelled group, each interacting with timepoint.
m2.3 <- lme4::lmer(SES ~ time + edu*time + age*time + INCOMEcst*time + g*time + (1|ID), data = m)
summary(m2.3)

setwd("~/Box Sync/skinner/projects_analyses/Project_status_drop/")

# Least-squares means of timepoint within each group.
ls2 <- lsmeans(m2.3, "time", by = "g")
ls2z <- lsmeans(m2.3, ~ time | g)
plot(ls2z)


# Save the comparison plot to a PDF in the working directory.
pdf(file = "SES by group.pdf", width = 6, height = 6)
plot(ls2, type ~ SES, horiz = TRUE,
     xlab = "SES rank (lower rank designates higher standing)",
     ylab = "Timepoint", comparisons = TRUE)
dev.off()

# Publication figure: LS means of SES by timepoint and group, with
# compact-letter display of the Tukey-adjusted comparisons.
library(multcompView)

leastsquare <- lsmeans(m2.3, pairwise ~ time:g, adjust = "tukey")
CLD <- cld(leastsquare, alpha = 0.05, Letters = letters, adjust = "tukey")

# Strip the spaces from the letter codes and expose the group under a
# display-friendly column name.
CLD$.group <- gsub(" ", "", CLD$.group)
CLD$Group <- CLD$g

# Horizontal offset so the two timepoints of a group do not overlap.
pd <- position_dodge(0.8)

# Draw into a PDF in the working directory.
pdf(file = "SES by group PRETTY.pdf", width = 8, height = 6)
ggplot(CLD, aes(x = Group, y = lsmean, color = time, label = .group)) +
  geom_point(shape = 15, size = 4, position = pd) +
  geom_errorbar(
    aes(ymin = lower.CL, ymax = upper.CL),
    width = 0.2,
    size = 0.7,
    position = pd
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    plot.caption = element_text(hjust = 0)
  ) +
  ylab("Least square mean SES \nHigher standing   <=   =>   Lower standing") +
  ggtitle(
    "Highest and current self-reported socio-economic status (SES) by group",
    subtitle = "Linear mixed-effects model"
  ) +
  labs(
    caption = paste0(
      "\n",
      "Boxes indicate the LS mean.\n",
      "Error bars indicate the 95% ",
      "confidence interval of the LS mean. \n",
      "Means sharing a letter are ",
      "not significantly different ",
      "(Tukey-adjusted comparisons)."
    ),
    hjust = 0.5
  ) +
  # geom_text(nudge_x = c(0.1, -0.1, 0.1, -0.1, 0.1, -0.1, -0.1, 0.1),
  #           nudge_y = c(4.5,  4.5, 4.5,  4.5, 4.5 , 4.5,  4.5, 4.5),
  #           color   = "black") +
  geom_text(color = "black") +
  scale_color_manual(values = c("blue", "red"))
dev.off()

# LS means of group within each timepoint, plus the pairwise group contrasts
# with the by-grouping removed; print compact-letter displays for both.
ls2a <- lsmeans(m2.3, specs = "g", by = "time")
int <- update(pairs(ls2a, by = "g"), by = NULL)
cld(ls2a)
cld(int)
# diff impact of education
#summary(m3 <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + SEX*time
#           + RACE*time + grp_leth*time*edu + (1|ID), data = m))
#car::Anova(m3)
#anova(m2,m3)
#ls3 <- lsmeans(m3, "time", by = c("grp_leth", "edu"), at=list(edu = c(8,17)))
#plot(ls3, type ~ SES, horiz=F,ylab = "SES", xlab = "time", comparisons = TRUE, alpha = 0.05)

# Regret by timepoint, adjusting for demographics; LS means are evaluated at
# regret = 5 and regret = 20.
m4 <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + regret*time + (1|ID), data = m)
summary(m4)
car::Anova(m4)
ls4 <- lsmeans(m4, "time", by = c("regret"), at = list(regret = c(5, 20)))
plot(ls4, type ~ SES, horiz = FALSE, ylab = "SES", xlab = "time",
     comparisons = TRUE, alpha = 0.05)

# Neuroticism by timepoint; LS means are evaluated at neurotic = 10 and 50.
m5 <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + neurotic*time + (1|ID), data = m)
summary(m5)
car::Anova(m5)
ls5 <- lsmeans(m5, "time", by = c("neurotic"), at = list(neurotic = c(10, 50)))
plot(ls5, type ~ SES, horiz = TRUE, ylab = "SES", xlab = "time",
     comparisons = TRUE, alpha = 0.05)

# plot(lsmeansLT(m5))

# Check the psychological predictors for collinearity.
# NOTE(review): columns are picked by position (26, 66, 108, 109) -- confirm
# these still point at the intended variables.
predictors <- mac[, c(26, 66, 108, 109)]
cormat <- cor(predictors, use = "complete.obs")
# Reorder the matrix by angular order of eigenvectors ("AOE").
# (`order` here is a plain index vector that shadows the name of base::order.)
order <- corrMatOrder(cormat, order = "AOE")
rcormat <- cormat[order, order]
corrplot(rcormat)
# Upper-triangle version with coefficients; 1 - r is passed as the "p-value"
# matrix so that weaker correlations are blanked at sig.level = 0.3.
corrplot(
  rcormat,
  type = "upper", tl.pos = "td", cl.lim = c(-1, 1),
  method = "circle", tl.cex = 1, tl.col = 'black',
  order = "hclust", diag = FALSE,
  addCoef.col = "black", addCoefasPercent = FALSE,
  p.mat = 1 - rcormat, sig.level = 0.3, insig = "blank"
)

# Substance diagnosis models.
# m6: current substance diagnosis by timepoint, adjusting for demographics.
m6 <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + SubstanceCurrent*time + (1|ID), data = m)
summary(m6)

anova(m1, m6)

# m7: lifetime substance diagnosis, with sex and race.
# (author's note: lifetime substance less predictive than current, but both
# only of intercept)
m7 <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + SEX*time
           + RACE*time + SubstanceLifetime*time + (1|ID), data = m)
summary(m7)

# m8: lifetime substance diagnosis plus WTAR (author's note: no effect of WTAR).
# NOTE(review): m8 is compared with m6, which uses SubstanceCurrent rather than
# SubstanceLifetime, so the two models are not nested -- confirm the intent.
m8 <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + SubstanceLifetime*time + WTAR*time + (1|ID), data = m)
summary(m8)
anova(m6, m8)


# Impute neuroticism (and the other incomplete analysis variables) by
# multiple imputation.

library(mice)
library(mitml)
Data <- subset(m, select = c(ID, SEX, edu, SES, time, age, group, DOILIFETIME,
                             AnxietyLifetime, SubstanceLifetime, COMMENT,
                             SubstanceCurrent, INCOMEcst, neurotic, extrav, open, agree, consc,
                             BISNONPLAN, BISMOTOR, BISCOGNIT, NegUrg, PosUrg, LackPremed, LackPersev,
                             SocPSimp, SocPS, IIPins, IIPambiv, IIPagg, ARS, WTARRAW, EXITTOTAL,
                             iriPT, iriF, iriEC, iriPD, roles, people, regret))

# Dry run (zero iterations) to obtain the default predictor matrix and methods.
ini <- mice(Data, maxit = 0, printFlag = FALSE)

# These variables are not used as predictors of any other variable.
pred <- ini$predictorMatrix
pred[, c("group", "COMMENT", "SubstanceCurrent", "SES", "INCOMEcst", "neurotic",
         "extrav", "open", "agree", "consc", "regret")] <- 0

# These variables are not imputed; they serve only as predictors.
meth <- ini$method
meth[c("ID", "SEX", "age", "DOILIFETIME", "AnxietyLifetime", "SubstanceLifetime",
       "BISNONPLAN", "BISMOTOR", "BISCOGNIT", "NegUrg", "PosUrg", "LackPremed", "LackPersev",
       "SocPSimp", "SocPS", "IIPins", "IIPambiv", "IIPagg", "ARS", "WTARRAW", "EXITTOTAL",
       "iriPT", "iriF", "iriEC", "iriPD", "roles", "people")] <- ""

# 22 imputations, 10 iterations. `method = meth` is now passed so that the
# "do not impute" settings above take effect; previously `meth` was built but
# never handed to mice(), so the default methods were used for every variable.
# Argument names are spelled out rather than relying on partial matching.
# NOTE(review): no seed is set, so the imputations differ between runs.
n <- mice(Data, m = 22, maxit = 10, printFlag = TRUE,
          predictorMatrix = pred, method = meth)


## New analyses of regret and neuroticism (author's note: based on Aidan's
## suggestions of multivariate). Each model is fitted on every imputed data
## set in `n` and the estimates are pooled.

# Just regret.
m8r <- with(n, lme4::lmer(SES ~ edu*time + age*time + INCOMEcst*time + regret*time + SubstanceCurrent*time + (1|ID)))
est8r <- pool(m8r)
summary(est8r)

# Just neuroticism.
m8n <- with(n, lme4::lmer(SES ~ edu*time + age*time + INCOMEcst*time + neurotic*time + SubstanceCurrent*time + (1|ID)))
est8n <- pool(m8n)
summary(est8n)


# Regret and neuroticism.
m8rn <- with(n, lme4::lmer(SES ~ edu*time + age*time + INCOMEcst*time + regret*time + neurotic*time + SubstanceCurrent*time + (1|ID)))
est8rn <- pool(m8rn)
summary(est8rn)


# Add group.
m8rng <- with(n, lme4::lmer(SES ~ edu*time + age*time + INCOMEcst*time + regret*time + neurotic*time + SubstanceCurrent*time + COMMENT*time + (1|ID)))
est8rng <- pool(m8rng)
summary(est8rng)


# Add DOI (author's note: does not hold). Fitted on the observed data,
# restricted to rows with a non-missing neuroticism score. is.na() is used for
# that restriction: comparing against the string "NA" does not test for
# missingness and turned every missing row into an all-NA row.
m8rnd <- lme4::lmer(SES ~ edu*time + age*time + INCOMEcst*time + regret*time + neurotic*time + SubstanceCurrent*time + DOILIFETIME*time + (1|ID),
                    data = m[!is.na(m$neurotic), ])
summary(m8rnd)
car::Anova(m8rnd)


# 
#stargazer(m8r, m8n, m8rn,  type="html", digits = 2,single.row=TRUE,  star.cutoffs = c(0.05, 0.01, 0.001), report = 'vcs*',
#dep.var.labels=c("SES"),  out="ses_regneuro.htm")


# Table of the pooled regret / neuroticism models.
# NOTE(review): est8r, est8n and est8rn are pooled-imputation objects --
# confirm stargazer accepts them.
stargazer(
  est8r, est8n, est8rn,
  type = "html", digits = 2, single.row = TRUE,
  star.cutoffs = c(0.05, 0.01, 0.001), report = 'vcs*',
  dep.var.labels = c("SES"),
  covariate.labels = c(
    "Education", "Timepoint: highest vs. current",
    "Age", "Standardized income", "Regret-proneness", "Neuroticism",
    "Current addiction", "Timepoint*education", "Timepoint*age",
    "Timepoint*income", "Timepoint*regret", "Timepoint*neuroticism",
    "Timepoint*addiction"
  ),
  out = "ses_regneuro.htm"
)


# DOI? Lifetime DOI by timepoint; LS means evaluated at 0.26 and 0.61.
m8a <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + SubstanceCurrent*time + DOILIFETIME*time + (1|ID), data = m)
summary(m8a)
car::Anova(m8a)
ls8a <- lsmeans(m8a, "time", by = c("DOILIFETIME"), at = list(DOILIFETIME = c(0.26, 0.61)))
plot(ls8a, type ~ SES, horiz = FALSE, ylab = "SES", xlab = "time",
     comparisons = TRUE, alpha = 0.05)

# Same with regret added.
m8b <- lmer(SES ~ edu*time + age*time + INCOMEcst*time + regret*time + SubstanceCurrent*time + DOILIFETIME*time + (1|ID), data = m)
summary(m8b)
car::Anova(m8b)

describe(mac$SES)
# Lethality of the most lethal attempt (author's note: no effect of lethality).
m9 <- lmer(SES ~ LETHALITYMOSTLETHAL*time + (1|ID), data = m)
summary(m9)

# Intent-planning.
m10 <- lmer(SES ~ SINTMOSTLETHALPLAN*time + (1|ID), data = m)
summary(m10)

## Is attempters' highest status too high after accounting for education?
# Step 1: residual of SES after regressing out education, age and WTAR.
him1 <- lm(SES ~ edu + age + WTAR, data = mac)
summary(him1)
mac$r.hi <- residuals(him1)
# Step 2: compare the residual across groups.
hirm2 <- lm(r.hi ~ COMMENT, data = mac)
summary(hirm2)
# (author's note: it's not really higher, maybe a bit for SA)
boxplot(
  r.hi ~ COMMENT, data = mac,
  main = "Group Differences in Highest Status corrected for age, education, IQ",
  xlab = "Group", ylab = "Status", varwidth = TRUE, col = cm.colors(3)
)

# Sensitivity analyses to rule out effects of differential drop scaling:
# logistic models of the `drop` column.
sm1 <- glm(drop ~ COMMENT, family = binomial(link = "logit"), data = mac)
summary(sm1)
sm2 <- glm(drop ~ age + COMMENT, family = binomial(link = "logit"), data = mac)
summary(sm2)
anova(sm1, sm2)

sm3 <- glm(drop ~ age + COMMENT + INCOMEcst + edu + SubstanceCurrent,
           family = binomial(link = "logit"), data = mac)
summary(sm3)
anova(sm3)

# Sensitivity analyses on the rows flagged as original, i.e. excluding recoded
# and unclear cases (`mo` = m original).
mo <- m[m$originals, ]

# Refit the best-fitting model (author's note: identical results).
mo2 <- lmerTest::lmer(SES ~ time + edu*time + age*time + INCOMEcst*time + COMMENT*time + SubstanceCurrent*time + (1|ID), data = mo)
summary(mo2)
car::Anova(mo2)

# Log-transformed SES, to address the distortion noted by the author (a lower
# current SES allows for a larger drop).
m2log <- lmerTest::lmer(log(SES + 1) ~ time + edu*time + age*time + INCOMEcst*time + COMMENT*time + (1|ID), data = mo)
summary(m2log)
car::Anova(m2log)

# One-way models of SES and SEScurr by group, with pairwise comparisons.
mhi <- lm(SES ~ COMMENT, data = mac)
summary(mhi)
anova(mhi)
mcurr <- lm(SEScurr ~ COMMENT, data = mac)
summary(mcurr)
anova(mcurr)

lshi <- lsmeans(mhi, "COMMENT")
pairs(lshi)

lscurr <- lsmeans(mcurr, "COMMENT")
pairs(lscurr)

# Does drop predict attempter status above and beyond current SES?
# Logistic models restricted to group codes 2 and 5.

SAvsDC <- mac[mac$group == 2 | mac$group == 5, ]
nonHC <- mac[mac$group > 1, ]

# With residualized drop.
SApredict.drop <- glm(
  COMMENT ~ SEScurr + r.drop + SEX + RACE + WTARRAW + SubstanceLifetime,
  family = binomial(link = "logit"), data = SAvsDC
)
summary(SApredict.drop)
car::Anova(SApredict.drop)

# With current status and demographics only.
SApredict.curr <- glm(
  COMMENT ~ SEScurr + age + edu + SEX + RACE + INCOMEcst + WTARRAW + SubstanceLifetime,
  family = binomial(link = "logit"), data = SAvsDC
)
summary(SApredict.curr)
car::Anova(SApredict.curr)

anova(SApredict.curr, SApredict.drop, test = "Chisq")

# Table of the drop model.
stargazer(
  SApredict.drop,
  type = "html", digits = 2, single.row = TRUE,
  star.cutoffs = c(0.05, 0.01, 0.001), report = 'vcs*',
  dep.var.labels = c("Group: attempters vs. non-suicidal depressed"),
  covariate.labels = c(
    "Current SES", "SES drop, residualized for current SES",
    "Sex", "Race: Asian Pacific", "Race: White", "IQ (WTAR raw score)", "Addiction"
  ),
  out = "grp_pred_ses_drop.html"
)


# Check SES intercorrelations.
boxplot(
  SES ~ time, data = m, main = "Change in SES",
  xlab = "Time point", ylab = "Status", varwidth = TRUE, col = cm.colors(3)
)
# NOTE(review): columns are picked by position (16, 17, 25) -- confirm these
# are the three status variables.
ses <- mac[, c(16, 17, 25)]
cormat <- cor(ses)
order <- corrMatOrder(cormat, order = "AOE")
rcormat <- cormat[order, order]
corrplot(rcormat)

# Same on the log(x + 1) scale.
seslog <- log(ses + 1)
cormat <- cor(seslog)
order <- corrMatOrder(cormat, order = "AOE")
rcormat <- cormat[order, order]
corrplot(rcormat)

# Education against each status measure, whole sample.
cor(mac[["edu"]], mac[["SEScurr"]])
cor(mac[["edu"]], mac[["SES"]])
cor(mac[["edu"]], mac[["SESdrop"]])

# Same in attempters and ideators (rows with suicidal == 1).
sui <- mac[mac$suicidal == 1, ]


cor(sui[["edu"]], sui[["SEScurr"]])
cor(sui[["edu"]], sui[["SES"]])
cor(sui[["edu"]], sui[["SESdrop"]])

# Check regret by group.
mm1 <- lm(mac$regret ~ mac$COMMENT)
summary(mm1)

# SES against lethality of the most recent attempt.
# NOTE(review): the original heading here read "age at first attempt", which
# does not match the variables in the model -- confirm what was intended.
am1 <- lm(mac$SES ~ mac$LETHALITYMOSTRECENT)
summary(am1)

```