-
Notifications
You must be signed in to change notification settings - Fork 1
/
analysis_HT_20180221.R
executable file
·181 lines (102 loc) · 7.71 KB
/
analysis_HT_20180221.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#################
# #
# HT INSIGHT #
# 21 FEB 2018 #
# #
#################
rm(list = ls())
library(lme4)
library(MASS)
setwd("/Users/nsircar/Dropbox/Papers/HTAnalysis/")
dat <- read.csv("/Users/nsircar/Dropbox/Papers/HTAnalysis/MPLADS_ADR_PRS_15thLS.csv", stringsAsFactors = F)
## Measures of Unspent Funds
unspent <- 1 - dat$Expenditure/dat$AvailablewInterest
unspent[unspent < 0] <- 0
unspent0 <- as.numeric(unspent == 0) ## All Funds Spent
unspent5 <- as.numeric(unspent < .05 ) ## Spent More than 95%
unspent10 <- as.numeric(unspent < .1) ## Spent More than 90%
unspent20 <- as.numeric(unspent < .2) ## Spent More than 80%
invlogit <- function(x) 1/(1 + exp(-x)) # Inverse of Logit Function
sc <- as.numeric(dat$Category == "SC") ## SC Constituency
st <- as.numeric(dat$Category == "ST") ## ST Constituency
pg <- as.numeric(dat$Educational.qualifications %in% c("Post Graduate", "Doctorate")) # Postgraduate Degree
crim <- as.numeric(dat$MajorCriminal == 1) ## Indicator of Major Criminal Cases (ADR)
state <- as.numeric(as.factor(dat$State)) ## State
party <- as.numeric(as.factor(paste(dat$Political.party, dat$State)))
logmoveable <- log10(dat$Moveable) ## Log (base 10) of moveable assets
keepval <- which(logmoveable > 0 & !is.na(logmoveable)) ## Subset of data to analyze
margin <- dat$MOV ## Margin of Victory
fit1 <- glmer(unspent0[keepval]~logmoveable[keepval] + crim[keepval] + pg[keepval] + (1|party[keepval]), family = binomial(link = "logit")) ## Estimating Probability of Spending ALL Funds
fit2 <- glmer(unspent10[keepval]~logmoveable[keepval] + crim[keepval] + pg[keepval] + (1|party[keepval]), family = binomial(link = "logit")) ## Estimating Probability of Spending >90% Funds
fit3 <- glmer(unspent5[keepval]~logmoveable[keepval] + crim[keepval] + pg[keepval] + (1|party[keepval]), family = binomial(link = "logit")) ## Estimating Probability of Spending >95% Funds
coef.sims <- mvrnorm(1000, summary(fit3)$coef[,1], summary(fit3)$vcov) ## Simulation from Last Model
## Estimated Probability of Effective Spending -- 10 Lakh and 50 Lakh Moveable Wealth
xval <- logmoveable[keepval][log10(1000000) <= logmoveable[keepval] & log10(5000000) >= logmoveable[keepval]]
xpred <- cbind(rep(1, length(xval)), xval, rep(mean(crim[keepval]), length(xval)), rep(mean(pg[keepval]), length(xval)))
mean(colMeans(invlogit((xpred %*% t(coef.sims)))))
## Estimated Probability of Effective Spending -- Greater than 1 Crore Moveable Wealth
xval <- logmoveable[keepval][log10(10000000) <= logmoveable[keepval] ]
xpred <- cbind(rep(1, length(xval)), xval, rep(mean(crim[keepval]), length(xval)), rep(mean(pg[keepval]), length(xval)))
mean(colMeans(invlogit((xpred %*% t(coef.sims)))))
crimval <- cbind(rep(1,2), rep(mean(logmoveable[keepval]), 2), c(0,1), rep(mean(pg[keepval]), 2)) ## X values for Crim Case Simulation -- Mean moveable assets, mean PG completion
pgval <- cbind(rep(1,2), rep(mean(logmoveable[keepval]), 2), rep(mean(crim[keepval]), 2), c(0,1)) ## X values for PG degree Simulation -- Mean moveable assets, mean criminality
## GENERATE FIGURE 1
vals <- c(apply(invlogit((pgval %*% t(coef.sims))), 1, mean), apply(invlogit((crimval %*% t(coef.sims))), 1, mean)) ## Predicted Values from Simulations
vals.ci <- cbind(apply(invlogit((pgval %*% t(coef.sims))), 1, quantile, c(.05, .95)), apply(invlogit((crimval %*% t(coef.sims))), 1, quantile, c(.05, .95))) ## 90% intervals from simulations
pdf("/Users/nsircar/Dropbox/Papers/HTanalysis/unspent5_fig1.pdf", width=8, height=6)
par(mar = rep(0,4), mai = rep(0,4))
plot(0,0, xlim = c(-.85,7), ylim = c(-.08, .7), ann = F, axes = F, type = "n")
x.coords <- barplot(vals, col = c(rgb(1,125/255,125/255), rgb(1,125/255,125/255), rgb(102/255,178/255,1), rgb(102/255,178/255,1)), space = c(.5,.5,1,.5), axes = F, add = T )
axis(2, at=seq(0,.7,.1), labels = seq(0,.7,.1), pos = 0)
cats <- c("Graduate\nor Less", "Post\nGraduate", "Not Major\nCriminal", "Major\nCriminal")
cols <- c(rgb(204/255,0,0), rgb(204/255,0,0), rgb(0,0,204/255), rgb(0,0,204/255))
for (i in 1:4){
text(x.coords[i],-.05, cats[i], cex = 1.2)
arrows(x.coords[i], vals.ci[1,i], x.coords[i], vals.ci[2,i], col = cols[i], code = 3, angle = 90, length = .07, lwd = 2)
text(x.coords[i]+.3, vals[i], round(vals[i], 2), pos = 3, cex = 1.5)
}
text(-.83, .35, "Estimated Probability of 95%+ MPLADS Utilisation", srt=90, cex = 1.2)
dev.off()
cutpoints <- quantile(logmoveable[keepval], seq(0,1,.05)) ## Generate Cutpoints for "Bins" every 5 Percentile Points
vec <- seq(5,8, .01) ## Values over which Simulated (Log) Moveable Assets Displayed
assetval <- cbind(rep(1, length(vec)), vec, rep(mean(crim[keepval]), length(vec)), rep(mean(pg[keepval]), length(vec))) ## Mean levels of PG degree completion and criminality
X <- cbind(rep(1, length(unspent5)), logmoveable, crim, pg)[keepval, ] ## Predictor Matrix
## Generating Binned Values. Bins refer to interval on the x axis between 2 cutpoints.
## An observation is said to be in a bin if its x value falls between cutpoints of that bin.
## Binned X value = midpoint between two cutpoints
## Binned Y value = Model prediction + mean error of prediction of all observations in a bin
## The reason to do binning is that the true Y take 0 and 1 values, this procedure allows us to assess the predicted probability by taking the expectation in a bin
xbin5 <- ybin5 <- error <- rep(NA, length(cutpoints) - 1) ## xbin5 = Binned X value; ybin5 = Binned Y value
for (i in 2:length(cutpoints)){
xbin5[i-1] <- mean(cutpoints[(i-1):i])
xval <- logmoveable[keepval][(logmoveable[keepval] > cutpoints[i-1]) & (logmoveable[keepval] <= cutpoints[i]) ]
yval <- unspent5[keepval][(logmoveable[keepval] > cutpoints[i-1]) & (logmoveable[keepval] <= cutpoints[i]) ]
estval <- invlogit(X[(logmoveable[keepval] > cutpoints[i-1]) & (logmoveable[keepval] <= cutpoints[i]), ] %*% fixef(fit3))
error[i-1] <- mean(yval - estval)
fitval <- mean(invlogit(assetval[(assetval[,2] > cutpoints[i-1]) & (assetval[,2] <= cutpoints[i]), ] %*% fixef(fit3)))
ybin5[i-1] <- fitval + error[i-1]
}
vals <- apply(invlogit((assetval %*% t(coef.sims))), 1, mean) ## Predicted Value from Simulations
vals.ci <- apply(invlogit((assetval %*% t(coef.sims))), 1, quantile, c(.05, .95)) ## 90% Intervals from Simulations
colsline <- rgb(153/255,255/255,153/255)
colsfill <- rgb(0,102/255,0)
pdf("/Users/nsircar/Dropbox/Papers/HTanalysis/unspent5_fig2.pdf", width=8, height=6)
par(mar = rep(0,4), mai = rep(0,4))
plot(0,0, xlim = c(min(vec) - .5, max(vec)+.1), ylim = c(-.2, 1), ann =F, axes = F, type = "n")
polygon(c(vec, rev(vec)), c(vals.ci[1,], rev(vals.ci[2,])), col = colsline,border = NA ) ## Colored Interval Band
points(vec, vals, lwd = 2.5, type = "l", col = colsfill) ## Predicted Value Curve
points(xbin5[2:19], ybin5[2:19], pch=19, col = colsfill, cex = 1.5) ## Plotting Points for Binned X and Y values
axis(1, at = seq(5, 8, 1), labels = c("1 Lakh", "10 Lakh", "1 Crore", "10 Crore"), pos=0) ## Axis on a Log (base 10) scale
axis(2, at = seq(0,1,.2), pos = 5)
text(6.5, -.18, "Reported Candidate Asset Wealth (Moveable)", cex = 1.2)
text(4.55, .5, "Estimated Probability of 95%+ MPLADS Utilisation", cex = 1.2, srt = 90)
dev.off()
## Unspent Funds by Party
partywise <- aggregate(unspent5[keepval], by = list(dat$Political.party[keepval]), length)
o <- order(partywise[,2])
## Consider parties with 15 seats and above
partyval <- partywise[o,][27:33,1]
keepval2 <- intersect(keepval, which(dat$Political.party %in% partyval))
partywise2 <- aggregate(unspent5[keepval2], by = list(dat$Political.party[keepval2]), mean)
partywise_mean <- aggregate(unspent5[keepval], by = list(dat$Political.party[keepval]), mean)[o,]
partywise2 ## Percentage of MPs spending effectively by party (> 15 seats)