\documentclass[]{article}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\else % if luatex or xelatex
\ifxetex
\usepackage{mathspec}
\else
\usepackage{fontspec}
\fi
\defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
\fi
% use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% use microtype if available
\IfFileExists{microtype.sty}{%
\usepackage{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\usepackage[margin=1in]{geometry}
\usepackage{hyperref}
\hypersetup{unicode=true,
pdftitle={POLS 503: Assignment 2},
pdfborder={0 0 0},
breaklinks=true}
\urlstyle{same} % don't use monospace font for urls
\usepackage{color}
\usepackage{fancyvrb}
\newcommand{\VerbBar}{|}
\newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
\usepackage{framed}
\definecolor{shadecolor}{RGB}{248,248,248}
\newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
\newcommand{\ImportTok}[1]{#1}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
\newcommand{\BuiltInTok}[1]{#1}
\newcommand{\ExtensionTok}[1]{#1}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
\newcommand{\RegionMarkerTok}[1]{#1}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
\newcommand{\NormalTok}[1]{#1}
\usepackage{graphicx,grffile}
\makeatletter
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
\def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
\makeatother
% Scale images if necessary, so that they will not overflow the page
% margins by default, and it is still possible to overwrite the defaults
% using explicit options in \includegraphics[width, height, ...]{}
\setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
\IfFileExists{parskip.sty}{%
\usepackage{parskip}
}{% else
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
}
\setlength{\emergencystretch}{3em} % prevent overfull lines
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\setcounter{secnumdepth}{0}
% Redefines (sub)paragraphs to behave more like sections
\ifx\paragraph\undefined\else
\let\oldparagraph\paragraph
\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
\fi
\ifx\subparagraph\undefined\else
\let\oldsubparagraph\subparagraph
\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
\fi
%%% Use protect on footnotes to avoid problems with footnotes in titles
\let\rmarkdownfootnote\footnote%
\def\footnote{\protect\rmarkdownfootnote}
%%% Change title format to be more compact
\usepackage{titling}
% Create subtitle command for use in maketitle
\newcommand{\subtitle}[1]{
\posttitle{
\begin{center}\large#1\end{center}
}
}
\setlength{\droptitle}{-2em}
\title{POLS 503: Assignment 2}
\pretitle{\vspace{\droptitle}\centering\huge}
\posttitle{\par}
\author{}
\preauthor{}\postauthor{}
\predate{\centering\large\emph}
\postdate{\par}
\date{2017-04-21}
\usepackage{amsmath}
\DeclareMathOperator{\sd}{sd}
\begin{document}
\maketitle
This assignment works through an example from Yule (1899).
Yule (1899) is a published example of multiple regression analysis in
its modern form.\footnote{See Freedman (1997), Stigler (1990), Stigler
(2016), and Plewis (2017) for discussions of Yule (1899).}
Yule wrote this paper to analyze the effect of policy changes and
implementation on pauperism (poor receiving benefits) in England under
the \href{https://en.wikipedia.org/wiki/English_Poor_Laws}{English Poor
Laws}. In 1834, a new poor law was passed that established a national
welfare system in England and Wales. The New Poor Law created new
administrative districts (Poor Law Unions) to administer the law. Most
importantly, it attempted to standardize the provision of aid to the
poor. There were two types of aid provided: in-relief, aid provided to
paupers in workhouses where they resided, and out-relief, aid provided
to paupers residing at home. The New Poor Law sought to decrease
out-relief and increase in-relief, in the belief that in-relief, in
particular the quality of life in workhouses, was a deterrent to
pauperism and an encouragement for the poor to work harder to avoid
poverty.
Yule identifies several potential causes of the change in the rate of
pauperism, including changes in (1) the law, (2) economic conditions,
(3) general social character, (4) moral character, and (5) the age
distribution of the population (pg. 250).
He astutely notes the following:
\begin{quote}
If, for example, we should find an increase in the proportion of
out-relief associated with (1) an increase in the proportion of the aged
to the whole population, and also (2) an increase in the rate of
pauperism, it might be legitimate to interpret the result in the sense
that changes in out-relief and pauperism were merely simultaneous
concomitants of changes in the proportion of aged, the change of
pauperism not being a direct consequence of the change of
administration, but both direct consequences of the change in age
distribution. It is evidently most important that we should be able to
decide between two such different interpretations of the same facts.
This the method I have used is perfectly competent to do. --- Yule
(1899, pg. 250)
\end{quote}
\section{Setup}\label{setup}
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{library}\NormalTok{(}\StringTok{"tidyverse"}\NormalTok{)}
\KeywordTok{library}\NormalTok{(}\StringTok{"modelr"}\NormalTok{)}
\CommentTok{# devtools::install_github("jrnold/resamplr")}
\KeywordTok{library}\NormalTok{(}\StringTok{"resamplr"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
While only a subset of the original data of Yule (1899) was printed in
the article itself, Plewis (2015) reconstructed the original data and
Plewis (2017) replicated the original paper. These data are included in
the package \textbf{datums}. This package is not on CRAN, but can be
installed from GitHub. \textbf{IMPORTANT:} install the latest version of
\textbf{datums} since a few fixes were recently made to the
\texttt{pauperism} dataset.
\begin{Shaded}
\begin{Highlighting}[]
\CommentTok{# devtools::install_github("jrnold/datums")}
\KeywordTok{library}\NormalTok{(}\StringTok{"datums"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
The data for Yule (1899) is split into two data frames:
\texttt{pauperism\_plu} contains data on the Poor Law Unions (PLU), and
\texttt{pauperism\_year}, panel data with the PLU-year as the unit of
observation.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{pauperism <-}
\StringTok{ }\KeywordTok{left_join}\NormalTok{(datums}\OperatorTok{::}\NormalTok{pauperism_plu, datums}\OperatorTok{::}\NormalTok{pauperism_year,}
\DataTypeTok{by =} \StringTok{"ID"}\NormalTok{) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{year =} \KeywordTok{as.character}\NormalTok{(year))}
\end{Highlighting}
\end{Shaded}
The data consist of 599 PLUs observed in three years: 1871, 1881, and
1891 (years in which there was a UK census).
Yule (1899) is explicitly using regression for causal inference. The
outcome variable of interest is:
\begin{itemize}
\tightlist
\item
\textbf{Pauperism} the percentage of the population in receipt of
relief of any kind, less lunatics and vagrants
\end{itemize}
The treatment (policy intervention) is the ratio of numbers receiving
outdoor relief to those receiving indoor relief.
\begin{itemize}
\tightlist
\item
\textbf{Out-Relief Ratio:} the ratio of numbers relieved outdoors to
those relieved indoors
\end{itemize}
He will control for two variables that may be associated with the
treatment:
\begin{itemize}
\tightlist
\item
\textbf{Proportion of Old:} the proportion of the aged (65 years and
  over) to the whole population, since the old are more likely to be
  poor.
\item
\textbf{Population:} in particular changes in population that may be
proxying for changes in the economic, social, or moral factors of
PLUs.
\end{itemize}
There is also \textbf{Grouping of Unions}, which is a locational
classification based on population density that consists of Rural,
Mixed, Urban, and Metropolitan.
Instead of taking differences or percentages, Yule worked with ``percent
ratio differences'', \(100 \times \frac{x_{t}}{x_{t-1}}\), because he
did not want to work with negative signs, presumably a concern at the
time because he was doing arithmetic by hand, and negative signs would
make the calculations more tedious and error-prone.
\subsection{Original Specification}\label{original-specification}
Run regressions of \texttt{pauper} using the yearly level data with the
following specifications. In Yule (1899), the regressions are
\begin{itemize}
\tightlist
\item
\emph{M1:}
\texttt{paupratiodiff\ \textasciitilde{}\ outratiodiff\ +\ year\ +\ Type}
\item
\emph{M2:}
\texttt{paupratiodiff\ \textasciitilde{}\ outratiodiff\ +\ (popratiodiff\ +\ oldratiodiff)\ *\ (year\ +\ Type)}
\item
\emph{M3:}
\texttt{paupratiodiff\ \textasciitilde{}\ -1\ +\ (outratiodiff\ +\ popratiodiff\ +\ oldratiodiff)\ *\ (year\ +\ Type)}
\item
\emph{M4:}
\texttt{paupratiodiff\ \textasciitilde{}\ (outratiodiff\ +\ popratiodiff\ +\ oldratiodiff)\ *\ (year\ +\ Type)}
\end{itemize}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\item
Present the regressions results in a regression table
\item
Interpret the coefficients for \texttt{outratiodiff} for each model.
\item
Write the equations for each or all models, and describe the model
with a sentence or two. Try to be as concise as possible. Look at
recent journal articles for examples of the wording and format.
\item
What is the difference between \emph{M3} and \emph{M4}? What are the
pros and cons of each parameterization?
\item
  Conduct F-tests on the following hypotheses:

  \begin{enumerate}
  \def\labelenumii{\arabic{enumii}.}
  \tightlist
  \item
    All interactions in \emph{M4} are 0
  \item
    The coefficients on \texttt{outratiodiff} in \emph{M4} are the same
    across years
  \item
    The coefficients on \texttt{outratiodiff} in \emph{M4} are the same
    across PLU Types
  \item
    The coefficients on \texttt{outratiodiff} in \emph{M4} are the same
    across PLU Types and years.
  \end{enumerate}
\end{enumerate}
You can conduct F-tests with the function
\texttt{anova(mod\_unrestricted,\ mod\_restricted)}.
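For example, a minimal sketch for the first hypothesis, assuming the
unrestricted and restricted models are stored as \texttt{m4} and
\texttt{m4\_restricted} (the object names are illustrative):

\begin{verbatim}
# unrestricted model M4
m4 <- lm(paupratiodiff ~ (outratiodiff + popratiodiff + oldratiodiff) *
           (year + Type), data = pauperism)
# restricted model for "all interactions in M4 are 0"
m4_restricted <- lm(paupratiodiff ~ outratiodiff + popratiodiff +
                      oldratiodiff + year + Type, data = pauperism)
# F-test comparing the two nested models
anova(m4_restricted, m4)
\end{verbatim}

The other hypotheses can be tested in the same way by writing down the
appropriate restricted model.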
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{4}
\tightlist
\item
Calculate the predicted value and confidence interval for the PLU with
the median value of \texttt{outratiodiff}, \texttt{popratiodiff}, and
\texttt{oldratiodiff} in each year and PLU Type for these models. Plot
the predicted value and confidence interval of these as point-ranges.
\item
As previously, calculate the predicted value of the median PLU in each
  year and PLU Type, but instead of confidence intervals include the
  prediction interval. How do the confidence and prediction intervals
  differ? What are their definitions? (A sketch of the prediction step
  follows this list.)
\end{enumerate}
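A minimal sketch of the prediction step, assuming \emph{M2} has been
fitted and stored as \texttt{m2} and reading ``median PLU'' as the
group-wise medians of the covariates (the object name \texttt{newdat}
is illustrative):

\begin{verbatim}
# medians of the ratio-difference covariates within each year and Type
newdat <- pauperism %>%
  filter(year %in% c("1881", "1891"), !is.na(Type)) %>%
  group_by(year, Type) %>%
  summarise(outratiodiff = median(outratiodiff, na.rm = TRUE),
            popratiodiff = median(popratiodiff, na.rm = TRUE),
            oldratiodiff = median(oldratiodiff, na.rm = TRUE))

# confidence interval for the expected value of the median PLU ...
predict(m2, newdata = newdat, interval = "confidence")
# ... and prediction interval for a new observation with those covariates
predict(m2, newdata = newdat, interval = "prediction")
\end{verbatim}

The point-ranges can then be plotted with \texttt{ggplot2} using
\texttt{geom\_pointrange} on the columns returned by \texttt{predict}.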
\subsection{Functional Forms}\label{functional-forms}
The regression line of the model estimated in Yule (1899) (ignoring the
year and region terms and interactions) can also be written as \[
\begin{aligned}[t]
100 \times \frac{\mathtt{pauper2}_t / \mathtt{Popn2_t}}{\mathtt{pauper2}_{t-1} / \mathtt{Popn2_{t-1}}}
&= \beta_0 + \beta_1 \times 100 \times \frac{\mathtt{outratio}_t}{\mathtt{outratio_{t-1}}} \\
& \quad + \beta_2 \times 100 \times \frac{\mathtt{Popn65}_t / \mathtt{Popn2}_{t}}{\mathtt{Popn65}_{t-1} / \mathtt{Popn2}_{t-1}} + \beta_3 \times 100 \times \frac{\mathtt{Popn2}_t}{\mathtt{Popn2}_{t - 1}}
\end{aligned}
\]
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Write a model that includes only the log differences
(\(\log(x_t) - \log(x_{t - 1})\)) with only the \texttt{pauper2},
\texttt{outratio}, \texttt{Popn2}, and \texttt{Popn65} variables.
\item
Estimate the model with logged difference predictors, Year, and Type,
  and interpret the coefficient on \(\log(outratio_t)\). (A sketch of
  one way to construct the log differences follows this list.)
\item
What are the pros and cons of this parameterization of the model
relative to the one in Yule (1899)? Focus on interpretation and the
desired goal of the inference rather than the formal tests of the
regression. Can you think of other, better functional forms?
\end{enumerate}
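A minimal sketch of one way to construct the log-difference variables,
assuming the levels for adjacent censuses can be aligned by lagging
within each PLU (the new variable names are illustrative, and zero
values of the levels would need special handling):

\begin{verbatim}
pauperism_log <- pauperism %>%
  arrange(ID, year) %>%
  group_by(ID) %>%
  mutate(lpauper_diff   = log(pauper2)  - log(lag(pauper2)),
         loutratio_diff = log(outratio) - log(lag(outratio)),
         lpop_diff      = log(Popn2)    - log(lag(Popn2)),
         lold_diff      = log(Popn65)   - log(lag(Popn65))) %>%
  ungroup()

lm(lpauper_diff ~ loutratio_diff + lpop_diff + lold_diff + year + Type,
   data = pauperism_log)
\end{verbatim}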
\subsection{Non-differenced Model}\label{non-differenced-model}
Suppose you estimate the model (\emph{M5}) without differencing,
\begin{verbatim}
pauper2 ~ outratio + (Popn2 + Prop65) * (year + Type)
\end{verbatim}
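For example, a minimal sketch of estimating \emph{M5} on the yearly
data:

\begin{verbatim}
m5 <- lm(pauper2 ~ outratio + (Popn2 + Prop65) * (year + Type),
         data = pauperism)
summary(m5)
\end{verbatim}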
\begin{itemize}
\tightlist
\item
Interpret the coefficient on \texttt{outratio}. How does its
  interpretation differ from that in model \emph{M2}?
\item
  What accounts for the difference in sample sizes between \emph{M5}
  and \emph{M2}?
\item
What model do you think will generally have less biased estimates of
the effect of out-relief on pauperism: \emph{M5} or \emph{M2}? Explain
your reasoning.
\end{itemize}
\subsection{Substantive Effects}\label{substantive-effects}
Read Gross (2014) and McCaskey and Rainey (2015). Use the methods
described in those papers to assess the substantive effects of out-ratio
on the rate of pauperism. Use the model(s) of your choosing.
\subsection{Influential Observations and
Outliers}\label{influential-observations-and-outliers}
\subsubsection{Influential Observations for the
Regression}\label{influential-observations-for-the-regression}
For this use \emph{M2}:
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
For each observation, calculate and explain the following (a minimal
  sketch of the calculations follows the list):
\end{enumerate}
\begin{itemize}
\tightlist
\item
hat value (\texttt{hatvalues})
\item
standardized residual (\texttt{rstandard})
\item
  studentized residual (\texttt{rstudent})
\item
Cook's distance (\texttt{cooksd})
\end{itemize}
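A minimal sketch of collecting these statistics, assuming \emph{M2} has
been fitted and stored as \texttt{m2}:

\begin{verbatim}
m2_influence <- tibble(
  hat       = hatvalues(m2),        # leverage of each observation
  rstandard = rstandard(m2),        # standardized residuals
  rstudent  = rstudent(m2),         # studentized residuals
  cooksd    = cooks.distance(m2)    # Cook's distance
)
\end{verbatim}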
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{1}
\tightlist
\item
Create an outlier plot and label any outliers. See the example
  \href{https://jrnold.github.io/intro-methods-notes/outliers.html\#iver-and-soskice-data}{here}.
\item
  Using the plot and rules of thumb, identify outliers and influential
  observations.
\end{enumerate}
\subsubsection{Influential Observations for a
Coefficient}\label{influential-observations-for-a-coefficient}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\item
Run \emph{M2}, deleting each observation and saving the coefficient
for \texttt{outratiodiff}. This is a method called the jackknife. You
can use a for loop to do this, or you can use the function
\texttt{jackknife} in the package
\href{https://github.com/jrnold/resamplr}{resamplr}.
\begin{enumerate}
\def\labelenumii{\arabic{enumii}.}
\tightlist
\item
For which observations is there the largest change in the
coefficient on \texttt{outratiodiff}?
\item
Which observations have the largest effect on the estimate of
\texttt{outratiodiff}?
\item
How do these observations compare with those that had the largest
effect on the overall regression as measured with Cook's distance?
\item
Compare the results of the jackknife to the \texttt{dfbeta}
    statistic for \texttt{outratiodiff}.
\end{enumerate}
\item
Aronow and Samii (2015) note that the influence of an observation on a
  particular regression coefficient is different from its influence on
  the regression as a whole. Calculate the observation weights for
  \texttt{outratiodiff}. (A sketch of both the jackknife and the weight
  calculation follows this list.)
\begin{enumerate}
\def\labelenumii{\arabic{enumii}.}
\tightlist
\item
Regress \texttt{outratiodiff} on the control variables
\item
The observation weights are the squared residuals from this
    regression. Which observations have the highest weights?
\item
How do the observations with the highest regression weights compare
with those with the highest changes in the regression coefficient
from the jackknife?
\end{enumerate}
\end{enumerate}
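A minimal sketch of both calculations, assuming \emph{M2} has been
fitted and stored as \texttt{m2} (the object names are illustrative):

\begin{verbatim}
# jackknife: drop each observation in turn and save the outratiodiff coefficient
m2_data <- model.frame(m2)
jackknife_coefs <- map_dbl(seq_len(nrow(m2_data)), function(i) {
  coef(lm(formula(m2), data = m2_data[-i, ]))[["outratiodiff"]]
})

# Aronow-Samii style weights: squared residuals from regressing the
# variable of interest on the controls
w_mod <- lm(outratiodiff ~ (popratiodiff + oldratiodiff) * (year + Type),
            data = m2_data)
obs_weights <- residuals(w_mod) ^ 2
\end{verbatim}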
\subsection{Omitted Variable Bias}\label{omitted-variable-bias}
An informal way to assess the potential impact of omitted variables on
the coefficient of the variable of interest is to use the change in that
coefficient when covariates are added as a measure of the potential for
omitted variable bias (Oster 2016). Nunn and Wantchekon (2011) (Table 4)
calculate a simple statistic for omitted variable bias in OLS. This
statistic ``provide{[}s{]} a measure to gauge the strength of the likely
bias arising from unobservables: how much stronger selection on
unobservables, relative to selection on observables, must be to explain
away the full estimated effect.''
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Run a regression without any controls. Denote the coefficient on the
variable of interest as \(\hat\beta_R\).
\item
Run a regression with the full set of controls. Denote the coefficient
on the variable of interest in this regression as \(\hat\beta_F\).
\item
The ratio is \(\hat\beta_F / (\hat\beta_R - \hat\beta_F)\).
\end{enumerate}
Calculate this statistic for \emph{M2} and interpret it.
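A minimal sketch of the calculation, where the restricted model keeps
only \texttt{outratiodiff} (ideally both models would be fit on the same
non-missing sample):

\begin{verbatim}
beta_R <- coef(lm(paupratiodiff ~ outratiodiff,
                  data = pauperism))[["outratiodiff"]]
beta_F <- coef(lm(paupratiodiff ~ outratiodiff +
                    (popratiodiff + oldratiodiff) * (year + Type),
                  data = pauperism))[["outratiodiff"]]
beta_F / (beta_R - beta_F)
\end{verbatim}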
\subsection{Heteroskedasticity}\label{heteroskedasticity}
\subsubsection{Robust Standard Errors}\label{robust-standard-errors}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Run \emph{M2} and \emph{M3} with heteroskedasticity-consistent (HC), or
  robust, standard errors. How does this affect the standard errors on
  the \texttt{outratio} coefficients? Use the \textbf{sandwich} package
  to compute the robust standard errors (Zeileis 2004). (A minimal
  sketch follows this list.)
\end{enumerate}
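A minimal sketch, assuming \emph{M2} is stored as \texttt{m2} and that
the \textbf{lmtest} package is also installed:

\begin{verbatim}
library("sandwich")
library("lmtest")
# coefficient table with heteroskedasticity-consistent (robust) standard errors
coeftest(m2, vcov. = vcovHC(m2))
# or just the robust standard errors themselves
sqrt(diag(vcovHC(m2)))
\end{verbatim}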
\subsubsection{Multiple Regressions}\label{multiple-regressions}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\item
Run the model with interactions for all years and types
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{lm}\NormalTok{(pauper2 }\OperatorTok{~}\StringTok{ }\NormalTok{(outratio }\OperatorTok{+}\StringTok{ }\NormalTok{Popn2 }\OperatorTok{+}\StringTok{ }\NormalTok{Prop65) }\OperatorTok{*}\StringTok{ }\NormalTok{year }\OperatorTok{*}\StringTok{ }\NormalTok{Type }\OperatorTok{-}\StringTok{ }\DecValTok{1}\NormalTok{, }\DataTypeTok{data =}\NormalTok{ pauperism)}
\end{Highlighting}
\end{Shaded}
\item
For each subset of \texttt{year} and \texttt{type} run the regression
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{lm}\NormalTok{(pauper2 }\OperatorTok{~}\StringTok{ }\NormalTok{outratio }\OperatorTok{+}\StringTok{ }\NormalTok{Popn2 }\OperatorTok{+}\StringTok{ }\NormalTok{Prop65)}
\end{Highlighting}
\end{Shaded}
\item
Compare the coefficients, standard errors, and regression standard
  errors in these regressions.
\end{enumerate}
To run the multiple regressions, save models as a list column
\texttt{mod}, then save the results of \texttt{glance} and \texttt{tidy}
as list columns:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{all_interact <-}
\StringTok{ }\KeywordTok{crossing}\NormalTok{(}\DataTypeTok{Type =}\NormalTok{ pauperism}\OperatorTok{$}\NormalTok{Type, }\DataTypeTok{year =} \KeywordTok{c}\NormalTok{(}\DecValTok{1881}\NormalTok{, }\DecValTok{1891}\NormalTok{)) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{mod =} \KeywordTok{map2}\NormalTok{(year, Type, }
\ControlFlowTok{function}\NormalTok{(yr, ty) \{}
\KeywordTok{lm}\NormalTok{(paupratiodiff }\OperatorTok{~}\StringTok{ }\NormalTok{outratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{popratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{oldratiodiff,}
\DataTypeTok{data =} \KeywordTok{filter}\NormalTok{(pauperism,}
\NormalTok{ year }\OperatorTok{==}\StringTok{ }\NormalTok{yr,}
\NormalTok{ Type }\OperatorTok{==}\StringTok{ }\NormalTok{ty))}
\NormalTok{ \})) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{mod_glance =} \KeywordTok{map}\NormalTok{(mod, broom}\OperatorTok{::}\NormalTok{glance),}
\DataTypeTok{mod_tidy =} \KeywordTok{map}\NormalTok{(mod, broom}\OperatorTok{::}\NormalTok{tidy))}
\end{Highlighting}
\end{Shaded}
Now extract parts of the models, e.g., the standard errors of the
regressions:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{all_interact }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{sigma =} \KeywordTok{map_dbl}\NormalTok{(mod_glance, }\ControlFlowTok{function}\NormalTok{(x) x}\OperatorTok{$}\NormalTok{sigma)) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{select}\NormalTok{(year, Type, sigma)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## # A tibble: 8 × 3
## year Type sigma
## <dbl> <chr> <dbl>
## 1 1881 Metropolitan 9.886436
## 2 1891 Metropolitan 24.790240
## 3 1881 Mixed 16.437527
## 4 1891 Mixed 17.403411
## 5 1881 Rural 13.801753
## 6 1891 Rural 17.078948
## 7 1881 Urban 19.523919
## 8 1891 Urban 25.557318
\end{verbatim}
\subsection{Weighted Regression}\label{weighted-regression}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Run \emph{M2} and \emph{M3} as weighted regressions, weighted by the
  population (\texttt{Popn}), and interpret the coefficients on
  \texttt{outratiodiff} and the interactions. Informally assess the
  extent to which the coefficients differ. Which model does the
  weighting seem to affect more? (A minimal sketch follows this list.)
\item
What are some rationales for weighting by population? See the
discussion in Solon, Haider, and Wooldridge (2013) and Angrist and
Pischke (2014).
\end{enumerate}
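A minimal sketch of the weighted version of \emph{M2}, using the
\texttt{weights} argument of \texttt{lm}:

\begin{verbatim}
m2_weighted <- lm(paupratiodiff ~ outratiodiff +
                    (popratiodiff + oldratiodiff) * (year + Type),
                  data = pauperism, weights = Popn)
\end{verbatim}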
\subsection{Cross-Validation}\label{cross-validation}
When using regression for causal inference, model specification and
choice should largely be based on avoiding omitted variables. Another
criterion for selecting models is their fit to the data. But a model's
fit should not be assessed using only the in-sample data: that leads to
overfitting, and the best model would then always be one that includes
an indicator variable for every observation. Instead, a model's fit can
be assessed using its out-of-sample performance. One way to
estimate the \emph{expected} fit of a model to \emph{new} data is
cross-validation.
We want to compare the predictive performance of the following models:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{mod_formulas <-}\StringTok{ }
\StringTok{ }\KeywordTok{list}\NormalTok{(}
\DataTypeTok{m0 =}\NormalTok{ paupratiodiff }\OperatorTok{~}\StringTok{ }\DecValTok{1}\NormalTok{,}
\DataTypeTok{m1 =}\NormalTok{ paupratiodiff }\OperatorTok{~}\StringTok{ }\NormalTok{year }\OperatorTok{+}\StringTok{ }\NormalTok{Type, }
\DataTypeTok{m2 =}\NormalTok{ paupratiodiff }\OperatorTok{~}\StringTok{ }\NormalTok{outratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{year }\OperatorTok{+}\StringTok{ }\NormalTok{Type,}
\DataTypeTok{m3 =}\NormalTok{ paupratiodiff }\OperatorTok{~}\StringTok{ }\NormalTok{outratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{(popratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{oldratiodiff) }\OperatorTok{*}\StringTok{ }\NormalTok{(year }\OperatorTok{+}\StringTok{ }\NormalTok{Type),}
\DataTypeTok{m4 =}\NormalTok{ paupratiodiff }\OperatorTok{~}\StringTok{ }\OperatorTok{-}\DecValTok{1} \OperatorTok{+}\StringTok{ }\NormalTok{(outratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{popratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{oldratiodiff) }\OperatorTok{*}\StringTok{ }\NormalTok{(year }\OperatorTok{+}\StringTok{ }\NormalTok{Type),}
\DataTypeTok{m5 =}\NormalTok{ paupratiodiff }\OperatorTok{~}\StringTok{ }\NormalTok{(outratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{popratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{oldratiodiff) }\OperatorTok{*}\StringTok{ }\NormalTok{year }\OperatorTok{*}\StringTok{ }\NormalTok{Type}
\NormalTok{ )}
\end{Highlighting}
\end{Shaded}
Let's split the data into 10 (train/test) folds for cross-validation,
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{pauperism_nonmiss <-}\StringTok{ }
\StringTok{ }\NormalTok{pauperism }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{filter}\NormalTok{(year }\OperatorTok{%in%}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1881}\NormalTok{, }\DecValTok{1891}\NormalTok{)) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{select}\NormalTok{(paupratiodiff, outratiodiff, popratiodiff, oldratiodiff, year, Type, Region, ID) }\OperatorTok{%>%}
\StringTok{ }\NormalTok{tidyr}\OperatorTok{::}\KeywordTok{drop_na}\NormalTok{()}
\NormalTok{pauperism_10folds <-}
\StringTok{ }\NormalTok{pauperism_nonmiss }\OperatorTok{%>%}
\StringTok{ }\NormalTok{resamplr}\OperatorTok{::}\KeywordTok{crossv_kfold}\NormalTok{(}\DecValTok{10}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
For each model formula \texttt{f}, training data set \texttt{train}, and
test data set \texttt{test}: fit the model specified by \texttt{f} on
\texttt{train}, predict the new observations in \texttt{test}, and
calculate the RMSE from the residuals:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{mod_rmse_fold <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(f, train, test) \{}
\NormalTok{ fit <-}\StringTok{ }\KeywordTok{lm}\NormalTok{(f, }\DataTypeTok{data =} \KeywordTok{as.data.frame}\NormalTok{(train))}
\NormalTok{ test_data <-}\StringTok{ }\KeywordTok{as.data.frame}\NormalTok{(test)}
\NormalTok{ err <-}\StringTok{ }\NormalTok{test_data}\OperatorTok{$}\NormalTok{paupratiodiff }\OperatorTok{-}\StringTok{ }\KeywordTok{predict}\NormalTok{(fit, }\DataTypeTok{newdata =}\NormalTok{ test_data)}
\KeywordTok{sqrt}\NormalTok{(}\KeywordTok{mean}\NormalTok{(err }\OperatorTok{^}\StringTok{ }\DecValTok{2}\NormalTok{))}
\NormalTok{\}}
\end{Highlighting}
\end{Shaded}
For example, for one fold and formula:
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{mod_rmse_fold}\NormalTok{(mod_formulas[[}\DecValTok{1}\NormalTok{]], pauperism_10folds}\OperatorTok{$}\NormalTok{train[[}\DecValTok{1}\NormalTok{]],}
\NormalTok{ pauperism_10folds}\OperatorTok{$}\NormalTok{test[[}\DecValTok{1}\NormalTok{]])}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] 19.5354
\end{verbatim}
Now write a function that will calculate the average RMSE across folds
for a formula and a cross-validation data frame with \texttt{train} and
\texttt{test} list-columns:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{mod_rmse <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(f, data) \{}
\KeywordTok{map2_dbl}\NormalTok{(data}\OperatorTok{$}\NormalTok{train, data}\OperatorTok{$}\NormalTok{test, }
\ControlFlowTok{function}\NormalTok{(train, test) \{}
\KeywordTok{mod_rmse_fold}\NormalTok{(f, train, test)}
\NormalTok{ \}) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{mean}\NormalTok{()}
\NormalTok{\}}
\end{Highlighting}
\end{Shaded}
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{mod_rmse}\NormalTok{(mod_formulas[[}\DecValTok{1}\NormalTok{]], pauperism_10folds)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] 24.05803
\end{verbatim}
Finally, we want to run \texttt{mod\_rmse} for each formula in
\texttt{mod\_formulas}. It will be easiest to store this in a data
frame:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{cv_results <-}\StringTok{ }\KeywordTok{tibble}\NormalTok{(}
\DataTypeTok{model_formula =}\NormalTok{ mod_formulas,}
\DataTypeTok{.id =} \KeywordTok{names}\NormalTok{(mod_formulas),}
\CommentTok{# Formula as a string}
\DataTypeTok{.name =} \KeywordTok{map}\NormalTok{(model_formula,}
\ControlFlowTok{function}\NormalTok{(x) }\KeywordTok{gsub}\NormalTok{(}\StringTok{" +"}\NormalTok{, }\StringTok{" "}\NormalTok{, }\KeywordTok{paste0}\NormalTok{(}\KeywordTok{deparse}\NormalTok{(x), }\DataTypeTok{collapse =} \StringTok{""}\NormalTok{)))}
\NormalTok{)}
\end{Highlighting}
\end{Shaded}
Use \texttt{map} to run \texttt{mod\_rmse} for each model and save the
result as a list column in the data frame:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{cv_results <-}
\StringTok{ }\KeywordTok{mutate}\NormalTok{(cv_results,}
\DataTypeTok{cv10_rmse =} \KeywordTok{map}\NormalTok{(model_formula, mod_rmse, }\DataTypeTok{data =}\NormalTok{ pauperism_10folds))}
\end{Highlighting}
\end{Shaded}
In the case of linear regression, the MSE of leave-one-out
(\(n\)-fold) cross-validation can be calculated analytically without
having to run \(n\) regressions:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{loocv <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(x) \{}
\KeywordTok{mean}\NormalTok{((}\KeywordTok{residuals}\NormalTok{(x) }\OperatorTok{/}\StringTok{ }\NormalTok{(}\DecValTok{1} \OperatorTok{-}\StringTok{ }\KeywordTok{hatvalues}\NormalTok{(x))) }\OperatorTok{^}\StringTok{ }\DecValTok{2}\NormalTok{)}
\NormalTok{\}}
\end{Highlighting}
\end{Shaded}
We add the LOO-CV RMSE for each model formula to the results:
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{cv_results <-}\StringTok{ }
\StringTok{ }\KeywordTok{mutate}\NormalTok{(cv_results, }
\DataTypeTok{rmse_loo =} \KeywordTok{map}\NormalTok{(mod_formulas, }\ControlFlowTok{function}\NormalTok{(f) }\KeywordTok{sqrt}\NormalTok{(}\KeywordTok{loocv}\NormalTok{(}\KeywordTok{lm}\NormalTok{(f, }\DataTypeTok{data =}\NormalTok{ pauperism_nonmiss)))))}
\end{Highlighting}
\end{Shaded}
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
In the 10-fold cross-validation, which model has the best out-of-sample
  prediction?
\item
  Using leave-one-out cross-validation, which model has the best
  out-of-sample prediction?
\item
  Do the prediction metric (RMSE) and prediction task---predicting
  individual PLUs from other PLUs---make sense? Can you think of others
  that you would prefer?
\end{enumerate}
\subsection{Bootstrapping}\label{bootstrapping}
Estimate 95\% confidence intervals for the model using simple
non-parametric bootstrapped standard errors. The non-parametric
bootstrap works as follows:
Let \(\hat\theta\) be the estimate of a statistic. To calculate
bootstrapped standard errors and confidence intervals, use the following
procedure.
For each sample \(b = 1, \dots, B\):
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Draw a sample with replacement from the data
\item
Estimate the statistic of interest and call it \(\theta_b^*\).
\end{enumerate}
Let \(\theta^* = \{\theta_1^*, \dots, \theta_B^*\}\) be the set of
bootstrapped statistics.
\begin{itemize}
\item
standard error: the bootstrapped standard error of \(\hat\theta\) is
  \(\sd(\theta^*)\).
\item
confidence interval:
\begin{itemize}
\tightlist
\item
normal approximation: this calculates the confidence interval as
    usual but uses the bootstrapped standard error instead of the
    classical OLS standard error,
    \(\hat\theta \pm t_{\alpha/2,df} \cdot \sd(\theta^*)\) (a sketch
    follows this list).
\item
quantiles: A 95\% confidence interval uses the 2.5\% and 97.5\%
quantiles of \(\theta^*\) for its upper and lower bounds.
\end{itemize}
\end{itemize}
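A minimal sketch of the normal-approximation interval, assuming
\texttt{est} is the OLS estimate of a coefficient, \texttt{se\_bs} its
bootstrapped standard error, and \texttt{df\_resid} the residual degrees
of freedom of the original model (all three names are illustrative):

\begin{verbatim}
alpha <- 0.05
c(lower = est - qt(1 - alpha / 2, df_resid) * se_bs,
  upper = est + qt(1 - alpha / 2, df_resid) * se_bs)
\end{verbatim}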
Original model
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{mod_formula <-}\StringTok{ }\NormalTok{paupratiodiff }\OperatorTok{~}\StringTok{ }\NormalTok{outratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{(popratiodiff }\OperatorTok{+}\StringTok{ }\NormalTok{oldratiodiff) }\OperatorTok{*}\StringTok{ }\NormalTok{year }\OperatorTok{*}\StringTok{ }\NormalTok{Type}
\NormalTok{mod_orig <-}\StringTok{ }\KeywordTok{lm}\NormalTok{(mod_formula, }\DataTypeTok{data =}\NormalTok{ pauperism_nonmiss)}
\end{Highlighting}
\end{Shaded}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{bs_coef_se <-}
\StringTok{ }\NormalTok{resamplr}\OperatorTok{::}\KeywordTok{bootstrap}\NormalTok{(pauperism_nonmiss, }\DecValTok{1024}\NormalTok{) }\OperatorTok{%>%}
\StringTok{ }\CommentTok{# extract the list column of bootstrap samples}
\StringTok{ `}\DataTypeTok{[[}\StringTok{`}\NormalTok{(}\StringTok{"sample"}\NormalTok{) }\OperatorTok{%>%}
\StringTok{ }\CommentTok{# run the model on each bootstrap sample}
\StringTok{ }\KeywordTok{map_df}\NormalTok{(}\ControlFlowTok{function}\NormalTok{(dat) \{}
\KeywordTok{lm}\NormalTok{(mod_formula, }\DataTypeTok{data =}\NormalTok{ dat) }\OperatorTok{%>%}
\StringTok{ }\NormalTok{broom}\OperatorTok{::}\KeywordTok{tidy}\NormalTok{() }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{select}\NormalTok{(term, estimate)}
\NormalTok{ \}) }\OperatorTok{%>%}
\StringTok{ }\CommentTok{# calculate 2.5%, 97.5% and sd of estimates}
\StringTok{ }\KeywordTok{group_by}\NormalTok{(term) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{summarise}\NormalTok{(}
\DataTypeTok{std.error_bs =} \KeywordTok{sd}\NormalTok{(estimate),}
\DataTypeTok{conf.low_bsq =} \KeywordTok{quantile}\NormalTok{(estimate, }\FloatTok{0.025}\NormalTok{),}
\DataTypeTok{conf.high_bsq =} \KeywordTok{quantile}\NormalTok{(estimate, }\FloatTok{0.975}\NormalTok{)}
\NormalTok{ )}
\end{Highlighting}
\end{Shaded}
Now compare the std.error of the original and the bootstrap for
\texttt{outratiodiff}
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{broom}\OperatorTok{::}\KeywordTok{tidy}\NormalTok{(mod_orig, }\DataTypeTok{conf.int =} \OtherTok{TRUE}\NormalTok{) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{select}\NormalTok{(term, estimate, std.error) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{filter}\NormalTok{(term }\OperatorTok{==}\StringTok{ "outratiodiff"}\NormalTok{) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{left_join}\NormalTok{(bs_coef_se, }\DataTypeTok{by =} \StringTok{"term"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## term estimate std.error std.error_bs conf.high_bsq
## 1 outratiodiff 0.2274375 0.01433042 0.0188327 0.2685325
\end{verbatim}
The bootstrap standard error is slightly higher than the classical one.
It is similar to the heteroskedasticity-consistent standard error:
\begin{Shaded}
\begin{Highlighting}[]
\KeywordTok{sqrt}\NormalTok{(sandwich}\OperatorTok{::}\KeywordTok{vcovHC}\NormalTok{(mod_orig)[}\StringTok{"outratiodiff"}\NormalTok{, }\StringTok{"outratiodiff"}\NormalTok{])}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## [1] 0.01985823
\end{verbatim}
It is likely that there is correlation between the error terms of
observations. At the very least, each PLU is included twice; these
observations are likely correlated, so we are effectively overstating
the sample size of our data. One way to account for that is to resample
``PLUs'', not PLU-years. This cluster-bootstrap will resample each PLU
(and all its observations), rather than resampling the observations
themselves.
\begin{Shaded}
\begin{Highlighting}[]
\NormalTok{pauperism_nonmiss }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{group_by}\NormalTok{(ID) }\OperatorTok{%>%}
\StringTok{ }\NormalTok{resamplr}\OperatorTok{::}\KeywordTok{bootstrap}\NormalTok{(}\DecValTok{1024}\NormalTok{) }\OperatorTok{%>%}
\StringTok{ }\CommentTok{# extract the list column of bootstrap samples}
\StringTok{ `}\DataTypeTok{[[}\StringTok{`}\NormalTok{(}\StringTok{"sample"}\NormalTok{) }\OperatorTok{%>%}
\StringTok{ }\CommentTok{# run the model on each bootstrap sample}
\StringTok{ }\KeywordTok{map_df}\NormalTok{(}\ControlFlowTok{function}\NormalTok{(dat) \{}
\KeywordTok{lm}\NormalTok{(mod_formula, }\DataTypeTok{data =}\NormalTok{ dat) }\OperatorTok{%>%}
\StringTok{ }\NormalTok{broom}\OperatorTok{::}\KeywordTok{tidy}\NormalTok{() }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{select}\NormalTok{(term, estimate)}
\NormalTok{ \}) }\OperatorTok{%>%}
\StringTok{ }\CommentTok{# calculate 2.5%, 97.5% and sd of estimates}
\StringTok{ }\KeywordTok{group_by}\NormalTok{(term) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{summarise}\NormalTok{(}
\DataTypeTok{std.error_bs =} \KeywordTok{sd}\NormalTok{(estimate),}
\DataTypeTok{conf.low_bsq =} \KeywordTok{quantile}\NormalTok{(estimate, }\FloatTok{0.025}\NormalTok{),}
\DataTypeTok{conf.high_bsq =} \KeywordTok{quantile}\NormalTok{(estimate, }\FloatTok{0.975}\NormalTok{)}
\NormalTok{ ) }\OperatorTok{%>%}
\StringTok{ }\KeywordTok{filter}\NormalTok{(term }\OperatorTok{==}\StringTok{ "outratiodiff"}\NormalTok{)}
\end{Highlighting}
\end{Shaded}
\begin{verbatim}
## # A tibble: 1 × 3
## term std.error_bs conf.high_bsq
## <chr> <dbl> <dbl>
## 1 outratiodiff 0.01808499 0.2654708
\end{verbatim}
However, this yields a standard error not much different from the robust
standard error.
\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
Try bootstrapping by ``Region'' and by ``BoothGroup''. Does either of
  these make much difference in the standard errors?
\end{enumerate}
\subsection*{References}\label{references}
\addcontentsline{toc}{subsection}{References}
\hypertarget{refs}{}
\hypertarget{ref-AngristPischke2014a}{}
Angrist, Joshua D., and Jörn-Steffen Pischke. 2014. \emph{Mastering
`Metrics}. Princeton University Press.
\hypertarget{ref-AronowSamii2015a}{}
Aronow, Peter M., and Cyrus Samii. 2015. ``Does Regression Produce
Representative Estimates of Causal Effects?'' \emph{American Journal of
Political Science} 60 (1). Wiley-Blackwell: 250--67.
doi:\href{https://doi.org/10.1111/ajps.12185}{10.1111/ajps.12185}.
\hypertarget{ref-Freedman_1997}{}
Freedman, David. 1997. ``From Association to Causation via Regression.''
\emph{Advances in Applied Mathematics} 18 (1). Elsevier BV: 59--110.
doi:\href{https://doi.org/10.1006/aama.1996.0501}{10.1006/aama.1996.0501}.
\hypertarget{ref-Gross2014a}{}
Gross, Justin H. 2014. ``Testing What Matters (If You Must Test at All):
A Context-Driven Approach to Substantive and Statistical Significance.''
\emph{American Journal of Political Science} 59 (3). Wiley-Blackwell:
775--88.
doi:\href{https://doi.org/10.1111/ajps.12149}{10.1111/ajps.12149}.
\hypertarget{ref-McCaskeyRainey2015a}{}
McCaskey, Kelly, and Carlisle Rainey. 2015. ``Substantive Importance and
the Veil of Statistical Significance.'' \emph{Statistics, Politics and
Policy} 6 (1-2). Walter de Gruyter GmbH.
doi:\href{https://doi.org/10.1515/spp-2015-0001}{10.1515/spp-2015-0001}.
\hypertarget{ref-NunnWantchekon2011a}{}
Nunn, Nathan, and Leonard Wantchekon. 2011. ``The Slave Trade and the
Origins of Mistrust in Africa.'' \emph{American Economic Review} 101
(7): 3221--52.
doi:\href{https://doi.org/10.1257/aer.101.7.3221}{10.1257/aer.101.7.3221}.
\hypertarget{ref-Oster2016a}{}
Oster, Emily. 2016. ``Unobservable Selection and Coefficient Stability:
Theory and Evidence.'' \emph{Journal of Business \& Economic
Statistics}, September. Informa UK Limited, 0--0.
doi:\href{https://doi.org/10.1080/07350015.2016.1227711}{10.1080/07350015.2016.1227711}.
\hypertarget{ref-Plewis2015a}{}
Plewis, Ian. 2015. ``Census and Poor Law Union Data, 1871-1891.'' SN
7822. UK Data Service; data collection.
doi:\href{https://doi.org/10.5255/UKDA-SN-7822-1}{10.5255/UKDA-SN-7822-1}.
\hypertarget{ref-Plewis2017a}{}
---------. 2017. ``Multiple Regression, Longitudinal Data and Welfare in
the 19th Century: Reflections on Yule (1899).'' \emph{Journal of the
Royal Statistical Society: Series A (Statistics in Society)}, February.
Wiley-Blackwell.
doi:\href{https://doi.org/10.1111/rssa.12272}{10.1111/rssa.12272}.
\hypertarget{ref-SolonHaiderWooldridge2013a}{}
Solon, Gary, Steven Haider, and Jeffrey Wooldridge. 2013. ``What Are We
Weighting for?'' National Bureau of Economic Research.
doi:\href{https://doi.org/10.3386/w18859}{10.3386/w18859}.
\hypertarget{ref-Stigler1990a}{}
Stigler, Stephen M. 1990. \emph{The History of Statistics: The
Measurement of Uncertainty Before 1900}. Harvard University Press.
\url{http://www.ebook.de/de/product/3239165/stephen_m_stigler_the_history_of_statistics_the_measurement_of_uncertainty_before_1900.html}.
\hypertarget{ref-Stigler2016a}{}
---------. 2016. \emph{The Seven Pillars of Statistical Wisdom}. Harvard
University Press.
\url{http://www.ebook.de/de/product/25237216/stephen_m_stigler_the_seven_pillars_of_statistical_wisdom.html}.
\hypertarget{ref-Yule1899a}{}
Yule, G. Udny. 1899. ``An Investigation into the Causes of Changes in
Pauperism in England, Chiefly During the Last Two Intercensal Decades
(Part I.).'' \emph{Journal of the Royal Statistical Society} 62 (2).
JSTOR: 249. doi:\href{https://doi.org/10.2307/2979889}{10.2307/2979889}.
\hypertarget{ref-Zeileis2004a}{}
Zeileis, Achim. 2004. ``Econometric Computing with HC and HAC Covariance
Matrix Estimators.'' \emph{Journal of Statistical Software} 11 (10):
1--17.
doi:\href{https://doi.org/10.18637/jss.v011.i10}{10.18637/jss.v011.i10}.
\end{document}