-
Notifications
You must be signed in to change notification settings - Fork 2
/
Template_Problem_2.R
73 lines (48 loc) · 1.41 KB
/
Template_Problem_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
library(ggplot2)
library(dplyr)
# a) Download and read the data
# Reads the CSV file at the URL below into the data frame `pop`.
pop <- read.csv('http://econometrics2018.s3-website.eu-central-1.amazonaws.com/data/cex16pop.csv')
# a) Calculate the population mean for male and female
# TEMPLATE: the right-hand sides of the first two assignments are blank and
# `???` is a placeholder, so the script does not parse until they are filled in.
# Presumably popTaxesMale / popTaxesFemale should hold the `taxes` values of
# each `sex` group (these are the column names used by the t.test calls in
# part d) -- confirm against the data before filling in.
popTaxesMale <-
popTaxesFemale <-
popMeanMale <- mean(???)
popMeanFemale <- mean(???)
# b) Boxplots for the distribution of taxes by sex
# TEMPLATE: replace each `???` with the column to map to that axis.
# ylim() restricts the y axis to the range -2000 to 30000.
ggplot(data = pop, aes(y = ???, x = ???)) +
geom_boxplot() +
ylim(c(-2000, 30000))
# c) Select a sample
## Number of rows in the population table.
N <- nrow(pop)
## This time we need to sample rows from a table, not single values:
## draw 100 row indices with replacement, then extract those rows.
## sample.int(N, ...) is used rather than sample(1:N, ...) because 1:N
## evaluates to c(1, 0) when N is 0; for N >= 1 both forms draw the same
## indices.
sampledRows1 <- sample.int(N, size = 100, replace = TRUE)
sample1 <- pop[sampledRows1, ]
# d)
## Test hypothesis 1
## The formula taxes ~ sex compares `taxes` between the groups of `sex` in
## sample1. No `alternative` is given, so t.test uses its default
## (two-sided) alternative.
t.test(taxes ~ sex, data = sample1)
## NOTE(review): the next call is identical to the one above -- looks like a
## copy-paste leftover; confirm whether it is needed.
t.test(taxes ~ sex, data = sample1)
## Test hypothesis 2
## TEMPLATE: `alternative = ` is left blank; fill in the direction required by
## this hypothesis ("two.sided", "less" or "greater").
t.test(taxes ~ sex, data = sample1, alternative = )
## Test hypothesis 3
## TEMPLATE: `alternative = ` is left blank here as well.
t.test(taxes ~ sex, data = sample1, alternative = )
## NOTE(review): the template repeats the "Test hypothesis 3" heading at this
## point with no code under it -- presumably a leftover.
# e)
## R: number of samples
R <- 2000
## sample size
n <- 500
## Draw all R samples in one go: n * R row indices with replacement.
## sample.int(N, ...) avoids the 1:N pitfall (1:0 is c(1, 0)).
sampleRows <- sample.int(N, size = n * R, replace = TRUE)
samplesData <- pop[sampleRows, ]
## Label every consecutive run of n rows with its sample number (1, ..., R).
## The column has to be called `sampleIndex`: that is the name the per-sample
## summary groups by. The previous spelling `samplenIndex` was a typo that made
## the grouping step fail on a missing column.
samplesData <- within(samplesData, {
  sampleIndex <- rep(seq_len(R), each = n)
})
# Decide whether a two-sided two-sample t-test rejects H0 for one sample.
#
# Arguments:
#   taxes  numeric vector of observations.
#   sex    grouping vector of the same length as `taxes`; t.test's formula
#          interface requires it to define exactly two groups.
#   alpha  significance level; defaults to 0.05, the value that was
#          previously hard-coded.
#
# Returns TRUE when the p-value of t.test(taxes ~ sex) is strictly below
# `alpha`, FALSE otherwise.
rejectH0 <- function(taxes, sex, alpha = 0.05) {
  testResult <- t.test(taxes ~ sex, alternative = "two.sided")
  testResult$p.value < alpha
}
## Apply the test once per simulated sample: group the stacked rows by
## sampleIndex and reduce each group to a single logical, rejectedH0.
samplesSummary <- summarise(
  group_by(samplesData, sampleIndex),
  rejectedH0 = rejectH0(taxes, sex)
)
## Count how many samples rejected H0 and how many did not.
table(samplesSummary[["rejectedH0"]])