-
Notifications
You must be signed in to change notification settings - Fork 4
/
Fitting_Levy_StandardErrors.R
126 lines (91 loc) · 6.96 KB
/
Fitting_Levy_StandardErrors.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Function to do Levy alpha stable parameter error estimates using bootstraps.
# TODO: File Fitting_Levy_Boot.R should be merged into this one.
# - They duplicate much code.
# - This one is nicer and returns a data frame, not a list like Fitting_Levy_Boot.R
############ 0. Basic Set up ############
## 0.1. loading of required libraries
if (!'pacman' %in% installed.packages()[,'Package']) install.packages('pacman', repos='http://cran.r-project.org')
pacman::p_load(boot,dplyr,StableEstim,devtools)
## 0.2 loading fitting functions from fitting.levy package
devtools::load_all("fittinglevy")
## 0.3 remaining function definitions:
# the fuction for the fitting result
fun_fit_levy <- function(dat, bin_num, cond_ind, var_ind, c_names, cut_num, neg_cut, pov_cut) { # the function takes 8 arguments: 1) data generated and cleaned in section 0.2, 2) the number of bins, 3) the index of the variable that is used for the conditional class, 4) the target variable name, 5) the name of class, 6) the minimum number of observations for each class, 7) the cutting point on the left tail, 8) the cutting point on the right tail
results_df <- data.frame(Fit_Variable=character(), Separator_Variable=character(), Class=character(), class_idx=integer(), Country=character(), country_idx=integer(), Observations=integer(), Levy_alpha=double(), Levy_beta=double(), Levy_gamma=double(), Levy_delta=double(), Levy_alpha_Standard_Error=double(), Levy_beta_Standard_Error=double(), Levy_gamma_Standard_Error=double(), Levy_delta_Standard_Error=double(), stringsAsFactors=FALSE)
c_uni_list <- list()
c_uni_num_list <- list()
for (k in 1:length(dat)) {
print(k)
zz <- dat[[k]] %>%
select(IDNR, Year, COMPCAT, NACE_CAT, LP, LP_diff, EMPL) %>% # Firm ID, Year, Firm Size, Industry ID, Labor Produtivity, Labor Productivity Change, Employment
filter(EMPL > 1) %>% # remove self-employed persons
mutate(LP = LP / 1000) %>% # change the unit scale of the labor productivity by dividing it by 1000
mutate(LP_diff = LP_diff / 1000) # percentage unit for the growth variables
zz <- as.data.frame(zz)
zz$Cond <- zz[, cond_ind] # create a new column of the class variable
zz$Var <- zz[, var_ind] # create a new column of the value variable
zz <- zz %>%
select(IDNR, Year, Var, Cond) %>%
na.omit() %>%
filter(Var > quantile(Var, neg_cut) & Var < quantile(Var, pov_cut)) %>% # cut the tail
group_by(Cond) %>%
filter(length(IDNR) > cut_num) # set the minimum number of obs for each class
if (nrow(zz) != 0) {
c_uni <- unique(zz$Cond) # unique class
c_uni_name <- c()
c_uni_num <- c()
for (i in 1:length(c_uni)) {
c_uni_num[i] <- which(c_names %in% c_uni[i])
}
c_uni_num <- sort(c_uni_num)
c_uni_name <- c_names[c_uni_num] # record the class names
c_list <- list()
for (c in 1:length(c_uni_name)) {
print(paste(length(c_uni), c))
c_lp <- zz$Var[zz$Cond == c_uni_name[c]] # for each class
levy_result <- levy_fitting(dat_t = c_lp, bin_num = bin_num, include_standarderror=TRUE, include_Soofi=FALSE, fitting_method="GMM") # Levy estimation
results_df[nrow(results_df)+1,] = list(var_ind, cond_ind, c_uni_name[[c]], c, country_names[[k]], k, length(c_lp), levy_result$levy_para[[1]], levy_result$levy_para[[2]], levy_result$levy_para[[3]], levy_result$levy_para[[4]], levy_result$standard_errors[[1]], levy_result$standard_errors[[2]], levy_result$standard_errors[[3]], levy_result$standard_errors[[4]])
print(results_df)
#results_df <- data.frame(Fit_Variable=character(), Separator_Variable=character(), Class=character(), class_idx=integer(), Country=character(), country_idx=integer(), Observations=integer(), Levy_alpha=double(), Levy_beta=double(), Levy_gamma=double(), Levy_delta=double(), Levy_alpha_Standard_Error=double(), Levy_beta_Standard_Error=double(), Levy_gamma_Standard_Error=double(), Levy_delta_Standard_Error=double(), stringsAsFactors=FALSE)
#c_list[[c]] <- list(levy_para = levy_result$levy_para, levy_soofi = levy_result$levy_soofi, est_levy_std_error = levy_result$est_levy_std_error, data_mid = levy_result$data_mid , data_p = levy_result$data_p, levy_q = levy_result$levy_q)
}
# c_uni_list[[k]] <- c_uni_name # record the ordered name of unique class
#c_uni_list_2[[k]] <- c_uni_name_2 #
# c_uni_num_list[[k]] <- c_uni_num # record the ordered numeric name of unique class
# result_list[[k]] <- c_list # record the result from "fun_info_gen"
}
}
# all_list <- list(result_list, c_uni_list, c_uni_num_list)
return(results_df)
}
# main entry point
## 1.1. loading of required data and cleaning
#load("All_list_Cleaned_cut.Rda") ## load the data file created from "Productivity_Analysis_Data.Rmd"
load("All_list_Cleaned.Rda", verbose=T) ## load the data file created from "Productivity_Analysis_Data.Rmd"
#load("Labels.Rda")
############ 2. Fit the Levy distribution ############
## note the following index
# con_ind: 2: year, 3: size, 4: industry
# var_ind: 5: LP, 6: LP_change, 7: TFP growth
## set up the cut-off point
neg_cut <- 0.0025 # negative cut-off point
pov_cut <- 0.9975 # positive cut-off point
year_names <- 2006:2015
## Year class
# LP conditional on year (year class)
LP_year_Levy_GMM_df_SE <- fun_fit_levy(dat = All_list_Cleaned, bin_num = 100, cond_ind = "Year", var_ind = "LP", c_names = year_names, cut_num = 10000, neg_cut = neg_cut, pov_cut = pov_cut)
# LP_change conditional on year
LP_Change_year_Levy_GMM_df_SE <- fun_fit_levy(dat = All_list_Cleaned, bin_num = 100, cond_ind = "Year", var_ind = "LP_diff", c_names = year_names, cut_num = 10000, neg_cut = neg_cut, pov_cut = pov_cut)
save(LP_year_Levy_GMM_df_SE, LP_Change_year_Levy_GMM_df_SE, file = "Year_Levy_GMM_df_SE.Rda")
## Size class
# LP conditional on size
LP_size_Levy_GMM_df_SE <- fun_fit_levy(dat = All_list_Cleaned, bin_num = 100, cond_ind = "COMPCAT", var_ind = "LP", c_names = size_names_long, cut_num = 5000, neg_cut = neg_cut, pov_cut = pov_cut)
# LP_change conditional on size
LP_Change_size_Levy_GMM_df_SE <- fun_fit_levy(dat = All_list_Cleaned, bin_num = 100, cond_ind = "COMPCAT", var_ind = "LP_diff", c_names = size_names_long, cut_num = 5000, neg_cut = neg_cut, pov_cut = pov_cut)
save(LP_size_Levy_GMM_df_SE, LP_Change_size_Levy_GMM_df_SE, file = "Size_Levy_GMM_df_SE.Rda")
## Industry class
# LP conditional on sector
LP_ind_Levy_GMM_df_SE <- fun_fit_levy(dat = All_list_Cleaned, bin_num = 100, cond_ind = "NACE_CAT", var_ind = "LP", c_names = ind_name_table$ind_names, cut_num = 1000, neg_cut = neg_cut, pov_cut = pov_cut)
# LP_change conditional on sector
LP_Change_ind_Levy_GMM_df_SE <- fun_fit_levy(dat = All_list_Cleaned, bin_num = 100, cond_ind = "NACE_CAT", var_ind = "LP_diff", c_names = ind_name_table$ind_names, cut_num = 1000, neg_cut = neg_cut, pov_cut = pov_cut)
save(LP_ind_Levy_GMM_df_SE, LP_Change_ind_Levy_GMM_df_SE, file = "Industry_Levy_GMM_df_SE.Rda")