forked from vynguyen92/publish_nhanes_data_1988_2018
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathf - include_column_for_harmonized_codenames.R
executable file
·63 lines (51 loc) · 3.04 KB
/
f - include_column_for_harmonized_codenames.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
########################### FUNCTION TO INCLUDE A COLUMN FOR HARMONIZED CODENAMES ############################
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Purpose: This function is to include a column with the harmonized codenames for ease of extraction.
#
# Inputs: vector_original_codenames - vector of the column names containing the original codenames
# vector_new_correcting_codenames - vector of the new column names containined the corrected codenames
# df_cleaning_doc - dataframe containing the cleaning documentations
#
# Outputs: df_cleaning_doc - dataframe of the cleaning documentation with a new column of harmonized codename
include_column_for_harmonized_codenames <- function(vector_original_codenames
, vector_new_correcting_codenames
, df_cleaning_doc)
{
# Determine the number of codename types
num_correcting_codenames <- length(vector_original_codenames)
# For the variable codenames, make a new column to include the harmonized codenames
for(j in seq(num_correcting_codenames))
{
# Extract a codename pertaining to the original codenames
original_codename_j <- vector_original_codenames[j]
# print(original_codename_j)
# Extract a new column name
new_correcting_codename_j <- vector_new_correcting_codenames[j]
# print(new_correcting_codename_j)
# Make a temporary vector with the codenames of the original
temp_codenames_to_use <- df_cleaning_doc[,original_codename_j] %>%
unlist(., use.names = FALSE)
# print(unique(temp_codenames_to_use))
# Define a string to contain the name of the new column showing the corrections
corrected_codename_j <- paste("corrected"
, original_codename_j
, sep = "_")
# Extract the vector containing the corrected codenames
codenames_corrected_j <- df_cleaning_doc[,corrected_codename_j] %>%
unlist(., use.names = FALSE)
# print(unique(codenames_corrected_j))
# Extract only the corrected codenames that is not missing
index_corrected_codename <- which(!is.na(codenames_corrected_j))
# print(index_corrected_codename)
# Ensure that the corrected codenames replace the original codenames for those that were harmonized
# print(unique(temp_codenames_to_use[index_corrected_codename]))
temp_codenames_to_use[index_corrected_codename] <- codenames_corrected_j[index_corrected_codename]
# print(unique(temp_codenames_to_use[index_corrected_codename]))
# Make a new column with the harmonized codenames
df_cleaning_doc[,new_correcting_codename_j] <- temp_codenames_to_use
}
# Deem the dataset as updated with the new column of harmonized codenames
df_cleaning_doc_updated <- df_cleaning_doc
return(df_cleaning_doc_updated)
}