forked from vynguyen92/publish_nhanes_data_1988_2018
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathf - exclude_extraneous_indicators.R
112 lines (79 loc) · 3.26 KB
/
f - exclude_extraneous_indicators.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#' Blank out comment indicators that have no accompanying measurement
#'
#' For every distinct, non-missing value of `comment_codename_use` in
#' `df_doc_cleaning`, the column of that name in `dataset_comments` is set to
#' `NA` on the rows where the paired measurement column is `NA`. The paired
#' measurement column is the `variable_codename_use` documented alongside the
#' comment codename; when several are documented, only those whose name ends
#' in "LA" or "L" are kept.
#'
#' After masking, the number of non-missing comments is compared with the
#' number of non-missing measurements. If they differ, the `SDDSRVYR` values
#' of the rows that have a measurement but no comment are intersected with the
#' `SDDSRVYR` values documented for that comment codename; a non-empty
#' intersection is reported with `print()` (codenames, both sets of
#' `SDDSRVYR` values, and their intersection).
#'
#' @param dataset_comments Data frame containing the comment columns and the
#'   measurement columns named in `df_doc_cleaning`. `SDDSRVYR` is required
#'   only when a count mismatch has to be diagnosed.
#' @param df_doc_cleaning Data frame with the columns `comment_codename_use`
#'   and `variable_codename_use`. `SDDSRVYR` is required only when a count
#'   mismatch has to be diagnosed.
#' @return `dataset_comments` with the comment columns masked as described.
exclude_extraneous_indicators <- function(dataset_comments, df_doc_cleaning) {
  absent_doc_columns <- setdiff(
    c("comment_codename_use", "variable_codename_use"),
    names(df_doc_cleaning)
  )
  if (length(absent_doc_columns) > 0) {
    stop(
      "df_doc_cleaning is missing column(s): ",
      paste(absent_doc_columns, collapse = ", "),
      call. = FALSE
    )
  }

  # Work on plain character vectors so that factor-encoded codenames behave
  # the same as character ones.
  doc_comment_codenames <- as.character(
    df_doc_cleaning[["comment_codename_use"]]
  )
  doc_chemical_codenames <- as.character(
    df_doc_cleaning[["variable_codename_use"]]
  )

  comments_codenames <- unique(
    doc_comment_codenames[!is.na(doc_comment_codenames)]
  )

  # Iterating over the values (rather than seq(n)) makes the loop a no-op
  # when no comment codename is documented.
  for (comment_codename_i in comments_codenames) {
    # %in% never yields NA, so rows without a comment codename are skipped.
    rows_doc_i <- doc_comment_codenames %in% comment_codename_i
    chemical_codename_i <- unique(doc_chemical_codenames[rows_doc_i])

    if (length(chemical_codename_i) > 1) {
      # Several measurement codenames share this comment: keep the ones
      # whose name ends in "LA" or "L".
      chemical_codename_i <- chemical_codename_i[
        grepl("LA$|L$", chemical_codename_i)
      ]
    }

    if (length(chemical_codename_i) == 0) {
      stop(
        "No measurement codename could be resolved for comment codename '",
        comment_codename_i, "'.",
        call. = FALSE
      )
    }
    if (length(chemical_codename_i) > 1) {
      # The first candidate is the column the masking was applied against
      # historically; keep that choice but make the ambiguity visible.
      warning(
        "Several measurement codenames match comment codename '",
        comment_codename_i, "' (",
        paste(chemical_codename_i, collapse = ", "),
        "); using the first.",
        call. = FALSE
      )
      chemical_codename_i <- chemical_codename_i[1]
    }

    absent_columns <- setdiff(
      c(comment_codename_i, chemical_codename_i),
      names(dataset_comments)
    )
    if (length(absent_columns) > 0) {
      stop(
        "dataset_comments is missing column(s): ",
        paste(absent_columns, collapse = ", "),
        call. = FALSE
      )
    }

    # Direct assignment (instead of ifelse) keeps the column's type and
    # attributes, even when every measurement is missing.
    measurement_missing <- is.na(dataset_comments[[chemical_codename_i]])
    dataset_comments[[comment_codename_i]][measurement_missing] <- NA

    comment_missing <- is.na(dataset_comments[[comment_codename_i]])
    num_comments <- sum(!comment_missing)
    num_measurements <- sum(!measurement_missing)

    if (num_comments != num_measurements) {
      if (!("SDDSRVYR" %in% names(dataset_comments)) ||
          !("SDDSRVYR" %in% names(df_doc_cleaning))) {
        stop(
          "SDDSRVYR must be present in both dataset_comments and ",
          "df_doc_cleaning to diagnose comment codename '",
          comment_codename_i, "'.",
          call. = FALSE
        )
      }

      # SDDSRVYR values of rows that have a measurement but no comment.
      cycle_missing_comments <- unique(
        dataset_comments[["SDDSRVYR"]][comment_missing & !measurement_missing]
      )
      # SDDSRVYR values for which this comment codename is documented.
      cycles_comments_doc_cleaning_i <- unique(
        df_doc_cleaning[["SDDSRVYR"]][rows_doc_i]
      )
      intersect_cycles <- intersect(
        cycle_missing_comments,
        cycles_comments_doc_cleaning_i
      )

      # Only report when a comment is absent although the documentation
      # lists the comment codename for that SDDSRVYR value.
      if (length(intersect_cycles) > 0) {
        print(comment_codename_i)
        print(chemical_codename_i)
        print(cycle_missing_comments)
        print(cycles_comments_doc_cleaning_i)
        print(intersect_cycles)
      }
    }
  }

  dataset_comments
}