forked from vynguyen92/publish_nhanes_data_1988_2018
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathf - clean_dumb.R
executable file
·83 lines (79 loc) · 2.13 KB
/
f - clean_dumb.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Flag, for every SEQN_new value, the rows recorded under the smallest
# SDDSRVYR value seen for that SEQN_new (presumably the earliest survey cycle
# in which the participant appears - confirm against the data dictionary).
#
# Arguments:
#   list_unclean     - not used by the current implementation; retained so
#                      that existing calls keep working. (A commented-out
#                      per-cycle loop, which was the only consumer of this
#                      argument, has been removed.)
#   unclean_response - data frame containing at least the columns SEQN,
#                      SEQN_new and SDDSRVYR, and no column named `keep`.
#
# Returns (invisibly):
#   unclean_response with one extra logical column `keep`: TRUE on rows whose
#   SDDSRVYR equals the minimum SDDSRVYR of their SEQN_new, NA on every other
#   row. Row order and row count are those of unclean_response.
#
# Side effects:
#   Attaches the tidyverse. In an interactive session it also opens a data
#   viewer on the SEQN, SEQN_new, SDDSRVYR and keep columns for inspection.
clean_duplicates_of_seqn_from_cycles <- function(list_unclean,
                                                 unclean_response) {
  library(tidyverse)

  # Fail early with a readable message instead of a late dplyr error.
  required_columns <- c("SEQN", "SEQN_new", "SDDSRVYR")
  missing_columns <- setdiff(required_columns, colnames(unclean_response))
  if (length(missing_columns) > 0) {
    stop(
      "unclean_response is missing required column(s): ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }
  # An existing `keep` column would be suffixed by the join (keep.x / keep.y)
  # and the flag produced here would no longer be addressable as `keep`.
  if ("keep" %in% colnames(unclean_response)) {
    stop(
      "unclean_response already contains a column named 'keep'",
      call. = FALSE
    )
  }

  # One row per SEQN_new holding the smallest SDDSRVYR observed for it.
  # NOTE(review): min() is called without na.rm, so a group containing an NA
  # SDDSRVYR yields NA as its minimum - confirm that is acceptable.
  df_cycles <- unclean_response %>%
    group_by(SEQN_new) %>%
    summarise(SDDSRVYR = min(SDDSRVYR)) %>%
    ungroup() %>%
    mutate(keep = TRUE)

  # Every key in df_cycles originates from unclean_response, so a left join
  # annotates the existing rows and can never append new ones.
  flagged_response <- unclean_response %>%
    left_join(df_cycles, by = c("SEQN_new", "SDDSRVYR"))

  # View() needs an interactive session; skip it in batch runs (e.g. Rscript)
  # so the function stays usable there.
  if (interactive()) {
    View(
      flagged_response %>%
        select(SEQN, SEQN_new, SDDSRVYR, keep)
    )
  }

  invisible(flagged_response)
}