-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_indicator3_data.R
94 lines (66 loc) · 3.63 KB
/
get_indicator3_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
###
### This R function takes as input the output of the Kobo form "International Genetic Indicator testing"
### and reformats it in order to have the data in a dataframe useful for estimating
### Genetic Diversity Indicator 3 (the number of taxa with genetic monitoring)
### the function also creates new useful variables,
### like taxon name and if the taxon was assessed only a single time or multiple times
###
### If you use this script, please check https://github.com/AliciaMstt/GeneticIndicators
### for citation guidelines
get_indicator3_data <- function(kobo_output) {
  ### Purpose:
  # Reformat the Kobo export into a data frame holding the columns needed to
  # estimate Genetic Diversity Indicator 3, adding the derived variables
  # `taxon` (only if absent), `year_assesment` and `multiassessment`.

  ### Arguments:
  # kobo_output = a data frame object read into R from the `.csv` file
  # resulting from exporting the Kobotoolbox data from the form
  # "International Genetic Indicator testing" with the settings explained at
  # https://github.com/AliciaMstt/GeneticIndicators

  ### Value:
  # A data frame with one row per row of `kobo_output` and the columns:
  # country_assessment, taxonomic_group, taxon, scientific_authority, genus,
  # year_assesment, name_assessor, email_assessor, gen_studies,
  # temp_gen_monitoring, gen_monitoring_years, source_genetic_studies,
  # X_validation_status, X_uuid, multiassessment.
  # Empty strings ("") in character columns are replaced by NA.

  ### Needed libraries:
  # library(tidyr)
  # library(dplyr)
  # library(utile.tools)
  # library(stringr)

  ###
  ### Function
  ###

  ### Validate input
  if (!is.data.frame(kobo_output)) {
    stop("`kobo_output` must be a data frame", call. = FALSE)
  }

  has_taxon <- "taxon" %in% colnames(kobo_output)

  required_cols <- c(
    "country_assessment", "taxonomic_group", "scientific_authority", "genus",
    "end", "name_assessor", "email_assessor",
    "gen_studies", "temp_gen_monitoring", "gen_monitoring_years",
    "source_genetic_studies", "X_validation_status", "X_uuid"
  )
  if (!has_taxon) {
    # these two are only needed to build the taxon name
    required_cols <- c(required_cols, "species", "subspecies_variety")
  }
  missing_cols <- setdiff(required_cols, colnames(kobo_output))
  if (length(missing_cols) > 0) {
    stop(
      "`kobo_output` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  ### Separate data

  # create a variable with the full taxon name if this variable doesn't exist
  # already (raw kobo output doesn't include it, but it may exist in a "clean"
  # version of the output if ran through the quality check pipeline)
  if (has_taxon) {
    print("the data already contained a taxon column, that was used instead of creating a new one")
  } else {
    kobo_output <- kobo_output %>%
      mutate(
        taxon = utile.tools::paste(
          genus, species, subspecies_variety,
          na.rm = TRUE
        ),
        # remove white space at the end of the name (left behind when
        # subspecies_variety is an empty string rather than NA)
        taxon = stringr::str_trim(taxon, "right")
      )
  }

  ## Add a variable stating if the taxon was assessed multiple times or only
  ## a single time within a country.
  # duplicated() is run twice, the second time with fromLast = TRUE, so that
  # the first occurrence is also flagged, i.e. every record sharing the same
  # taxon + country combination is marked. The flag is computed row-wise, so
  # it does not depend on X_uuid being unique or non-missing.
  taxon_country <- kobo_output[c("taxon", "country_assessment")]
  is_multi <- duplicated(taxon_country) |
    duplicated(taxon_country, fromLast = TRUE)
  kobo_output$multiassessment <- if_else(
    is_multi, "multiassessment", "single_assessment"
  )

  ##### Process data already including taxon column
  indicator3_data <- kobo_output %>%
    # create variable with the year in which the assessment was done
    # (first 4 characters of the date the form was completed)
    mutate(year_assesment = substr(end, 1, 4)) %>%
    ## select relevant columns
    dplyr::select(
      # taxon and assessment info
      country_assessment, taxonomic_group, taxon, scientific_authority,
      genus, year_assesment, name_assessor, email_assessor,
      # indicator 3 data (section 7 of the form)
      gen_studies, temp_gen_monitoring, gen_monitoring_years,
      source_genetic_studies,
      # kobo validation status
      X_validation_status,
      # uuid, a unique id generated by kobo for each record. It is used to
      # tell records apart even if the same species is evaluated twice
      X_uuid,
      # multiassessment
      multiassessment
    ) %>%
    # change all "" (empty) cells to NA. Only character columns are touched:
    # na_if() is type-strict in dplyr >= 1.1.0 and errors when asked to
    # compare a numeric or logical column against "".
    mutate(across(where(is.character), ~ na_if(.x, "")))

  # return the reformatted data explicitly
  indicator3_data
}