-
Notifications
You must be signed in to change notification settings - Fork 2
/
bmProcessingScripts.R
158 lines (143 loc) · 5.35 KB
/
bmProcessingScripts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Load the SQLite library
library("RSQLite");
dbfile = "Z:/BioModelos/DB_current/production.sqlite3" # Assign the sqlite datbase and full path to a variable
sqlite = dbDriver("SQLite") # Instantiate the dbDriver to a convenient object
mydb = dbConnect(sqlite, dbfile) # Assign the connection string to a connection object
#dbListTables(mydb) # Request a list of tables using the connection object
#Update CSVs (Agregate reports from all users)
#getPtError.R
#Gets the error reports from BioModelos DB
#species: the species to query.
#date: the date beyond which error reports are considered in YYYY-MM-DD format.
#con: an SQLiteConnection object
getPtError<-function(con, species, date){
pt.errors.q <- paste0("SELECT s.sci_name AS Especies, pc.recordId AS 'ID Registro', pc.isOutlier AS 'Outlier Geográfico', pc.geoIssue AS 'Georreferenciación dudosa', pc.idIssue AS 'Identificación dudosa', pc.oldTaxonomy AS 'Nombre desactualizado', pc.inCaptivity AS 'Individuo introducido', pc.otherIssues AS 'Otros', pc.comment AS Comentarios, u.name AS 'Reportado por', pc.created_at AS 'Reportado'
FROM point_comments AS pc
JOIN users AS u ON pc.user_id = u.id
JOIN species AS s ON pc.species_id = s.id
WHERE pc.created_at > \'", date, "\' AND s.sci_name = \'", species,
"\' ORDER BY s.sci_name")
q.result <-dbSendQuery(con, pt.errors.q)
pt.errors <- fetch(q.result)
dbClearResult(q.result)
if(nrow(pt.errors)>0){
return(pt.errors)
} else {
return(NULL)
}
}
#bmJSON2table
#Cast a JSON string from a BioModelos edition into a data frame (single row)
#Returns NULL value for geometries other than Point.
bmJSON2table<-function(json){
interp.json<-tryCatch(fromJSON(json),
error=function(err){
print(paste("MY_ERROR: ",err))
return(NULL)
})
if(is.null(interp.json)){
return(NULL)
}
if(interp.json$features$geometry$type=="Point"){
df<-cbind(geometry=interp.json$features$geometry$type,
lon=unlist(interp.json$features$geometry$coordinates)[1],
lat=unlist(interp.json$features$geometry$coordinates)[2],
interp.json$features$properties)
return(df)
} else {
return(NULL)
}
}
#Get added records
getNewRecs<-function(con,species, date){
edits.q <- paste0("SELECT r.id AS 'Edición ID', s.sci_name AS Especie, m.description AS Umbral, m.level AS Nivel, r.GeoJSON AS 'Tipo de Edición', u.name AS Experto, r.created_at AS Fecha
FROM species AS s
JOIN models AS m ON s.id = m.species_id
JOIN reviews AS r ON r.model_id = m.id
JOIN users AS u ON u.id = r.user_id
WHERE r.created_at > \'", date, "\' AND s.sci_name = \'", species,
"\' ORDER BY s.sci_name")
q.result <-dbSendQuery(con, edits.q)
edits <- fetch(q.result)
dbClearResult(q.result)
df<-data.frame()
if(nrow(edits)>0){
for(i in 1:nrow(edits)){
result<-bmJSON2table(edits$`Tipo de Edición`[i])
if(!is.null(result)){
df <- rbind(df,result)
}
}
}
if(nrow(df)>0){
return(df)
} else {
return(NULL)
}
}
Cast2BMcsv<-function(df){
result<-data.frame(id=rep("BioModelos user",nrow(df)),
species=df$Especie,
EspecieOriginal=rep(NA,nrow(df)),
lon=df$lon,
lat=df$lat,
Localidad=df$Localidad,
Municipio=rep(NA,nrow(df)),
Departamento=rep(NA,nrow(df)),
Pais=rep(NA,nrow(df)),
Altitud=rep(NA,nrow(df)),
Fecha=df$`Fecha de Registro`,
Institucion=rep(NA,nrow(df)),
Colector=df$Observador,
Evidencia=df$Cita,
Publico=rep("Si",nrow(df)))
return(result)
}
UpdateCSV<-function(con, sp.name, date, file.name, in.folder, out.folder){
in.file <- paste0(in.folder, "/", file.name)
if(file.exists(in.file)){
df0<-read.csv(in.file)
#Get error reports and added records
df1<-getPtError(con,sp.name, date)
df2<-getNewRecs(con,sp.name, date)
if(is.null(df1)&is.null(df2)){
print(paste("Nothing to update for",sp.name))
return(NULL)
}
#Remove records with errors
if(!is.null(df1)){
results <- df0[!(df0$id%in%df1$`ID Registro`), ]
} else {
results <- df0
}
#Add records
if(!is.null(df2)){
results<-rbind(results,Cast2BMcsv(df2))
}
write.csv(results, paste0(out.folder,"/",file.name),row.names=FALSE)
return(results)
} else {
print(paste("File",file.name,"does not exist"))
return(NULL)
}
}
###Update CSV for species in a list
sp.list<-read.csv("D:/Projects/TallerBST/verifList.csv",as.is=T)[,1]
file.names<-paste0(sub(" ","_",sp.list),".csv")
occs<-data.frame()
for(i in 1:length(sp.list)){
df.row<-UpdateCSV(mydb, sp.list[i], "2016-10-12", file.names[i],
"C:/Workspace/prueba/CSV", "C:/Workspace/prueba/CSV2")
if(!is.null(df.row)){
occs<-rbind(occs,df.row)
}
}
hasErrors<-rep(0,length(sp.list))
hasNewRecs<-rep(0,length(sp.list))
for (i in 1:length(sp.list)){
df1<-getPtError(mydb,sp.list[i], '2016-10-12')
df2<-getNewRecs(mydb,sp.list[i], '2016-10-12')
if(!is.null(df1)) hasErrors[i] <- nrow(df1)
if(!is.null(df2)) hasNewRecs[i] <- nrow(df2)
}
View(cbind(sp.list,hasErrors,hasNewRecs))