diff --git a/.gitignore b/.gitignore index 32a03c3..a1cfd8f 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ man/figures/*.png rsconnect/ .Rproj.user inst/doc +debug.log diff --git a/DESCRIPTION b/DESCRIPTION index 496566a..8eb0f05 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: yaps Title: Track estimation using YAPS (Yet Another Positioning Solver) -Version: 1.2.0.9111 +Version: 1.2.1.9000 Authors@R: c( person("Henrik", "Baktoft", email = "hba@aqua.dtu.dk", role = c("cre", "aut")), person("Karl", "Gjelland", role=c("aut")), person("Uffe H.", "Thygesen", role=c("aut")), @@ -11,7 +11,7 @@ Depends: R (>= 3.5.0) License: GPL-3 + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.1 LinkingTo: Rcpp, TMB, RcppEigen Imports: circular, data.table, ggplot2, Matrix, nloptr, plyr, Rcpp, RcppEigen, reshape2, splusTimeSeries, stats, tictoc, TMB, viridis, zoo Suggests: diff --git a/NAMESPACE b/NAMESPACE index 72ddc95..2a33a61 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,14 +2,20 @@ export(alignBurstSeq) export(applySync) +export(checkInpSync) +export(fineTuneSyncModel) +export(getBbox) export(getDatTmb) export(getInits) export(getInp) export(getInpSync) export(getParams) +export(getSyncCoverage) export(getSyncModel) export(getToaYaps) +export(plotBbox) export(plotSyncModelCheck) +export(plotSyncModelHydros) export(plotSyncModelResids) export(plotYaps) export(prepDetections) @@ -41,12 +47,15 @@ importFrom(ggplot2,geom_smooth) importFrom(ggplot2,geom_tile) importFrom(ggplot2,geom_violin) importFrom(ggplot2,geom_vline) +importFrom(ggplot2,ggtitle) importFrom(ggplot2,labs) importFrom(ggplot2,scale_fill_gradientn) importFrom(ggplot2,scale_y_log10) importFrom(ggplot2,theme) importFrom(ggplot2,xlab) +importFrom(ggplot2,xlim) importFrom(ggplot2,ylab) +importFrom(ggplot2,ylim) importFrom(grDevices,dev.off) importFrom(grDevices,pdf) importFrom(graphics,abline) diff --git a/NEWS.md b/NEWS.md index 9e8a550..80a8a71 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,29 @@ +# yaps v1.2.1.9000 + +## New stuff +* New site added to github pages intended to collect yaps-related resources, how-tos etc. (https://baktoft.github.io/yaps/) +* Add first step-by-step tutorial to yaps pages +* Add function to sequentially fine-tune sync model. +* Add option to use speed of sound based on logged temperatur in the synchronisation process. +* Add function to fine-tune sync_model based on residual threshold +* Add plot to check temporal stability of sync_models. Try plotSyncModelResids(sync_model, by='temporal') +* Add option to impose spatial constraints (BBox only) and plot a visual of the constraint. Mainly used to constrain parameter space and increase speed and convergence. +* Add various checks in checkInpSync() +* Add option to use selective downsampling in getInpSync(). +* Add function to plot hydros from sync_model - usefull if hydros were re-positioned during getSyncModel(). +* Add option to estimate Z-dimension (depth) of tracks. + +## Bug fixes +* Fix nasty bug in likelihood contribution of ToP-estimation when using random burst interval (ping_type = 'rbi') +* Fix bug in getInpSync() - failed if sync_tag was only heard on own hydro +* Eliminate estimation of log_sigma_hydros_xy in sync_model +* Relax priors on SS in both track and sync model - consider to switch to softplus instead +* Return plsd object from getSyncModel +* Fix bug in getToaYaps() that allowed too short/too high BI to pass through when using ping_type='rbi' and very fast transmitters + + + + # yaps v1.2.0.9111 ## New stuff diff --git a/R/applySync.R b/R/applySync.R index df4723a..583b353 100644 --- a/R/applySync.R +++ b/R/applySync.R @@ -14,7 +14,9 @@ applySync <- function(toa, hydros="", sync_model){ if(type=="toa_matrix"){ - stop("USE OF LINEAR CORRECTION IS NOT IMPLEMENTED IN APPLYING SYNC TO A MATRIX YET!!!") + if(sum(inp_synced$inp_params$lin_corr_coeffs != 0)){ + stop("ERROR: Use of linear correction is not yet implemented in applying sync to a matrix!\n If linear corrections are used in sync, these are ignored in this step and results will be wrong!\n") + } offset_idx_mat <- matrix(findInterval(toa, ks), ncol=ncol(toa)) offset_level_mat <- matrix(inp_synced$inp_params$offset_levels[offset_idx_mat, 1], ncol=ncol(offset_idx_mat)) diff --git a/R/checkInpSync.R b/R/checkInpSync.R new file mode 100644 index 0000000..4cc848c --- /dev/null +++ b/R/checkInpSync.R @@ -0,0 +1,66 @@ +#' Check consistency of `inp_sync` object obtained from `getInpSync()` +#' +#' @inheritParams getInpSync +#' @param inp_sync Object obtained using `getInpSync()` +#' @export +checkInpSync <- function(inp_sync, silent_check){ + # speed of sound stuff + if(!silent_check & inp_sync$dat_tmb_sync$ss_data_what != "data"){cat("WARNING: getSyncModel() will estimate speed of sound. It is strongly advised to use data instead!\n")} + if(inp_sync$dat_tmb_sync$ss_data_what == "data" ){ + stopifnot(length(inp_sync$dat_tmb_sync$ss_data_vec) == inp_sync$dat_tmb_sync$np) + } + + # is timekeeper a fixed hydro? + if(!silent_check & inp_sync$dat_tmb_sync$fixed_hydros_vec[inp_sync$dat_tmb_sync$tk ] != 1){ + cat("NOTE: The designated time-keeper is not a fixed hydrophone - is this intentional?\n") + } + + # are dimensions correct? + stopifnot(ncol(inp_sync$dat_tmb_sync$toa) == inp_sync$dat_tmb_sync$nh) + stopifnot(nrow(inp_sync$dat_tmb_sync$toa) == inp_sync$dat_tmb_sync$np) + + # check that all offset_idx are present in the offset_idx vector + stopifnot(inp_sync$dat_tmb_sync$n_offset_idx == length(unique(inp_sync$dat_tmb_sync$offset_idx))) + + # check that all hydro x offset_idx combination have at least some data... + # trigger thresholds for N is somewhat arbitrary.. + sync_coverage <- getSyncCoverage(inp_sync, plot=FALSE) + if(!silent_check & min(sync_coverage$N) <= 5) { + cat("WARNING: At least one hydro x offset_idx combination has less than 5 observations. This/these hydro(s) might not be synced in that/these period(s)!\n Run getSyncCoverage(inp_sync, plot=TRUE) for more info\n") + } + if(!silent_check & min(sync_coverage$N) < 10) { + cat("WARNING: At least one hydro has less than 10 pings in an offset_idx - try getSyncCoverage(inp_sync, plot=TRUE) for visual\n and rerun getInpSync() with increased keep_rate\n") + } else if(!silent_check & min(sync_coverage$N) < 50) { + cat("NOTE: At least one hydro has less than 50 pings in an offset_idx - try getSyncCoverage(inp_sync, plot=TRUE) for visual\n and rerun getInpSync() with increased keep_rate\n") + } + return(sync_coverage) +} + +#' Quick overview to check if all hydros have enough data within each offset period. +#' +#' @inheritParams checkInpSync +#' @param plot Logical indicating whether to plot a visual or not. +#' @export +getSyncCoverage <- function(inp_sync, plot=FALSE){ + toa <- inp_sync$dat_tmb_sync$toa + nh <- ncol(toa) + offset_idx <- inp_sync$dat_tmb_sync$offset_idx + + toa_long <- data.table::data.table(reshape2::melt(toa)) + colnames(toa_long) <- c('ping', 'h','toa') + toa_long[, offset_idx := rep(offset_idx, nh)] + sync_coverage <- data.table::data.table(reshape2::melt(with(toa_long[!is.na(toa)], table(h, offset_idx)))) + colnames(sync_coverage) <- c('h', 'offset_idx' ,'N') + + if(plot){ + p <- ggplot2::ggplot(sync_coverage) + p <- p + geom_point(aes(offset_idx, N), col="steelblue") + p <- p + geom_point(data=sync_coverage[N < 50], aes(offset_idx, N), col="blue", size=2) + p <- p + geom_point(data=sync_coverage[N < 10], aes(offset_idx, N), col="orange", size=2) + p <- p + geom_point(data=sync_coverage[N <= 5], aes(offset_idx, N), col="red", size=2) + p <- p + facet_wrap(~h) + ylim(0, max(sync_coverage$N)) + print(p) + } + + return(sync_coverage) +} \ No newline at end of file diff --git a/R/fineTuneSyncModel.R b/R/fineTuneSyncModel.R new file mode 100644 index 0000000..6cdd73c --- /dev/null +++ b/R/fineTuneSyncModel.R @@ -0,0 +1,52 @@ +#' Fine-tune an already fitted sync_model +#' Wrapper function to re-run getSyncModel() using the same data, but excluding outliers. Note dimensions of data might change if eps_threshold results in empty rows in the TOA-matrix. +#' @param sync_model sync_model obtained using getSyncModel() +#' @param eps_threshold Maximum value of residual measured in meter assuming speed of sound = 1450 m/s +#' @param silent logical whether to make getSyncModel() silent +#' @export +fineTuneSyncModel <- function(sync_model, eps_threshold, silent=TRUE){ + # original inp_sync + inp_sync <- sync_model$inp_synced + + # getting resids, identify outliers and get rid of them everywhere in inp_sync + resids <- sync_model$report$eps + resids[resids == 0] <- NA + outliers <- which(abs(resids)*1450 > eps_threshold) + inp_sync$dat_tmb_sync$toa_offset[outliers] <- NA + inp_sync$inp_params$toa[outliers] <- NA + + # check if any empty rows now exists - if so get rid of them entirely + nobs <- apply(inp_sync$dat_tmb_sync$toa_offset, 1, function(k) sum(!is.na(k))) + empty_rows <- which(nobs < inp_sync$inp_params$min_hydros) + + if(length(empty_rows) > 0){ + inp_sync$dat_tmb_sync$toa_offset <- inp_sync$dat_tmb_sync$toa_offset[-empty_rows, ] + inp_sync$dat_tmb_sync$sync_tag_idx_vec <- inp_sync$dat_tmb_sync$sync_tag_idx_vec[-empty_rows] + inp_sync$dat_tmb_sync$offset_idx <- inp_sync$dat_tmb_sync$offset_idx[-empty_rows] + inp_sync$dat_tmb_sync$ss_idx <- inp_sync$dat_tmb_sync$ss_idx[-empty_rows] + inp_sync$dat_tmb_sync$np <- inp_sync$dat_tmb_sync$np - length(empty_rows) + + if(inp_sync$dat_tmb_sync$ss_data_what == "data"){ + inp_sync$dat_tmb_sync$ss_data_vec = inp_sync$dat_tmb_sync$ss_data_vec [-empty_rows] + } + + inp_sync$params_tmb_sync$TOP <- inp_sync$params_tmb_sync$TOP[-empty_rows] + + inp_sync$inp_params$toa <- inp_sync$inp_params$toa[-empty_rows, ] + } + + # # # attempt to speed up next getSyncModel() by setting initial param values to relevant values + # # # If NAs in OFFSET or SLOPEs it will not work... + # inp_sync$params_tmb_sync$OFFSET <- sync_model$pl$OFFSET + # inp_sync$params_tmb_sync$SLOPE1 <- sync_model$pl$SLOPE1 + # inp_sync$params_tmb_sync$SLOPE2 <- sync_model$pl$SLOPE2 + # inp_sync$params_tmb_sync$SS <- sync_model$pl$SS # Disabled because option to use ss_data is implemented... + # inp_sync$params_tmb_sync$LOG_SIGMA_HYDROS_XY <- sync_model$pl$LOG_SIGMA_HYDROS_XY + inp_sync$params_tmb_sync$TRUE_H <- sync_model$pl$TRUE_H + inp_sync$params_tmb_sync$LOG_SIGMA_TOA <- sync_model$pl$LOG_SIGMA_TOA + + # run getSyncModel() using the tuned inp_sync + sync_model_tuned <- getSyncModel(inp_sync, silent=silent) + + return(sync_model_tuned) +} diff --git a/R/getBbox.R b/R/getBbox.R new file mode 100644 index 0000000..5124f16 --- /dev/null +++ b/R/getBbox.R @@ -0,0 +1,17 @@ +#' Get a standard bounding box to impose spatial constraints +#' +#' Standard is a rectangle based on coordinates of outer hydros +- the buffer in meters +#' Returns a vector of lenght 6: c(x_min, x_max, y_min, y_max, eps, pen). Limits are given in UTM coordinates. +#' @param buffer Number of meters the spatial domain extends beyound the outer hydros. +#' @param eps Specifies how well-defined the borders are (eps=1E-2 is very sharp, eps=100 is very soft). +#' @param pen Specifies the penalty multiplier. +#' @inheritParams getInp +#' @export +getBbox <- function(hydros, buffer=5, eps=1E-3, pen=1){ + x_min <- hydros[which.min(hydros$hx), hx] - buffer + x_max <- hydros[which.max(hydros$hx), hx] + buffer + y_min <- hydros[which.min(hydros$hy), hy] - buffer + y_max <- hydros[which.max(hydros$hy), hy] + buffer + bbox <- c(x_min, x_max, y_min, y_max, eps, pen) + return(bbox) +} diff --git a/R/getToaYaps.R b/R/getToaYaps.R index 06f62a3..a357c8e 100644 --- a/R/getToaYaps.R +++ b/R/getToaYaps.R @@ -25,14 +25,18 @@ getToaYaps <- function(synced_dat, hydros, rbi_min, rbi_max){ ts_focal <- as.matrix(as.data.frame(ts_focal)) ts_focal <- ts_focal/10 toa <- ts_focal - dim(toa) + dimnames(toa) <- NULL # remove rows with too short BI... top1 <- rowMeans(toa, na.rm=TRUE) diffs1 <- c(diff(top1),NA) nobs <- apply(toa, 1, function(k) sum(!is.na(k))) - rem_idx <- which(diffs1 < rbi_min-1) # THIS NEEDS TO BE SET BASED ON USED SYSTEM - 1 IS TOO HIGH FOR HR-LIKE + if(rbi_min > 10){ + rem_idx <- which(diffs1 < rbi_min-1) # THIS NEEDS TO BE SET BASED ON USED SYSTEM - 1 IS TOO HIGH FOR HR-LIKE + } else { + rem_idx <- which(diffs1 < rbi_min-0.1) # rbi_min < 10 should always be HR??? + } if(length(rem_idx) > 0){ toa[rem_idx, ] <- NA } @@ -47,6 +51,12 @@ getToaYaps <- function(synced_dat, hydros, rbi_min, rbi_max){ nobs <- apply(toa, 1, function(k) sum(!is.na(k))) first_ping <- which(nobs >= 2)[1] last_ping <- rev(which(nobs >= 2))[1] + + if(is.na(first_ping) | is.na(last_ping) | first_ping == last_ping){ + print("FATAL ERROR: Not enough data to produce toa") + return(FALSE) + } + toa <- toa[first_ping:last_ping, ] # remake toa-matrix to include pings missed by all hydros... @@ -57,7 +67,11 @@ getToaYaps <- function(synced_dat, hydros, rbi_min, rbi_max){ pings <- data.table::data.table(top=top2, diff=diffs2) pings[, toa_idx:=1:.N] pings[, ping2next := 1] - pings[, next_ping_too_late := diff > rbi_max+1] + if(rbi_max > 15){ + pings[, next_ping_too_late := diff > rbi_max+1] # same as for rbi_min above when trying to exclude impossible pings + } else { + pings[, next_ping_too_late := diff > rbi_max+.1] + } pings[next_ping_too_late==TRUE, ping2next:=ping2next+round(diff/rbi_max)] pings[, ping_idx:=cumsum(c(1,ping2next[-.N]))] toa_all <- matrix(ncol=ncol(toa), nrow=max(pings$ping_idx)) diff --git a/R/plotBbox.R b/R/plotBbox.R new file mode 100644 index 0000000..ee3ff94 --- /dev/null +++ b/R/plotBbox.R @@ -0,0 +1,36 @@ +#' Graphical representation of spatial constraints +#' @inheritParams getInp +#' @export +plotBbox <- function(hydros, bbox){ + softplus <- function(x, eps){return (0.5*(x+sqrt(x*x+eps*eps))) } + + x_space_min <- floor(hydros[which.min(hydros$hx), hx] - 20) + x_space_max <- ceiling(hydros[which.max(hydros$hx), hx] + 20) + y_space_min <- floor(hydros[which.min(hydros$hy), hy] - 20) + y_space_max <- ceiling(hydros[which.max(hydros$hy), hy] + 20) + + xs <- x_space_min:x_space_max + ys <- y_space_min:y_space_max + + x_min <- bbox[1] + x_max <- bbox[2] + y_min <- bbox[3] + y_max <- bbox[4] + eps <- bbox[5] + pen <- bbox[6] + + pen_mat <- (outer(xs,ys, FUN=function(i,j) { + pen * (softplus(i - x_max, eps=eps) + softplus(x_min - i, eps=eps) + + softplus(j - y_max, eps=eps) + softplus(y_min - j, eps=eps)) + })) + + + pen_long <- data.table::data.table(reshape2::melt(pen_mat)) + pen_long[, x := xs[Var1]] + pen_long[, y := ys[Var2]] + pen_long[, pen := value] + + p <- ggplot2::ggplot(pen_long) + geom_tile(aes(x, y, fill=(pen))) + coord_fixed(ratio=1) + viridis::scale_fill_viridis() + geom_point(data=hydros, aes(x=hx, y=hy), col="red", size=2) + print(p) +} + diff --git a/R/prepTmb.R b/R/prepTmb.R index 9c85f05..d65408d 100644 --- a/R/prepTmb.R +++ b/R/prepTmb.R @@ -11,14 +11,14 @@ #' @param ss_data_what What speed of sound (ss) data to be used. Default ss_data_what='est': ss is estimated by the model. Alternatively, if ss_data_what='data': ss_data must be provided and length(ss_data) == ncol(toa) #' @param ss_data Vector of ss-data to be used if ss_data_what = 'est'. Otherwise ss_data <- 0 (default) #' @param biTable Table of known burst intervals. Only used when pingType == "pbi". Default=NULL -#' @param z_vec Vector of known depth values (positive real). Default=NULL is which case no 3D is assumed. Estimation of depth from detections is currently not supported. - +#' @param z_vec Vector of known depth values (positive real). Default=NULL is which case no 3D is assumed. If z_vec = "est" depth will be estimated. +#' @param bbox Spatial constraints in the form of a bounding box. See ?getBbox for details. #' @return List of input data ready for use in TMB-call #' @export -getInp <- function(hydros, toa, E_dist, n_ss, pingType, sdInits=1, rbi_min=0, rbi_max=0, ss_data_what='est', ss_data=0, biTable=NULL, z_vec=NULL){ +getInp <- function(hydros, toa, E_dist, n_ss, pingType, sdInits=1, rbi_min=0, rbi_max=0, ss_data_what='est', ss_data=0, biTable=NULL, z_vec=NULL, bbox=NULL){ inp_params <- getInpParams(hydros, toa, pingType) - datTmb <- getDatTmb(hydros, toa, E_dist, n_ss, pingType, rbi_min, rbi_max, ss_data_what, ss_data, biTable, inp_params, z_vec) + datTmb <- getDatTmb(hydros, toa, E_dist, n_ss, pingType, rbi_min, rbi_max, ss_data_what, ss_data, biTable, inp_params, z_vec, bbox) params <- getParams(datTmb) inits <- getInits(datTmb, sdInits) return(list( @@ -39,7 +39,7 @@ getInp <- function(hydros, toa, E_dist, n_ss, pingType, sdInits=1, rbi_min=0, rb #' #' @return List for use in TMB. #' @export -getDatTmb <- function(hydros, toa, E_dist, n_ss, pingType, rbi_min, rbi_max, ss_data_what, ss_data, biTable, inp_params, z_vec){ +getDatTmb <- function(hydros, toa, E_dist, n_ss, pingType, rbi_min, rbi_max, ss_data_what, ss_data, biTable, inp_params, z_vec, bbox){ T0 <- inp_params$T0 Hx0 <- inp_params$Hx0 Hy0 <- inp_params$Hy0 @@ -72,9 +72,21 @@ getDatTmb <- function(hydros, toa, E_dist, n_ss, pingType, rbi_min, rbi_max, ss_ if(is.null(z_vec)){ how_3d <- 'none' z_vec <- c(1) + } else if(z_vec == "est") { + how_3d <- 'est' + z_vec <- c(1) } else { how_3d <- 'data' } + + if(is.null(bbox)){ + bbox <- NA + } else { + bbox[1] <- bbox[1] - inp_params$Hx0 + bbox[2] <- bbox[2] - inp_params$Hx0 + bbox[3] <- bbox[3] - inp_params$Hy0 + bbox[4] <- bbox[4] - inp_params$Hy0 + } datTmb <- list( model = "yaps_track", @@ -95,7 +107,8 @@ getDatTmb <- function(hydros, toa, E_dist, n_ss, pingType, rbi_min, rbi_max, ss_ approxBI = approxBI, biTable = c(1), how_3d = how_3d, - z_vec = z_vec + z_vec = z_vec, + bbox = bbox ) if(pingType == 'pbi') {datTmb$biTable = biTable} @@ -124,13 +137,17 @@ getParams <- function(datTmb){ # , log_t_part = 0 #Mixture ratio between Gaussian and t ) + if(datTmb$how_3d == "est"){ + out$Z <- stats::runif(ncol(datTmb$toa), -10, 0) + } + # # # ss related if(datTmb$ss_data_what == 'est'){ out$logD_v <- 0 #diffusivity of speed of sound (D_v in ms) out$ss <- stats::rnorm(datTmb$n_ss, 1450, 5) #speed of sound } - # # # Edist realted + # # # Edist related if(datTmb$Edist[1] == 1){ out$logSigma_toa = 0 #sigma for Gaussian } else if(datTmb$Edist[2] == 1){ diff --git a/R/runYaps.R b/R/runYaps.R index 54ad0b5..2469468 100644 --- a/R/runYaps.R +++ b/R/runYaps.R @@ -12,6 +12,9 @@ runYaps <- function(inp, maxIter=1000, getPlsd=TRUE, getRep=TRUE, silent=TRUE, opt_fun='nlminb', opt_controls=list(), bounds=list()){ print("Running yaps...") random <- c("X", "Y", "top") + if(inp$datTmb$how_3d == "est"){ + random <- c(random, "Z") + } if(inp$datTmb$ss_data_what == 'est'){ random <- c(random, 'ss') } diff --git a/R/syncGetters.R b/R/syncGetters.R index db2e106..07ec674 100644 --- a/R/syncGetters.R +++ b/R/syncGetters.R @@ -1,10 +1,10 @@ #' Get sync model from inp_sync object obtained by getInpSync() #' @param inp_sync Input data prepared for the sync model using `getInpSync()` #' @param silent Keep TMB quiet -#' @param fine_tune Logical. Wheter to re-run the sync model excluding residual outliers +#' @param fine_tune Logical. Whether to re-run the sync model excluding residual outliers. Consider to use fineTuneSyncModel() instead. #' @param max_iter Max number of iterations to run TMB. Default=100 seems to work in most cases. #' @export -getSyncModel <- function(inp_sync, silent=TRUE, fine_tune=TRUE, max_iter=100){ +getSyncModel <- function(inp_sync, silent=TRUE, fine_tune=FALSE, max_iter=100){ dat_tmb <- inp_sync$dat_tmb_sync params <- inp_sync$params_tmb_sync random <- inp_sync$random_tmb_sync @@ -13,60 +13,55 @@ getSyncModel <- function(inp_sync, silent=TRUE, fine_tune=TRUE, max_iter=100){ cat(paste0(Sys.time(), " \n")) cat(". Running optimization of the sync model. Please be patient - this can take a long time. \n") + if(fine_tune){cat(".... fine tuning is enabled, but is getting deprecated in this function. Consider to use the function fineTuneSyncModel() instead. See ?fineTuneSyncModel for info. \n")} tictoc::tic("Fitting sync model: ") opt <- c() pl <- c() plsd <- c() obj <- c() - - sync_done <- FALSE - i <- 1 - while(!sync_done){ - tictoc::tic(paste0("... iteration ", i)) - obj <- c() - opt <- c() - report <- c() - gc() - - # config(DLL="yaps_sync") - # ## Reduce memory peak of a parallel model by creating tapes in serial - # config(tape.parallel=0, DLL="yaps_sync") - obj <- TMB::MakeADFun(data = dat_tmb, parameters = params, random = random, DLL = "yaps", inner.control = list(maxit = max_iter), silent=silent) - - if(silent){ - opt <- suppressWarnings(stats::nlminb(inits,obj$fn,obj$gr)) - } else { - opt <- stats::nlminb(inits,obj$fn,obj$gr) - } - obj$fn() - pl <- obj$env$parList() # List of estimates - obj_val <- opt$objective - cat(paste0(".. ", Sys.time()), " \n") - cat(".... obj = ", obj_val, " \n") - report <- obj$report() + tictoc::tic() + obj <- c() + opt <- c() + report <- c() + gc() - crazy_outliers <- which(abs(report$eps_toa) > 10) - fine_outliers <- which(abs(report$eps_toa)*1450 > 100) - if(length(crazy_outliers > 0)){ - cat(".... some extreme outliers potentially affecting the model where identified and removed - rerunning sync_model \n") - dat_tmb$toa_offset[crazy_outliers] <- NA - } else if(fine_tune & length(fine_outliers > 0)){ - cat(".... fine_tune is enable (fine_tune = TRUE) - some outliers where identified and removed - rerunning sync_model \n") - dat_tmb$toa_offset[fine_outliers] <- NA - } else { - sync_done <- TRUE - } - tictoc::toc() - i <- i +1 + # config(DLL="yaps_sync") + # ## Reduce memory peak of a parallel model by creating tapes in serial + # config(tape.parallel=0, DLL="yaps_sync") + obj <- TMB::MakeADFun(data = dat_tmb, parameters = params, random = random, DLL = "yaps", inner.control = list(maxit = max_iter), silent=silent) + + if(silent){ + opt <- suppressWarnings(stats::nlminb(inits,obj$fn,obj$gr)) + } else { + opt <- stats::nlminb(inits,obj$fn,obj$gr) } - # jointrep <- try(TMB::sdreport(obj, getJointPrecision=TRUE), silent=silent) - # param_names <- rownames(summary(jointrep)) - # sds <- summary(jointrep)[,2] - # summ <- data.frame(param=param_names, sd=sds) - # plsd <- split(summ[,2], f=summ$param) + obj$fn() + pl <- obj$env$parList() # List of estimates + obj_val <- opt$objective + cat(paste0(".. ", Sys.time()), " \n") + cat(".... obj = ", obj_val, " \n") + report <- obj$report() + + crazy_outliers <- which(abs(report$eps_toa)*1450 > 10000) + fine_outliers <- which(abs(report$eps_toa)*1450 > 1000) + if(length(crazy_outliers > 0)){ + cat(".... some extreme outliers potentially affecting the model where identified \n Consider to run fineTuneSyncModel(sync_model, eps_threshold=10000). See ?fineTuneSyncModel for more info. \n") + # dat_tmb$toa_offset[crazy_outliers] <- NA + } else if(fine_tune){ + cat(".... fine tuning is enabled, but is deprecated. Use the function fineTuneSyncModel() instead. See ?fineTuneSyncModel for info. \n") + # dat_tmb$toa_offset[fine_outliers] <- NA + } + + tictoc::toc() + + jointrep <- try(TMB::sdreport(obj, getJointPrecision=TRUE), silent=silent) + param_names <- rownames(summary(jointrep)) + sds <- summary(jointrep)[,2] + summ <- data.frame(param=param_names, sd=sds) + plsd <- split(summ[,2], f=summ$param) pl$TRUE_H[,1] <- pl$TRUE_H[,1] + inp_params$Hx0 pl$TRUE_H[,2] <- pl$TRUE_H[,2] + inp_params$Hy0 @@ -80,7 +75,7 @@ getSyncModel <- function(inp_sync, silent=TRUE, fine_tune=TRUE, max_iter=100){ cat("Sync model done \n") cat("Consider saving the sync model for later use - e.g. save(sync_model, file='path_to_sync_save'). \n") tictoc::toc() - return(list(pl=pl, report=report, obj_val=obj_val, eps_long=eps_long, inp_synced=inp_sync)) + return(list(pl=pl, plsd=plsd, report=report, obj_val=obj_val, eps_long=eps_long, inp_synced=inp_sync)) } @@ -93,14 +88,28 @@ getSyncModel <- function(inp_sync, silent=TRUE, fine_tune=TRUE, max_iter=100){ #' @param time_keeper_idx Index of the hydrophone to use as time keeper. Could e.g. be the one with smallest overall clock-drift. #' @param fixed_hydros_idx Vector of hydro idx's for all hydrophones where the position is assumed to be known with adequate accuracy and precission. Include as many as possible as fixed hydros to reduce overall computation time and reduce overall variability. As a bare minimum two hydros need to be fixed, but we strongly advice to use more than two. #' @param n_offset_day Specifies the number of hydrophone specific quadratic polynomials to use per day. For PPM based systems, 1 or 2 is often adeqaute. -#' @param n_ss_day Specifies number of speed of sound to estimate per day. Future versions will enable use of logged water temperature instead. However, estimating SS gives an extra option for sanity-checking the final sync-model. -#' @param keep_rate Syncing large data sets can take a long time. However, there is typically an excess number of sync tag detections and a sub-sample is typically enough for good synchronization. This parameter specifies the proportion (0-1) of data to keep when sub-sampling. +#' @param n_ss_day Specifies number of speed of sound to estimate per day if no ss data is supplied. It is recommended to use logged water temperature instead. However, estimating SS gives an extra option for sanity-checking the final sync-model. +#' @param ss_data_what Indicates whether to estimate ("est") speed of sound or to use data based on logged water temperature ("data"). +#' @param ss_data data.table containing timestamp and speed of sound for the entire period to by synchronised. Must contain columns 'ts' (POSIXct timestamp) and 'ss' speed of sound in m/s (typical values range 1400 - 1550). +#' @param keep_rate Syncing large data sets can take a really long time. However, there is typically an excess number of sync tag detections +#' and a sub-sample is typically enough for good synchronization. +#' This parameter EITHER specifies a proportion (0-1) of data to keep when sub-sampling +#' OR (if keep_rate > 10) number of pings (approximate) to keep in each hydro X offset_idx combination if enough exists. #' @param excl_self_detect Logical whether to excluded detections of sync tags on the hydros they are co-located with. Sometimes self detections can introduce excessive residuals in the sync model in which case they should be excluded. #' @param lin_corr_coeffs Matrix of coefficients used for pre-sync linear correction. dim(lin_corr_coeffs)=(#hydros, 2). +#' @param silent_check Logical whether to get output from checkInpSync(). Default is FALSE #' @export -getInpSync <- function(sync_dat, max_epo_diff, min_hydros, time_keeper_idx, fixed_hydros_idx, n_offset_day, n_ss_day, keep_rate=1, excl_self_detect=TRUE, lin_corr_coeffs=NA){ - sync_dat <- appendDetections(sync_dat) +getInpSync <- function(sync_dat, max_epo_diff, min_hydros, time_keeper_idx, fixed_hydros_idx, n_offset_day, n_ss_day, keep_rate=1, excl_self_detect=TRUE, lin_corr_coeffs=NA, ss_data_what="est", ss_data=c(0), silent_check=FALSE){ + if(length(unique(sync_dat$hydros$serial)) != nrow(sync_dat$hydros)){ + print(sync_dat$hydros[, .N, by=serial][N>=2]) + stop("ERROR: At least one hydrophone serial number is used more than once in sync_dat$hydros!\n") + } + + if(keep_rate <=0 | (keep_rate > 1 & keep_rate < 10) | (keep_rate >= 10 & keep_rate %% 1 != 0)){ + stop("ERROR: Invalid keep_rate! Must be either ]0;1] or integer >= 10\n") + } + sync_dat <- appendDetections(sync_dat) if(is.na(lin_corr_coeffs[1])){ lin_corr_coeffs <- matrix(0, nrow=nrow(sync_dat$hydros), ncol=2, byrow=TRUE) @@ -110,26 +119,53 @@ getInpSync <- function(sync_dat, max_epo_diff, min_hydros, time_keeper_idx, fixe T0 <- min(sync_dat$detections$epo) + if (is.null(sync_dat$hydros$hlabel)) sync_dat$hydros[, hlabel := paste(idx)] # Each hydrofone may have a label different from it's idx, if provided on input + inp_H_info <- getInpSyncHInfo(sync_dat) - inp_toa_list <- getInpSyncToaList(sync_dat, max_epo_diff, min_hydros, excl_self_detect, keep_rate) - fixed_hydros_vec <- getFixedHydrosVec(sync_dat, fixed_hydros_idx) - offset_vals <- getOffsetVals(inp_toa_list, n_offset_day) - ss_vals <- getSsVals(inp_toa_list, n_ss_day) + inp_toa_list_all <- getInpSyncToaList(sync_dat, max_epo_diff, min_hydros, excl_self_detect) + fixed_hydros_vec <- getFixedHydrosVec(sync_dat, fixed_hydros_idx) + offset_vals_all <- getOffsetVals(inp_toa_list_all, n_offset_day) + inp_toa_list <- getDownsampledToaList(inp_toa_list_all, offset_vals_all, keep_rate) + offset_vals <- getOffsetVals(inp_toa_list, n_offset_day) + ss_vals <- getSsVals(inp_toa_list, n_ss_day) + if(ss_data_what == "data"){ + ss_data_vec <- getSsDataVec(inp_toa_list, ss_data) + } else { + ss_data_vec <- c(0) + } - dat_tmb_sync <- getDatTmbSync(sync_dat, time_keeper_idx, inp_toa_list, fixed_hydros_vec, offset_vals, ss_vals, inp_H_info, T0) - params_tmb_sync <- getParamsTmbSync(dat_tmb_sync) - random_tmb_sync <- c("TOP", "OFFSET", "SLOPE1", "SLOPE2", "SS", "TRUE_H") - inits_tmb_sync <- c(3, rep(-3,dat_tmb_sync$nh)) + dat_tmb_sync <- getDatTmbSync(sync_dat, time_keeper_idx, inp_toa_list, fixed_hydros_vec, offset_vals, ss_vals, inp_H_info, T0, ss_data_what, ss_data_vec) + params_tmb_sync <- getParamsTmbSync(dat_tmb_sync, ss_data_what) + if(ss_data_what == "est"){ + random_tmb_sync <- c("TOP", "OFFSET", "SLOPE1", "SLOPE2", "SS", "TRUE_H") + } else { + random_tmb_sync <- c("TOP", "OFFSET", "SLOPE1", "SLOPE2", "TRUE_H") + } + # inits_tmb_sync <- c(3, rep(-3,dat_tmb_sync$nh)) + inits_tmb_sync <- c(3) inp_params <- list(toa=inp_toa_list$toa, T0=T0, Hx0=inp_H_info$Hx0, Hy0=inp_H_info$Hy0, offset_levels=offset_vals$offset_levels, ss_levels=ss_vals$ss_levels, max_epo_diff=max_epo_diff, hydros=sync_dat$hydros, - lin_corr_coeffs=lin_corr_coeffs + lin_corr_coeffs=lin_corr_coeffs, min_hydros=min_hydros, ss_data=ss_data ) - - return(list(dat_tmb_sync=dat_tmb_sync, params_tmb_sync=params_tmb_sync, random_tmb_sync=random_tmb_sync, inits_tmb_sync=inits_tmb_sync, inp_params=inp_params)) + + inp_sync <- list(dat_tmb_sync=dat_tmb_sync, params_tmb_sync=params_tmb_sync, random_tmb_sync=random_tmb_sync, inits_tmb_sync=inits_tmb_sync, inp_params=inp_params) + inp_sync$inp_params$sync_coverage <- checkInpSync(inp_sync, silent_check) + return(inp_sync) } +#' Internal function. Extract speed of sounds for each timestamp used in sync-process from supplied data. +#' @inheritParams getInpSync +#' @noRd +getSsDataVec <- function(inp_toa_list, ss_data){ + roll <- data.table::data.table(ts = as.POSIXct(inp_toa_list$epo_self_vec, origin="1970-01-01", tz="UTC")) + data.table::setkey(ss_data, ts) + data.table::setkey(roll, ts) + ss_data_vec <- ss_data[roll, roll="nearest"]$ss + return(ss_data_vec) +} + #' Internal function. Apply linear correction matrix to epofrac before sync #' @inheritParams getInpSync #' @noRd @@ -151,23 +187,78 @@ applyLinCorCoeffsInpSync <- function(sync_dat, lin_corr_coeffs){ getInpSyncToaList <- function(sync_dat, max_epo_diff, min_hydros, excl_self_detect, keep_rate, lin_corr_coeffs){ toa_list_gross <- buildToaListGross(sync_dat, excl_self_detect) toa_list_pruned <- pruneToaListGross(toa_list_gross, max_epo_diff, min_hydros) - toa_list_downsampled <- downsampleToaList(toa_list_pruned, keep_rate) + return(toa_list_pruned) +} + +#' Internal warpper function to do downsampling of the toa +#' @inheritParams getInpSync +#' @noRd +getDownsampledToaList <- function(inp_toa_list_all, offset_vals_all, keep_rate){ + if(keep_rate > 0 & keep_rate <= 1){ + toa_list_downsampled <- downsampleToaList_random(inp_toa_list_all, keep_rate) + } else if(keep_rate >= 10){ + toa_list_downsampled <- downsampleToaList_selective(inp_toa_list_all, offset_vals_all, keep_rate) + } return(toa_list_downsampled) } +# Internal function to selectively downsample the toa-matrix for inp_sync +#' @param inp_toa_list_all Output from `getInpSyncToaList` +#' @inheritParams getInpSync +#' @noRd +downsampleToaList_selective <- function(inp_toa_list_all, offset_vals_all, keep_rate){ + toa <- inp_toa_list_all$toa + offset_idx <- offset_vals_all$offset_idx + + toa_long <- data.table::data.table(reshape2::melt(toa), rep(offset_idx, times=ncol(toa))) + colnames(toa_long) <- c('ping', 'h_idx','toa','offset_idx') + # nobs_per_offset <- toa_long[!is.na(toa), .N, by=c('h_idx' ,'offset_idx')] + + nobs_per_offset <- data.table::data.table(reshape2::melt(with(toa_long[!is.na(toa)], table(h_idx, offset_idx)), value.name="N")) + + + keep_pings <- c() + for(i in 1:length(unique(offset_idx))){ + toa_long_i <- toa_long[offset_idx == i] + keep_pings_i <- c() + h_order <- order(nobs_per_offset[offset_idx == i, N]) + for(h in 1:length(h_order)){ + already_in_keeps <- nrow(toa_long_i[!is.na(toa) & ping %in% keep_pings_i & offset_idx == i & h_idx==h_order[h]]) + if(already_in_keeps >= keep_rate){ + # already enough with this hydro... + next + } else { + need <- keep_rate - already_in_keeps + 1 + pings_h <- toa_long_i[!is.na(toa) & offset_idx == i & h_idx==h_order[h], ping] + if(length(pings_h) < need){ + keep_pings_h <- pings_h + } else { + keep_pings_h <- sample(pings_h, size=need) + } + keep_pings_i <- c(keep_pings_i, keep_pings_h) + } + } + keep_pings <- c(keep_pings, keep_pings_i) + } + + toa_list_downsampled <- list( toa = inp_toa_list_all$toa[keep_pings, ], + sync_tag_idx_vec = inp_toa_list_all$sync_tag_idx[keep_pings], + epo_self_vec = inp_toa_list_all$epo_self_vec[keep_pings]) + return(toa_list_downsampled) +} -# Internal function to downsample the toa-matrix for inp_sync -#' @param toa_list_pruned Output from `pruneToaListGross` +# Internal function to randomly downsample the toa-matrix for inp_sync +#' @param inp_toa_list_all Output from `getInpSyncToaList` #' @inheritParams getInpSync #' @noRd -downsampleToaList <- function(toa_list_pruned, keep_rate){ +downsampleToaList_random <- function(inp_toa_list_all, keep_rate){ toa_list_downsampled <- list() - keeps_idx <- which(stats::rbinom(nrow(toa_list_pruned$toa), 1, keep_rate) == 1) - toa_list_downsampled$toa <- toa_list_pruned$toa[keeps_idx,] - toa_list_downsampled$epo_self_vec <- toa_list_pruned$epo_self_vec[keeps_idx] - toa_list_downsampled$sync_tag_idx_vec <- toa_list_pruned$sync_tag_idx_vec[keeps_idx] + keeps_idx <- which(stats::rbinom(nrow(inp_toa_list_all$toa), 1, keep_rate) == 1) + toa_list_downsampled$toa <- inp_toa_list_all$toa[keeps_idx,] + toa_list_downsampled$epo_self_vec <- inp_toa_list_all$epo_self_vec[keeps_idx] + toa_list_downsampled$sync_tag_idx_vec <- inp_toa_list_all$sync_tag_idx_vec[keeps_idx] return(toa_list_downsampled) } @@ -231,7 +322,7 @@ getSsVals <- function(inp_toa_list, n_ss_day){ #' Internal function to get dat for TMB sync #' @inheritParams getInpSync #' @noRd -getDatTmbSync <- function(sync_dat, time_keeper_idx, inp_toa_list, fixed_hydros_vec, offset_vals, ss_vals, inp_H_info, T0){ +getDatTmbSync <- function(sync_dat, time_keeper_idx, inp_toa_list, fixed_hydros_vec, offset_vals, ss_vals, inp_H_info, T0, ss_data_what, ss_data_vec){ H <- as.matrix(inp_H_info$inp_H) dimnames(H) <- NULL @@ -249,7 +340,9 @@ getDatTmbSync <- function(sync_dat, time_keeper_idx, inp_toa_list, fixed_hydros_ offset_idx = offset_vals$offset_idx, n_offset_idx = offset_vals$n_offset_idx, ss_idx = ss_vals$ss_idx, - n_ss_idx = ss_vals$n_ss_idx + n_ss_idx = ss_vals$n_ss_idx, + ss_data_what = ss_data_what, + ss_data_vec = ss_data_vec ) return(dat_tmb_sync) } @@ -258,17 +351,19 @@ getDatTmbSync <- function(sync_dat, time_keeper_idx, inp_toa_list, fixed_hydros_ #' Internal function to get params for TMB sync #' @inheritParams getInpSync #' @noRd -getParamsTmbSync <- function(dat_tmb_sync){ +getParamsTmbSync <- function(dat_tmb_sync, ss_data_what){ params_tmb_sync <- list( TOP = rowMeans(dat_tmb_sync$toa, na.rm=TRUE), OFFSET = matrix(rnorm(dat_tmb_sync$nh*dat_tmb_sync$n_offset_idx, 0, 3), nrow=dat_tmb_sync$nh, ncol=dat_tmb_sync$n_offset_idx), SLOPE1 = matrix(rnorm(dat_tmb_sync$nh*dat_tmb_sync$n_offset_idx, 0, 3), nrow=dat_tmb_sync$nh, ncol=dat_tmb_sync$n_offset_idx), SLOPE2 = matrix(rnorm(dat_tmb_sync$nh*dat_tmb_sync$n_offset_idx, 0, 3), nrow=dat_tmb_sync$nh, ncol=dat_tmb_sync$n_offset_idx), - SS = rnorm(dat_tmb_sync$n_ss_idx, 1420, 1), TRUE_H = as.matrix(cbind(dat_tmb_sync$H[,1], dat_tmb_sync$H[,2], dat_tmb_sync$H[,3])), - LOG_SIGMA_TOA = 0, - LOG_SIGMA_HYDROS_XY = rnorm(dat_tmb_sync$nh,-3,1) + LOG_SIGMA_TOA = 0 + # LOG_SIGMA_HYDROS_XY = rnorm(dat_tmb_sync$nh,-3,1) ) + if(ss_data_what == "est"){ + params_tmb_sync[['SS']] <- rnorm(dat_tmb_sync$n_ss_idx, 1420, 1) + } return(params_tmb_sync) } @@ -277,12 +372,20 @@ getParamsTmbSync <- function(dat_tmb_sync){ #' @inheritParams getInpSync #' @noRd getEpsLong <- function(report, pl, inp_sync){ + + if(inp_sync$dat_tmb_sync$ss_data_what == "est"){ + ss_vec <- pl$SS[inp_sync$dat_tmb_sync$ss_idx] + } else { + ss_vec <- inp_sync$dat_tmb_sync$ss_data_vec + } + + eps <- report$eps_toa eps[which(eps==0)] <- NA eps_long <- data.table::data.table(reshape2::melt(eps)) colnames(eps_long) <- c('ping', 'hydro_idx', 'E') eps_long[, sync_tag_idx:=rep(inp_sync$dat_tmb_sync$sync_tag_idx_vec, times=ncol(eps))] - eps_long[, ss:=rep(pl$SS[inp_sync$dat_tmb_sync$ss_idx], times=ncol(eps))] + eps_long[, ss:=rep(ss_vec, times=ncol(eps))] eps_long[, E_m:=E*ss] eps_long <- eps_long[!is.na(E)] @@ -295,7 +398,7 @@ getEpsLong <- function(report, pl, inp_sync){ #' @param extreme_threshold Ignore delta values larger than this threshold. #' @inheritParams getInpSync #' @noRd -getSyncCheckDat <- function(sync_model, extreme_threshold=10000){ +getSyncCheckDat <- function(sync_model, extreme_threshold=1000){ toa <- sync_model$inp_synced$inp_params$toa toa_sync <- applySync(toa, sync_model=sync_model) @@ -307,7 +410,11 @@ getSyncCheckDat <- function(sync_model, extreme_threshold=10000){ true_y <- sync_model$pl$TRUE_H[,2] true_z <- sync_model$pl$TRUE_H[,3] - ss_long <- sync_model$pl$SS[ss_idx] + if(sync_model$inp_synced$dat_tmb_sync$ss_data_what == "est"){ + ss_long <- sync_model$pl$SS[ss_idx] + } else { + ss_long <- sync_model$inp_synced$dat_tmb_sync$ss_data_vec + } toa_sync_long <- data.table::data.table(reshape2::melt(toa_sync)) colnames(toa_sync_long) <- c('ping_idx','hydro_idx', 'toa_sync') diff --git a/R/syncHelperFuncs.R b/R/syncHelperFuncs.R index 9ebaffa..3612ce1 100644 --- a/R/syncHelperFuncs.R +++ b/R/syncHelperFuncs.R @@ -44,16 +44,27 @@ buildToaListGross <- function(sync_dat, excl_self_detect){ sync_tag_i_idx <- hydro_i_idx # hydro_st_serial <- hydros[sync_tag == sync_tags[st], serial] self_detections <- detections[tag==sync_tags[i] & hydro_idx==hydro_i_idx] + # detects_on_others <- detections[tag==sync_tags[i] & hydro_idx!=hydro_i_idx] # hack added to support ref tags to serve as beacons without a hydro if(nrow(self_detections) == 0){ num_detects_on_hydro <- detections[tag==sync_tags[i], .N, by=hydro_idx] best_hydro_idx <- num_detects_on_hydro[which.max(num_detects_on_hydro$N), hydro_idx] - best_other_detections <- detections[tag==sync_tags[i] & hydro_idx==best_hydro_idx] - self_detections <- best_other_detections + if(length(best_hydro_idx) == 0){ + self_detections <- NA #ugly hack added to support situations where hydros are completely ignored - e.g. due to very crappy performance + } else { + best_other_detections <- detections[tag==sync_tags[i] & hydro_idx==best_hydro_idx] + self_detections <- best_other_detections + } } other_detections <- detections[tag==sync_tags[i] & hydro_idx!=hydro_i_idx] + + # if no other hydro detected the sync_tag it is useless... + if(nrow(other_detections) == 0){ + next + } + self_detections[, epo_roll:=epofrac] other_detections[, epo_roll:=epofrac] toa_i <- t(plyr::daply(.data=other_detections, .variables="hydro_idx", .fun=function(k){ diff --git a/R/syncPlotters.R b/R/syncPlotters.R index 8fa26e0..4344083 100644 --- a/R/syncPlotters.R +++ b/R/syncPlotters.R @@ -1,7 +1,7 @@ #' Plot residuals of sync_model to enable check of model #' #' @param sync_model Synchronization model obtained using `getSyncModel()` -#' @param by What to facet/group the plot by? Currently supports one of 'overall', 'sync_tag', 'hydro', 'quantiles' +#' @param by What to facet/group the plot by? Currently supports one of 'overall', 'sync_tag', 'hydro', 'quantiles', 'temporal', 'temporal_hydro', 'temporal_sync_tag' #' @export plotSyncModelResids <- function(sync_model, by='overall'){ eps_long <- sync_model$eps_long @@ -30,18 +30,53 @@ plotSyncModelResids <- function(sync_model, by='overall'){ out_quants <- quants[ abs(q10) > thres | abs(q90) > thres] p <- ggplot2::ggplot(data=out_quants) + geom_point(aes(x=hydro_idx, y=factor(sync_tag_idx), col=abs(q50), size=N)) p <- p + viridis::scale_color_viridis(option="magma") + labs(y = "sync tag idx") - + } else if(by %in% c('temporal', 'temporal_hydro', 'temporal_sync_tag')){ + T0 <- sync_model$inp_synced$inp_params$T0 + offset_idx <- sync_model$inp_synced$dat_tmb_sync$offset_idx + offset_levels <- sync_model$inp_synced$inp_params$offset_levels + # tops <- sync_model$pl$TOP + sync_model$inp_synced$inp_params$T0 + tops <- as.POSIXct(sync_model$pl$TOP + offset_levels[offset_idx, 1], origin="1970-01-01", tz="UTC") + eps_long[, top := tops[ping]] + if(by == 'temporal_hydro'){ + p <- ggplot2::ggplot(data=eps_long) + ggtitle("by hydro") + geom_point(aes(x=top, y=E_m), pch=".") + geom_hline(data=eps_long[, .(mean_E_m=mean(E_m)), by=hydro_idx], aes(yintercept=mean_E_m), col="red") + facet_wrap(~hydro_idx) + } else if(by == 'temporal_sync_tag'){ + p <- ggplot2::ggplot(data=eps_long) + ggtitle("by sync tag") + geom_point(aes(x=top, y=E_m), pch=".") + geom_hline(data=eps_long[, .(mean_E_m=mean(E_m)), by=sync_tag_idx], aes(yintercept=mean_E_m), col="red") + facet_wrap(~sync_tag_idx) + } else if(by == 'temporal'){ + p <- ggplot2::ggplot(data=eps_long) + ggtitle("by hydro (col) X sync tag (row)") + geom_point(aes(x=top, y=E_m), pch=".") + geom_hline(data=eps_long[, .(mean_E_m=mean(E_m)), by=c('hydro_idx', 'sync_tag_idx')], aes(yintercept=mean_E_m), col="red") + facet_grid(sync_tag_idx~hydro_idx) + } } suppressWarnings(print(p)) } -checkSyncModelResidQuantiles <- function(sync_model){ - eps_long <- sync_model$eps_long +#' Plot hydrophone positions. Especially useful if some hydro re-positioned as part of the sync model. +#' @param sync_model Synchronization model obtained using `getSyncModel()` +#' @param by What to facet/group the plot by? Currently supports one of 'sync_bin_sync', 'sync_bin_hydro', 'sync_bin_sync_smooth', 'sync_bin_hydro_smooth', 'hydro', 'sync_tag' +#' @export +plotSyncModelHydros <- function(sync_model){ + h_pos <- data.table::data.table(sync_model$inp_synced$dat_tmb_sync$H) + colnames(h_pos) <- c('x','y','z') + h_pos[, idx := 1:.N] + h_pos[, x := x + sync_model$inp_synced$inp_params$Hx0] + h_pos[, y := y + sync_model$inp_synced$inp_params$Hy0] + + h_pos[, x_synced := sync_model$pl$TRUE_H[,1]] + h_pos[, y_synced := sync_model$pl$TRUE_H[,2]] + h_pos[, z_synced := sync_model$pl$TRUE_H[,3]] + cols <- ifelse(sync_model$inp_synced$dat_tmb_sync$fixed_hydros_vec == 1, "steelblue", "tomato") + p1 <- ggplot(h_pos) + geom_point(aes(x=x,y=y), size=3) + geom_point(aes(x=x_synced, y=y_synced), col=cols) + ggrepel::geom_text_repel(aes(x=x_synced, y=y_synced, label=idx)) + coord_fixed(ratio=1) + p2 <- ggplot(h_pos) + geom_point(aes(x=idx, y=x-x), size=3) + geom_point(aes(x=idx, y=x-x_synced), col=cols) + ylab("x-x_synced") + p3 <- ggplot(h_pos) + geom_point(aes(x=idx, y=y-y), size=3) + geom_point(aes(x=idx, y=y-y_synced), col=cols) + ylab("y-y_synced") + + return(cowplot::plot_grid(p1, p2, p3, ncol=1, rel_heights=c(5,1,1))) + } + + + #' Plot to check how well the sync model is working #' #' Delta values indicate absolute difference between true and estimated distances based on pairwise relative distances to sync_tag. diff --git a/R/zzz.R b/R/zzz.R index 27ed2e2..6dfc7e6 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -4,7 +4,7 @@ #' @importFrom graphics par plot matplot abline points lines #' @importFrom grDevices dev.off pdf -#' @importFrom ggplot2 aes facet_grid facet_wrap labs scale_fill_gradientn scale_y_log10 theme xlab ylab +#' @importFrom ggplot2 aes facet_grid facet_wrap ggtitle labs scale_fill_gradientn scale_y_log10 theme xlab ylab ylim xlim #' @importFrom ggplot2 geom_bar geom_boxplot geom_histogram geom_hline geom_line geom_point geom_ribbon geom_smooth geom_tile geom_violin geom_vline #' @importFrom stats power rnorm median quantile #' @importFrom utils tail @@ -18,13 +18,14 @@ NULL # Prevent R CMD check from complaining about the use of standard data.table variables and variables inside data.table calls if (getRversion() >= "2.15.1"){ utils::globalVariables(c(".")) - utils::globalVariables(c("bi", "cum_bi", "delta", "delta_eposync", "delta_ping", "dist_to_sync_tag")) + utils::globalVariables(c("bi", "coord_fixed", "cum_bi", "delta", "delta_eposync", "delta_ping", "dist_to_sync_tag")) utils::globalVariables(c("E", "E_m", "epo", "epofrac", "epofrac_lin_corr", "epo_roll", "eposync")) - utils::globalVariables(c("focal_hydro_idx", "frac", "hx", "hy", "hydro_idx", "id", "idx", "idx med_delta", "in_seq", "lin_corr_coeffs_offset", "lin_corr_coeffs_slope", "med_delta")) - utils::globalVariables(c("N", "next_ping_too_late", "OFFSET", "offset_idx", "offset_level", "ping2next", "ping_idx", "q10", "q50")) + utils::globalVariables(c("focal_hydro_idx", "frac", "h_idx", "hx", "hy", "hydro_idx", "id", "idx", "idx med_delta", "in_seq", "lin_corr_coeffs_offset", "lin_corr_coeffs_slope", "med_delta", "mean_E_m")) + utils::globalVariables(c("N", "next_ping_too_late", "OFFSET", "offset_idx", "offset_level", "ping", "ping2next", "ping_idx", "q10", "q50")) utils::globalVariables(c("q90", "roll_eposync", "roll_seq_epo")) utils::globalVariables(c("seq_epo", "seq_lng", "seq_ping_idx", "serial", "slope1","SLOPE1", "SLOPE2", "slope2", "ss", "sync_tag", "sync_tag_idx")) - utils::globalVariables(c("tag", "toa_idx", "ts", "x", "y")) + utils::globalVariables(c("tag", "toa_idx", "top", "ts", "x", "x_synced", "y", "y_synced")) + utils::globalVariables(c("value", "Var1", "Var2")) } diff --git a/README.Rmd b/README.Rmd index 8a2b7d1..7a0fbd9 100644 --- a/README.Rmd +++ b/README.Rmd @@ -182,7 +182,11 @@ points(hy~hx, data=hydros, col="green", pch=20, cex=3) lines(pl$Y~pl$X, col="red") ``` -# Papers using YAPS +# Papers using or relating to YAPS +## 2020 + +* Vergeynst, J., Vanwyck, T., Baeyens, R. et al. (2020). Acoustic positioning in a reflective environment: going beyond point-by-point algorithms. Anim Biotelemetry 8, 16. https://doi.org/10.1186/s40317-020-00203-1 + ## 2019 * Baktoft, H., Gjelland, K.Ø., Økland, F., Rehage, J.S., Rodemann, J.R., Corujo, R.S., Viadero, N., Thygesen, U.H. (2019). Opening the black box of fish tracking using acoustic telemetry diff --git a/README.html b/README.html deleted file mode 100644 index 81c0f6b..0000000 --- a/README.html +++ /dev/null @@ -1,754 +0,0 @@ - - - - -
- - - - - - - - - - - - - - - -Welcome to the yaps
repository. The yaps
package is based on the original YAPS presented in Baktoft, Gjelland, Økland & Thygesen (2017): Positioning of aquatic animals based on time-of-arrival and random walk models using YAPS (Yet Another Positioning Solver)
To use yaps
on own data, you need to compile a TOA-matrix based on synchronized hydrophone data and replace the hydros dataframe with actual hydrophone positions. A complete step-by-step guide on how to do this, can be found in our pre-print paper Opening the black box of fish tracking using acoustic telemetry. The example in this guide is based on data collected using a 69 kHz PPM-based system (Vemco VR2). We are working towards adding examples based on data collected using other manufacturers.
We are working towards true live tracking for aquatic animals. Check out our prototype of yaps-live (or click the screenshot below). The track estimation is done on-the-fly using yaps
, but the live-stream of detection data is at the moment computer generated from manually downloaded data.
The yaps
package requires devtools and TMB. Please see instructions on TMB installation. If working on Windows, you might also need to install Rtools as specified in the TMB documentation.
yaps
obeys the fundamental rule of “garbage in, garbage out”. Therefore, DO NOT expect yaps
to salvage a poorly designed study, nor to turn crappy data into gold.
-We have attempted to make both synchronization process and track estimation user-friendly. However, it is not trivial to synchronize hydrophones (let alone automating the process) based on detections in a variable and often noisy environment. Hydrophones might be replaced/shifted and if not fixed securely, hydrophones might move/be moved during a study. Additionally, hydrophone performance and output format varies considerably among (and within) manufacturers. On top of that, hydrophones don’t always behave and perform as expected. For instance, some hydrophone models autonomously initiate reboots causing perturbation of varying magnitude and/or duration of the internal clock at apparently random time intervals. Therefore, the functions in yaps
might perform sub-optimal or even fail miserably when applied to new data. If/when this happens, please let us know through a direct message or leave a bug-report. Also note, the to-do list for improvements and tweaks is long and growing, so stay tuned for updates.
Make sure you have the newest version of yaps
installed. For this, you need devtools
installed - if not already installed, run install.packages('devtools')
.
-yaps
relies heavily on use of Template Model Builder TMB for fitting the models, so make sure TMB
is installed and working by following the simple TMB instructions.
-Then install the latest version of yaps
with:
install.packages("devtools")
- install.packages("TMB")
- TMB::runExample(all=TRUE)
- devtools::install_github("baktoft/yaps")
The code below is identical to the example workflow presented in Opening the black box of fish tracking using acoustic telemetry. See the pre-print for further explantion.
-library(yaps)
-
-# set sync parameters
-max_epo_diff <- 120
-min_hydros <- 2
-time_keeper_idx <- 5
-fixed_hydros_idx <- c(2:3, 6, 8, 11, 13:17)
-n_offset_day <- 2
-n_ss_day <- 2
-
-# get input data ready for getSyncModel()
-inp_sync <- getInpSync(sync_dat=ssu1, max_epo_diff, min_hydros, time_keeper_idx,
- fixed_hydros_idx, n_offset_day, n_ss_day)
-
-# fit the sync model
-sync_model <- getSyncModel(inp_sync, silent=TRUE)
-
-# Plot model residuals and model check plots to ensure the synchronization process was successful...
-plotSyncModelResids(sync_model, by='overall')
-plotSyncModelResids(sync_model, by='sync_tag')
-plotSyncModelResids(sync_model, by='hydro')
-
-plotSyncModelCheck(sync_model, by="sync_bin_sync")
-plotSyncModelCheck(sync_model, by="sync_bin_hydro")
-plotSyncModelCheck(sync_model, by="sync_tag")
-plotSyncModelCheck(sync_model, by="hydro")
-
-# Apply the synchronization model to all data
-detections_synced <- applySync(toa=ssu1$detections, hydros=ssu1$hydros, sync_model)
-
-# Prepare to estimate track using `yaps` on newly synchronized `ssu1` data
-hydros_yaps <- data.table::data.table(sync_model$pl$TRUE_H)
-colnames(hydros_yaps) <- c('hx','hy','hz')
-
-# Specify focal tag and tag specific min and max burst intervals
-focal_tag <- 15266
-rbi_min <- 20
-rbi_max <- 40
-
-# Extract relevant data from the synced data
-synced_dat_ssu1 <- detections_synced[tag == focal_tag]
-
-# Compile TOA-matrix to use for yaps
-toa_ssu1 <- getToaYaps(synced_dat_ssu1, hydros_yaps, rbi_min, rbi_max)
-
-# Compile all input data needed for yaps
-inp_ssu1 <- getInp(hydros_yaps, toa_ssu1, E_dist="Mixture", n_ss=2, pingType="rbi",
- sdInits=1, rbi_min=rbi_min, rbi_max=rbi_max, ss_data_what="est", ss_data=0)
-
-# Run yaps to obtain estimated track
-yaps_out_ssu1 <- runYaps(inp_ssu1, silent=TRUE)
-
-# plot the estimated track
-plotYaps(inp=inp_ssu1, yaps_out=yaps_out_ssu1, type="map")
-# Add gps track for direct comparison
-lines(utm_y~utm_x, data=ssu1$gps, lty=2)
-
-par(mfrow=c(2,1))
-plotYaps(inp=inp_ssu1, yaps_out=yaps_out_ssu1, type="coord_X")
-lines(utm_x~ts, data=ssu1$gps, lty=2)
-plotYaps(inp=inp_ssu1, yaps_out=yaps_out_ssu1, type="coord_Y")
-lines(utm_y~ts, data=ssu1$gps, lty=2)
devtools::install_github("baktoft/yaps")
-rm(list=ls())
-library(yaps)
-
-# Simulate true track of animal movement of n seconds
-trueTrack <- simTrueTrack(model='crw', n = 15000, deltaTime=1, shape=1, scale=0.5, addDielPattern=TRUE, ss='rw')
-
-# Simulate telemetry observations from true track.
-# Format and parameters depend on type of transmitter burst interval (BI) - stable (sbi) or random (rbi).
-pingType <- 'sbi'
-
-if(pingType == 'sbi') { # stable BI
- sbi_mean <- 30; sbi_sd <- 1e-4;
- teleTrack <- simTelemetryTrack(trueTrack, pingType=pingType, sbi_mean=sbi_mean, sbi_sd=sbi_sd)
-} else if(pingType == 'rbi'){ # random BI
- pingType <- 'rbi'; rbi_min <- 20; rbi_max <- 40;
- teleTrack <- simTelemetryTrack(trueTrack, pingType=pingType, rbi_min=rbi_min, rbi_max=rbi_max)
-}
-
-# Simulate hydrophone array
-hydros <- simHydros(auto=TRUE, trueTrack=trueTrack)
-toa_list <- simToa(teleTrack, hydros, pingType, sigmaToa=1e-4, pNA=0.25, pMP=0.01)
-toa <- toa_list$toa
-
-# Specify whether to use ss_data from measured water temperature (ss_data_what <- 'data') or to estimate ss in the model (ss_data_what <- 'est')
-ss_data_what <- 'data'
-if(ss_data_what == 'data') {ss_data <- teleTrack$ss} else {ss_data <- 0}
-
-
-if(pingType == 'sbi'){
- inp <- getInp(hydros, toa, E_dist="Mixture", n_ss=10, pingType=pingType, sdInits=0, ss_data_what=ss_data_what, ss_data=ss_data)
-} else if(pingType == 'rbi'){
- inp <- getInp(hydros, toa, E_dist="Mixture", n_ss=10, pingType=pingType, sdInits=0, rbi_min=rbi_min, rbi_max=rbi_max, ss_data_what=ss_data_what, ss_data=ss_data)
-}
-str(inp)
-
-pl <- c()
-maxIter <- ifelse(pingType=="sbi", 500, 5000)
-outTmb <- runTmb(inp, maxIter=maxIter, getPlsd=TRUE, getRep=TRUE)
-str(outTmb)
-
-# Estimates in pl
-pl <- outTmb$pl
-# Correcting for hydrophone centering
-pl$X <- outTmb$pl$X + inp$inp_params$Hx0
-pl$Y <- outTmb$pl$Y + inp$inp_params$Hy0
-
-
-# Error estimates in plsd
-plsd <- outTmb$plsd
-
-# plot the resulting estimated track
-plot(y~x, data=trueTrack, type="l", xlim=range(hydros$hx), ylim=range(hydros$hy), asp=1)
-lines(y~x, data=teleTrack)
-points(hy~hx, data=hydros, col="green", pch=20, cex=3)
-lines(pl$Y~pl$X, col="red")
Baktoft, H., Gjelland, K.Ø., Økland, F., Rehage, J.S., Rodemann, J.R., Corujo, R.S., Viadero, N., Thygesen, U.H. (2019). Opening the black box of fish tracking using acoustic telemetry bioRxiv 2019.12.16.877688; doi: https://doi.org/10.1101/2019.12.16.877688
Silva, A.T., Bærum, K.M., Hedger, R.D., Baktoft, H., Fjeldstad, H., Gjelland, K.Ø., Økland, F. Forseth, T. (2019). Science of the Total Environment The effects of hydrodynamics on the three-dimensional downstream migratory movement of Atlantic salmon. Science of the Total Environment, 135773. https://doi.org/10.1016/j.scitotenv.2019.135773
Szabo-Meszaros, M., Forseth, T., Baktoft, H., Fjeldstad, H.-P., Silva, A.T., Gjelland, K.Ø., Økland, F., Uglem, I., Alfredsen, K. (2019). Modelling mitigation measures for smolt migration at dammed river sections. Ecohydrology, e2131. https://doi.org/10.1002/eco.2131