Skip to content

Commit

Permalink
Created get_rates_of_misleading_slrs()
Browse files Browse the repository at this point in the history
  • Loading branch information
stephaniereinders committed Dec 13, 2024
1 parent 7ee55cd commit 42a1ac8
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(compare_documents)
export(compare_writer_profiles)
export(get_cluster_fill_rates)
export(get_distances)
export(get_rates_of_misleading_slrs)
export(get_ref_scores)
export(interpret_slr)
export(plot_scores)
Expand Down
45 changes: 45 additions & 0 deletions R/slrs.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,51 @@ interpret_slr <- function(df) {
return(x)
}

#' Get Rates of Misleading Evidence for SLRs
#'
#' Calculate the rates of misleading evidence for score-based likelihood ratios
#' (SLRs) when the ground truth is known.
#'
#' @param df A data frame of SLRs from [`compare_writer_profiles`] with
#' `score_only = FALSE`.
#' @param threshold A number greater than zero that serves as a decision
#' threshold. If the ground truth for two documents is that they came from the
#' same writer and the SLR is less than the decision threshold, this is
#' misleading evidence that incorrectly supports the defense (false negative).
#' If the ground truth for two documents is that they came from different
#' writers and the SLR is greater than the decision threshold, this is
#' misleading evidence that incorrectly supports the prosecution (false
#' positive).
#'
#' @return A list
#' @export
#'
#' @examples
#' \donttest{
#' comparisons <- compare_writer_profiles(test, score_only = FALSE)
#' get_rates_of_misleading_slrs(comparisons)
#' }
#'
get_rates_of_misleading_slrs <- function(df, threshold = 1) {
# Use across to prevent "no visible binding for global variable"
known_matches <- df |>
dplyr::filter(dplyr::across(c("ground_truth")) == "same writer")
known_non_matches <- df |>
dplyr::filter(dplyr::across(c("ground_truth")) == "different writer")

# Use across to prevent "no visible binding for global variable"
fn <- known_matches |>
dplyr::filter(dplyr::across(c("slr")) < threshold)
fp <- known_non_matches |>
dplyr::filter(dplyr::across(c("slr")) > threshold)

defense <- nrow(fn) / nrow(known_matches)
prosecution <- nrow(fp) / nrow(known_non_matches)

return(list("incorrectly_favors_defense" = defense, "incorrectly_favors_prosecution" = prosecution))

}


# Internal Functions ------------------------------------------------------

Expand Down
35 changes: 35 additions & 0 deletions man/get_rates_of_misleading_slrs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 42a1ac8

Please sign in to comment.