Skip to content

Commit

Permalink
allo#72: Prefer expert over generic equations.
Browse files Browse the repository at this point in the history
  • Loading branch information
maurolepore committed Mar 22, 2019
1 parent 9f0a803 commit 62838e6
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 42 deletions.
47 changes: 24 additions & 23 deletions .buildignore/dbh-vs-biomass.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ Plot dbh vs. biomass by species
``` r
# Setup
library(tidyverse)
#> -- Attaching packages ------------------------------------------------ tidyverse 1.2.1 --
#> -- Attaching packages --------------------------------------------- tidyverse 1.2.1 --
#> v ggplot2 3.1.0 v purrr 0.3.1
#> v tibble 2.0.1 v dplyr 0.8.0.1
#> v tidyr 0.8.3 v stringr 1.4.0
#> v readr 1.3.1 v forcats 0.4.0
#> Warning: package 'purrr' was built under R version 3.5.3
#> -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
#> -- Conflicts ------------------------------------------------ tidyverse_conflicts() --
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag() masks stats::lag()
library(fgeo.biomass)
Expand Down Expand Up @@ -41,11 +41,12 @@ census_species <- census %>%

census_equations <- allo_find(census_species)
#> Assuming `dbh` in [mm] (required to find dbh-specific equations).
#> * Searching equations according to site and species.
#> Warning: Can't find equations matching these species:
#> acer negundo, acer platanoides, acer sp, ailanthus altissima, asimina triloba, berberis thunbergii, carpinus caroliniana, carya sp, castanea dentata, celtis occidentalis, cercis canadensis, chionanthus virginicus, corylus americana, crataegus pruinosa, crataegus sp, diospyros virginiana, elaeagnus umbellata, fraxinus sp, ilex verticillata, juglans cinerea, juglans nigra, juniperus virginiana, lindera benzoin, paulownia tomentosa, quercus prinus, quercus sp, sambucus canadensis, ulmus sp, unidentified unk
#> * Matching equations by site and species.
#> * Refining equations according to dbh.
#> Warning: Can't find equations for 21204 rows (inserting `NA`).
#> * Using generic equations where expert equations can't be found.
#> Warning: Can't find equations matching these species:
#> acer sp, carya sp, crataegus sp, fraxinus sp, juniperus virginiana, quercus prinus, quercus sp, ulmus sp, unidentified unk
#> Warning: Can't find equations for 17132 rows (inserting `NA`).
```

Notice the warning that equations couldn’t be found. This is in part
Expand All @@ -62,7 +63,7 @@ drop them.
census_equations %>%
filter(is.na(equation_id)) %>%
select(rowid, site, sp, equation_id)
#> # A tibble: 21,204 x 4
#> # A tibble: 17,132 x 4
#> rowid site sp equation_id
#> <int> <chr> <chr> <chr>
#> 1 1 scbi lindera benzoin <NA>
Expand All @@ -71,11 +72,11 @@ census_equations %>%
#> 4 4 scbi nyssa sylvatica <NA>
#> 5 5 scbi hamamelis virginiana <NA>
#> 6 7 scbi unidentified unk <NA>
#> 7 8 scbi lindera benzoin <NA>
#> 8 9 scbi viburnum prunifolium <NA>
#> 9 10 scbi asimina triloba <NA>
#> 10 11 scbi asimina triloba <NA>
#> # ... with 21,194 more rows
#> 7 9 scbi viburnum prunifolium <NA>
#> 8 10 scbi asimina triloba <NA>
#> 9 11 scbi asimina triloba <NA>
#> 10 12 scbi asimina triloba <NA>
#> # ... with 17,122 more rows

# Dropping useless rows to continue
census_equations2 <- census_equations %>%
Expand All @@ -93,20 +94,20 @@ biomass <- allo_evaluate(census_equations2)
#> object 'dba' not found
#> Warning: `biomass` may be invalid. This is still work in progress.
biomass
#> # A tibble: 9,977 x 2
#> # A tibble: 14,049 x 2
#> rowid biomass
#> <int> <dbl>
#> 1 6 NA
#> 2 21 231.
#> 3 23 NA
#> 4 29 469.
#> 5 38 4.96
#> 6 69 NA
#> 7 72 349.
#> 8 81 NA
#> 9 84 126.
#> 10 88 NA
#> # ... with 9,967 more rows
#> 2 8 5.69
#> 3 17 11.3
#> 4 21 231.
#> 5 22 10.3
#> 6 23 NA
#> 7 26 4.15
#> 8 29 469.
#> 9 34 3.44
#> 10 38 4.96
#> # ... with 14,039 more rows
```

We now learn that some equations couldn’t be evaluated. The problem now
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
34 changes: 20 additions & 14 deletions R/allo_find.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,22 @@ allo_find_impl <- function(data, custom_eqn) {
suppressMessages(fgeo.biomass::default_eqn(allodb::master_tidy()))
abort_if_not_eqn(eqn)

fixme_exclud_generic_equaitons <- function(data) {
filter(data, !.data$is_generic)
}
eqn <- fixme_exclud_generic_equaitons(eqn)

inform("* Searching equations according to site and species.")
inform("* Matching equations by site and species.")
.by <- c("sp", "site")
dbh_all <- dplyr::left_join(data, eqn, by = .by)
warn_if_species_missmatch(data, eqn)
matched <- dplyr::left_join(data, eqn, by = .by)

inform("* Refining equations according to dbh.")
dbh_all$dbh_in_range <- is_in_range(
dbh_all$dbh, min = dbh_all$dbh_min_mm, max = dbh_all$dbh_max_mm
matched$dbh_in_range <- is_in_range(
matched$dbh, min = matched$dbh_min_mm, max = matched$dbh_max_mm
)
in_range <- filter(dbh_all, .data$dbh_in_range)
out <- suppressMessages(dplyr::left_join(data, in_range))
out$dbh_in_range <- NULL
in_range <- filter(matched, .data$dbh_in_range)
refined <- suppressMessages(dplyr::left_join(data, in_range))
refined$dbh_in_range <- NULL

inform("* Using generic equations where expert equations can't be found.")
out <- prefer_expert_equaitons(refined)

warn_if_species_missmatch(out, eqn)
warn_if_missing_equations(out)

out
Expand Down Expand Up @@ -103,7 +102,7 @@ warn_if_species_missmatch <- function(data, eqn) {
warn(glue("
Can't find equations matching these species:
{missmatching}
"))
"))
}

invisible(data)
Expand All @@ -119,3 +118,10 @@ warn_if_missing_equations <- function(data) {

invisible(data)
}

prefer_expert_equaitons <- function(data) {
data %>%
group_by(.data$rowid) %>%
filter(replace_na(prefer_false(.data$is_generic), TRUE)) %>%
ungroup()
}
14 changes: 14 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
prefer_false <- function(x) {
stopifnot(is.logical(x))
if (all(x[!is.na(x)])) {
x
} else {
!x
}
}

replace_na <- function(x, replacement) {
x[is.na(x)] <- replacement
x
}

is_in_range <- function(x, min, max) {
x >= min & x <= max
}
Expand Down
19 changes: 14 additions & 5 deletions tests/testthat/test-allo_find.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,26 @@ context("allo_find")
library(dplyr)
set.seed(1)

test_that("allo_find excludes generic equations (until we support them #28)", {
test_that("allo_find prefers expert over generic equations (allo#72)", {
cns_sp <- fgeo.biomass::scbi_tree1 %>%
dplyr::sample_n(30) %>%
dplyr::sample_n(1000) %>%
add_species(fgeo.biomass::scbi_species, "scbi")

excludes_generic_equations <- any(
some_generic_equations <- any(
as.vector(na.omit(
suppressWarnings(allo_find(cns_sp))$is_generic
))
)
expect_false(excludes_generic_equations)
expect_true(some_generic_equations)

out <- allo_find(cns_sp)
pref <- out %>%
group_by(rowid) %>%
filter(
replace_na(prefer_false(is_generic), TRUE)
) %>%
ungroup()
expect_equal(out, pref)
})

test_that("allo_find does not warn if dbh in [mm]", {
Expand Down Expand Up @@ -44,7 +53,7 @@ test_that("allo_find warns non matching species", {

expect_warning(
allo_find(census_species),
"Can't find equations matching these species"
"Can't find.*equations matching these species"
)
})

Expand Down
40 changes: 40 additions & 0 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,43 @@
context("prefer_false")

expect_equal(prefer_false(c(T, NA)), c(T, NA))
expect_equal(prefer_false(c(F, NA)), c(T, NA))
expect_equal(prefer_false(c(F, NA, T)), c(T, NA, F))

expect_equal(
replace_na(
prefer_false(c(F, NA, T)),
TRUE
),
c(T, T, F)
)

expect_equal(prefer_false(c(T)), c(T))
expect_equal(prefer_false(c(F)), c(T))
expect_equal(prefer_false(c(F, T)), c(T, F))
expect_equal(prefer_false(c(T, T)), c(T, T))
expect_equal(prefer_false(c(T, F, F)), c(F, T, T))

dfm <- tibble::tribble(
~id, ~lgl,
1, TRUE,
1, FALSE,
2, FALSE,
3, TRUE,
)

# Ungrouped
out <- filter(dfm, prefer_false(lgl))
expect_equal(out$id, c(1, 2))
expect_equal(out$lgl, c(FALSE, FALSE))

# Grouped
out <- filter(group_by(dfm, id), prefer_false(lgl))
expect_equal(out$id, c(1, 2, 3))
expect_equal(out$lgl, c(FALSE, FALSE, TRUE))



context("is_in_range")

test_that("is_in_range returns true if in range, else returns false", {
Expand Down

0 comments on commit 62838e6

Please sign in to comment.