From b6da9b879b4932e111a86c6e8e4ae4b8bd728193 Mon Sep 17 00:00:00 2001 From: Garrick Aden-Buie Date: Mon, 31 Jan 2022 10:00:10 -0500 Subject: [PATCH 1/2] Fix example tutorials --- .../ex-data-basics/ex-data-basics.Rmd | 60 ++-- .../ex-data-filter/ex-data-filter.Rmd | 85 +++--- .../ex-data-mutate/ex-data-mutate.Rmd | 99 ++++--- .../ex-data-manip-summarise.Rmd | 279 +++++++++--------- 4 files changed, 271 insertions(+), 252 deletions(-) diff --git a/inst/tutorials/ex-data-basics/ex-data-basics.Rmd b/inst/tutorials/ex-data-basics/ex-data-basics.Rmd index 86f260808..721ccac19 100755 --- a/inst/tutorials/ex-data-basics/ex-data-basics.Rmd +++ b/inst/tutorials/ex-data-basics/ex-data-basics.Rmd @@ -1,6 +1,6 @@ --- title: "Data basics" -output: +output: learnr::tutorial: progressive: true allow_skip: true @@ -20,25 +20,25 @@ tutorial_options(exercise.timelimit = 60) ## Welcome -In this tutorial, you will learn how to use R to inspect the contents of a data frame or tibble. Data frames and tibbles are R's structures for storing tabular data; if you inherit a tabular dataset in R, it will almost certainly come as one of these structures. +In this tutorial, you will learn how to use R to inspect the contents of a data frame or tibble. Data frames and tibbles are R's structures for storing tabular data; if you inherit a tabular dataset in R, it will almost certainly come as one of these structures. Here, you will learn how to do three things with data frames and tibbles: 1. Look at the contents of a data frame or tibble 2. Open a help page that describes a data frame or tibble -3. Identify the variables and their types in a tibble +3. Identify the variables and their types in a tibble -You will also meet the `mpg` and `flights` datasets. These datasets appear frequently in R examples. +You will also meet the `mpg` and `flights` datasets. These datasets appear frequently in R examples. -The readings in this tutorial follow [_R for Data Science_](http://r4ds.had.co.nz/), sections 3.2 and 5.1. +The readings in this tutorial follow [_R for Data Science_](http://r4ds.had.co.nz/), sections 3.2 and 5.1. ## Data frames ### What is a data frame? -A __data frame__ is a rectangular collection of values, usually organized so that variables appear in the columns and observations appear in rows. +A __data frame__ is a rectangular collection of values, usually organized so that variables appear in the columns and observations appear in rows. -Here is an example: the `mpg` data frame contains observations collected by the US Environmental Protection Agency on 38 models of cars. To see the `mpg` data frame, type `mpg` in the code chunk below and then click "Submit Answer." +Here is an example: the `mpg` data frame contains observations collected by the US Environmental Protection Agency on 38 models of cars. To see the `mpg` data frame, type `mpg` in the code chunk below and then click "Run Code". ```{r mpg-setup} mpg <- as.data.frame(mpg) @@ -49,13 +49,9 @@ mpg <- as.data.frame(mpg) ```
-**Hint:** Type `mpg` and then click the Submit Answer button. +**Hint:** Type `mpg` and then click the Run Code button.
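
If printing all of `mpg` feels like too much at once, a few base R helpers give a quicker overview. This is an optional sketch, not part of the exercise; it uses only base R plus the ggplot2 package that is already loaded in this tutorial:

```{r mpg-overview-sketch, eval = FALSE}
library(ggplot2)  # only needed on your own computer; already loaded here

head(mpg)  # the first six rows
dim(mpg)   # the number of rows and columns
str(mpg)   # one line per column: its name, type, and first few values
```
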
-```{r mpg-check, echo = FALSE} -# checking code -``` - ### A note about mpg The code above worked because I've already loaded the ggplot2 package for you in this tutorial: `mpg` comes in the ggplot2 package. If you would like to look at `mpg` on your own computer, you will need to first load ggplot2. You can do that in two steps: @@ -63,9 +59,9 @@ The code above worked because I've already loaded the ggplot2 package for you in 1. Run `install.packages('ggplot2')` to install ggplot2 if you do not yet have it. 2. Load ggplot2 with the `library(ggplot2)` command -After that, you will be able to access any object in ggplot2—including `mpg`—until you close R. +After that, you will be able to access any object in ggplot2—including `mpg`—until you close R. -### +### Did you notice how much information was inside `mpg`? Me too. Sometimes the contents of a data frame do not fit on a single screen, which makes them difficult to inspect. We'll look at an alternative to using and examining data frames soon. But first let's get some help... @@ -73,23 +69,19 @@ Did you notice how much information was inside `mpg`? Me too. Sometimes the cont ### How to open a help page -You can learn more about `mpg` by opening its help page. The help page will explain where the `mpg`dataset comes from and what each variable in `mpg` describes. To open the help page, type `?mpg` in the code chunk below and then click "Submit Answer". +You can learn more about `mpg` by opening its help page. The help page will explain where the `mpg`dataset comes from and what each variable in `mpg` describes. To open the help page, type `?mpg` in the code chunk below and then click "Run Code". ```{r help, exercise = TRUE} ```
-**Hint:** Type `?mpg` and then click the Submit Answer button. +**Hint:** Type `?mpg` and then click the Run Code button.
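
As an optional aside (not part of the exercise), the same documentation can be reached in a few equivalent ways; `help()` and the `??` search operator are standard base R tools:

```{r help-variants-sketch, eval = FALSE}
?mpg                       # shorthand for the line below
help("mpg")                # the same help page, spelled out
help(package = "ggplot2")  # an index of everything the package ships
??"fuel economy"           # full-text search across installed packages
```
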
-```{r help-check, echo = FALSE} -# checking code -``` - ### ? syntax -You can open a help page for any object that comes with R or with an R package. To open the help page, type a `?` before the object's name and then run the command, as you did with `?mpg`. This technique works for functions, packages, and more. +You can open a help page for any object that comes with R or with an R package. To open the help page, type a `?` before the object's name and then run the command, as you did with `?mpg`. This technique works for functions, packages, and more. Notice that objects created by you or your colleagues will not have a help page (unless you make one). @@ -104,26 +96,26 @@ Use the code chunk below to answer the following questions. ```{r quiz1, echo = FALSE} quiz(caption = "Quiz", question("What does the `drv` variable of `mpg` describe? Read the help for `?mpg` to find out.", - answer("Whether or not the vehicle has driver side airbags"), + answer("Whether or not the vehicle has driver side airbags"), answer("Whether a car is automatic or manual transmission"), answer("The number of cylinders in the car's engine"), - answer("Something else", correct = TRUE, message = "`drv` describes the type of drivetrain in a car: front wheel drive, rear wheel drive, or four wheel drive."), + answer("Something else", correct = TRUE, message = "`drv` describes the type of drivetrain in a car: front wheel drive, rear wheel drive, or four wheel drive."), allow_retry = TRUE ), question("How many rows are in the data frame named `cars`?", - answer("2"), + answer("2"), answer("25"), answer("50", correct = TRUE), answer("100"), - incorrect = "Incorrect.\nHint: R numbers the rows of a data frame when it displays the contents of a data frame. As a result, you can spot the number of rows in `cars` by examining `cars` in the code block above.", + incorrect = "Incorrect.\nHint: R numbers the rows of a data frame when it displays the contents of a data frame. As a result, you can spot the number of rows in `cars` by examining `cars` in the code block above.", allow_retry = TRUE ), question("How many columns are in the data frame named `cars`?", - answer("1"), + answer("1"), answer("2", correct = TRUE), answer("4"), answer("more than four"), - incorrect = "Incorrect.\nHint: If you inspect the contents of `cars` in the code block above, it should be pretty easy to count the number of columns.", + incorrect = "Incorrect.\nHint: If you inspect the contents of `cars` in the code block above, it should be pretty easy to count the number of columns.", allow_retry = TRUE ) ) @@ -133,7 +125,7 @@ quiz(caption = "Quiz", ### What is a tibble? -The `flights` data frame in the nycflights13 package is an example of a _tibble_. Tibbles are a data frames with some extra properties. +The `flights` data frame in the nycflights13 package is an example of a _tibble_. Tibbles are a data frames with some extra properties. To see what I mean, use the code chunk below to print the contents of `flights`. @@ -142,10 +134,10 @@ To see what I mean, use the code chunk below to print the contents of `flights`. ```
-**Hint:** Type the name of the data frame that you want to print and then click the Submit Answer button. I've already loaded the nycflight13 package for you.
+**Hint:** Type the name of the data frame that you want to print and then click the Run Code button. I've already loaded the nycflights13 package for you.
</div>

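
An optional aside: another compact way to inspect a large tibble such as `flights` is `glimpse()`, which comes with the tidyverse packages loaded in this tutorial:

```{r glimpse-flights-sketch, eval = FALSE}
glimpse(flights)  # one line per column: name, type, and the first few values
```
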
-### +### Good Job. `flights` describes every flight that departed from New York City in 2013. The data comes from the [US Bureau of Transportation Statistics](http://www.transtats.bts.gov/DatabaseInfo.asp?DB_ID=120&Link=0), and is documented in `?flights`. @@ -156,17 +148,17 @@ You might notice that `flights` looks a little differently than `mpg`. `flights` `flights` prints differently because it's a __tibble__. Tibbles are data frames that are slightly tweaked to be more user-friendly. For example, R doesn't try to show you all of a tibble at once (but it will try to show you all of a data frame that is not a tibble). -You can use `as_tibble()` to return a tibble version of any data frame. For example, this would return a tibble version of `mpg`: `as_tibble(mpg)`. +You can use `as_tibble()` to return a tibble version of any data frame. For example, this would return a tibble version of `mpg`: `as_tibble(mpg)`. ## Data types ### Type codes - + ```{r flights3, echo = FALSE} flights -``` - +``` + Did you notice that a row of three (or four) letter abbreviations appears under the column names of `flights`? These abbreviations describe the _type_ of data that is stored in each column of `flights`: * `int` stands for integers. diff --git a/inst/tutorials/ex-data-filter/ex-data-filter.Rmd b/inst/tutorials/ex-data-filter/ex-data-filter.Rmd index 6e42856dc..1f54dd850 100755 --- a/inst/tutorials/ex-data-filter/ex-data-filter.Rmd +++ b/inst/tutorials/ex-data-filter/ex-data-filter.Rmd @@ -1,6 +1,6 @@ --- title: "Filter observations" -output: +output: learnr::tutorial: progressive: true allow_skip: true @@ -17,7 +17,18 @@ library(tidyverse) library(nycflights13) library(Lahman) -tutorial_options(exercise.timelimit = 60) +tutorial_options( + exercise.timelimit = 60, + # A simple checker function that just returns the message in the check chunk + exercise.checker = function(check_code, ...) { + learnr::feedback( + message = eval(parse(text = check_code)), + correct = logical(0), + type = "info", + location = "append" + ) + } +) knitr::opts_chunk$set(error = TRUE) ``` @@ -25,7 +36,7 @@ knitr::opts_chunk$set(error = TRUE) This is a demo tutorial. Compare it to the [source code](https://github.com/rstudio/learnr/tree/main/inst/tutorials/ex-data-filter/ex-data-filter.Rmd) that made it. -### +### In this tutorial, you will learn how to: @@ -34,13 +45,13 @@ In this tutorial, you will learn how to: * combine logical tests with Boolean operators * handle missing values within logical tests -The readings in this tutorial follow [_R for Data Science_](http://r4ds.had.co.nz/), section 5.2. +The readings in this tutorial follow [_R for Data Science_](http://r4ds.had.co.nz/), section 5.2. ### Prerequisites To practice these skills, we will use the `flights` data set from the nycflights13 package. This data frame comes from the US [Bureau of Transportation Statistics](http://www.transtats.bts.gov/DatabaseInfo.asp?DB_ID=120&Link=0) and contains all `r format(nrow(nycflights13::flights), big.mark = ",")` flights that departed from New York City in 2013. It is documented in `?flights`. -We will also use the ggplot2 package to visualize the data. +We will also use the ggplot2 package to visualize the data. If you are ready to begin, click on! @@ -59,9 +70,9 @@ filter(flights, month == 1, day == 1) ### output -Like all dplyr functions, `filter()` returns a new data frame for you to save or use. It doesn't overwrite the old data frame. +Like all dplyr functions, `filter()` returns a new data frame for you to save or use. 
It doesn't overwrite the old data frame. -If you want to save the output of `filter()`, you'll need to use the assignment operator, `<-`. +If you want to save the output of `filter()`, you'll need to use the assignment operator, `<-`. Rerun the command in the code chunk below, but first arrange to save the output to an object named `jan1`. @@ -73,7 +84,7 @@ filter(flights, month == 1, day == 1) jan1 <- filter(flights, month == 1, day == 1) ``` -### +### Good job! You can now see the results by running the name jan1 by itself. Or you can pass `jan1` to a function that takes data frames as input. @@ -89,7 +100,7 @@ R provides a suite of comparison operators that you can use to compare values: ` pi > 3 ``` -### +### When you place a logical test inside of `filter()`, filter applies the test to each row in the data frame and then returns the rows that pass, as a new data frame. @@ -105,13 +116,13 @@ filter(flights, month = 1) ### Multiple tests -If you give `filter()` more than one logical test, `filter()` will combine the tests with an implied "and." In other words, `filter()` will return only the rows that return `TRUE` for every test. You can combine tests in other ways with Boolean operators... +If you give `filter()` more than one logical test, `filter()` will combine the tests with an implied "and." In other words, `filter()` will return only the rows that return `TRUE` for every test. You can combine tests in other ways with Boolean operators... ## Boolean operators ### &, |, and ! -R uses boolean operators to combine multiple logical comparisons into a single logical test. These include `&` (_and_), `|` (_or_), `!` (_not_ or _negation_), and `xor()` (_exactly or_). +R uses boolean operators to combine multiple logical comparisons into a single logical test. These include `&` (_and_), `|` (_or_), `!` (_not_ or _negation_), and `xor()` (_exactly or_). Both `|` and `xor()` will return TRUE if one or the other logical comparison returns TRUE. `xor()` differs from `|` in that it will return FALSE if both logical comparisons return TRUE. The name _xor_ stands for _exactly or_. @@ -139,7 +150,7 @@ In R, the order of operations doesn't work like English. You can't write `filter Here are four more tips to help you use logical tests and Boolean operators in R: -### +### 1. A useful short-hand for this problem is `x %in% y`. This will select every row where `x` is one of the values in `y`. We could use it to rewrite the code in the question above: @@ -147,7 +158,7 @@ Here are four more tips to help you use logical tests and Boolean operators in R nov_dec <- filter(flights, month %in% c(11, 12)) ``` -### +### 2. Sometimes you can simplify complicated subsetting by remembering De Morgan's law: `!(x & y)` is the same as `!x | !y`, and `!(x | y)` is the same as `!x & !y`. For example, if you wanted to find flights that weren't delayed (on arrival or departure) by more than two hours, you could use either of the following two filters: @@ -156,11 +167,11 @@ Here are four more tips to help you use logical tests and Boolean operators in R filter(flights, arr_delay <= 120, dep_delay <= 120) ``` -### +### 3. As well as `&` and `|`, R also has `&&` and `||`. Don't use them with `filter()`! You'll learn when you should use them later. -### +### 4. Whenever you start using complicated, multipart expressions in `filter()`, consider making them explicit variables instead. That makes it much easier to check your work. You'll learn how to create new variables shortly. 
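
To make the De Morgan's law tip above concrete, here is a small sketch using made-up logical vectors (nothing flight-specific):

```{r demorgan-sketch, eval = FALSE}
x <- c(TRUE, TRUE, FALSE, FALSE)
y <- c(TRUE, FALSE, TRUE, FALSE)

!(x & y) == (!x | !y)  # TRUE TRUE TRUE TRUE
!(x | y) == (!x & !y)  # TRUE TRUE TRUE TRUE
```

Either form works inside `filter()`; pick whichever reads more clearly.
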
@@ -229,7 +240,7 @@ Use the code chunks below to find all flights that 1. Had an arrival delay of two or more hours ```{r filterex1, exercise = TRUE} - + ``` ```{r filterex1-solution} filter(flights, arr_delay >= 120) # arr_delay is in minutes @@ -238,12 +249,12 @@ Use the code chunks below to find all flights that 1. Flew to Houston (`IAH` or `HOU`) ```{r filterex2, exercise = TRUE} - + ``` ```{r filterex2-solution} filter(flights, dest %in% c("IAH", "HOU")) ``` - +
**Hint:** This is a good case for the `%in%` operator.
@@ -251,25 +262,25 @@ Use the code chunks below to find all flights that 1. Were operated by United (`UA`), American (`AA`), or Delta (`DL`) ```{r filterex3, exercise = TRUE} - + ``` ```{r filterex3-solution} filter(flights, carrier %in% c("UA", "AA", "DL")) ``` - +
**Hint:** The `carrier` variable lists the airline that operated each flight. This is another good case for the `%in%` operator.
- + 1. Departed in summer (July, August, and September) - + ```{r filterex4, exercise = TRUE} - + ``` ```{r filterex4-solution} filter(flights, 6 < month, month < 10) ``` - +
**Hint:** When converted to numbers, July, August, and September become 7, 8, and 9.
@@ -277,20 +288,20 @@ Use the code chunks below to find all flights that 1. Arrived more than two hours late, but didn't leave late ```{r filterex5, exercise = TRUE} - + ``` ```{r filterex5-solution} filter(flights, arr_delay > 120, dep_delay <= 0) ``` - +
**Hint:** Remember that departure and arrival delays are recorded in _minutes_.
1. Were delayed more than an hour, but made up more than 30 minutes in flight - + ```{r filterex6, exercise = TRUE} - + ``` ```{r filterex6-solution} filter(flights, dep_delay > 60, (dep_delay - arr_delay) > 30) @@ -299,16 +310,16 @@ Use the code chunks below to find all flights that
**Hint:** The time a plane makes up is `dep_delay - arr_delay`.
- + 1. Departed between midnight and 6am (inclusive) ```{r filterex7, exercise = TRUE} - + ``` ```{r filterex7-solution} filter(flights, dep_time <= 600 | dep_time == 2400) ``` - +
**Hint:** Don't forget flights that left at exactly midnight (`2400`). This is a good case for an "or" operator.
@@ -326,18 +337,18 @@ Another useful dplyr filtering helper is `between()`. What does it do? Can you u How many flights have a missing `dep_time`? What other variables are missing? What might these rows represent? ```{r filterex9, exercise = TRUE} - + ``` ```{r filterex9-solution} filter(flights, is.na(dep_time)) ``` - +
**Hint:** This is a good case for `is.na()`.
```{r filterex9-check} -"Good Job! these look like they might be cancelled flights." +"Flights with a missing departure time are probably cancelled flights." ``` ### Exercise 4 @@ -345,9 +356,9 @@ filter(flights, is.na(dep_time)) Why is `NA ^ 0` not missing? Why is `NA | TRUE` not missing? Why is `FALSE & NA` not missing? Can you figure out the general rule? (`NA * 0` is a tricky counterexample!) - + ```{r filterex10, exercise = TRUE} - + ``` ```{r filterex10-hint-1} # any number with a zero exponent is equal to one @@ -355,7 +366,7 @@ NA ^ 0 ``` ```{r filterex10-hint-2} -# unknown value or true evaluates to true +# unknown value or true evaluates to true # (because if one operand of "or" is true, we can be sure the result is true) NA | TRUE ``` diff --git a/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd b/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd index 580d1319d..9c7141598 100755 --- a/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd +++ b/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd @@ -1,6 +1,6 @@ --- title: "Create new variables" -output: +output: learnr::tutorial: progressive: true allow_skip: true @@ -15,7 +15,18 @@ library(tidyverse) library(nycflights13) library(Lahman) -tutorial_options(exercise.timelimit = 60) +tutorial_options( + exercise.timelimit = 60, + # A simple checker function that just returns the message in the check chunk + exercise.checker = function(check_code, ...) { + learnr::feedback( + message = eval(parse(text = check_code)), + correct = logical(0), + type = "info", + location = "append" + ) + } +) knitr::opts_chunk$set(error = TRUE) ``` @@ -32,9 +43,9 @@ The readings in this tutorial follow [_R for Data Science_](http://r4ds.had.co.n To practice these skills, we will use the `flights` data set from the nycflights13 package, which you met in [Data Basics](../01-data-basics/01-data-basics.html). This data frame comes from the US [Bureau of Transportation Statistics](http://www.transtats.bts.gov/DatabaseInfo.asp?DB_ID=120&Link=0) and contains all `r format(nrow(nycflights13::flights), big.mark = ",")` flights that departed from New York City in 2013. It is documented in `?flights`. -To visualize the data, we will use the ggplot2 package that you met in [Data Visualization Basics](../02-data-vis-basics/02-data-vis-basics.html). +To visualize the data, we will use the ggplot2 package that you met in [Data Visualization Basics](../02-data-vis-basics/02-data-vis-basics.html). -I've preloaded the packages for this tutorial with +I've preloaded the packages for this tutorial with ```{r eval = FALSE} library(tidyverse) # loads dplyr, ggplot2, and others @@ -43,17 +54,17 @@ library(nycflights13) ## Add new variables with mutate() -A data set often contains information that you can use to compute new variables. `mutate()` helps you compute those variables. Since `mutate()` always adds new columns to the end of a dataset, we'll start by creating a narrow dataset which will let us see the new variables (If we added new variables to `flights`, the new columns would run off the side of your screen, which would make them hard to see). +A data set often contains information that you can use to compute new variables. `mutate()` helps you compute those variables. Since `mutate()` always adds new columns to the end of a dataset, we'll start by creating a narrow dataset which will let us see the new variables (If we added new variables to `flights`, the new columns would run off the side of your screen, which would make them hard to see). 
### select() You can select a subset of variables by name with the `select()` function in dplyr. Run the code below to see the narrow data set that `select()` creates. ```{r select, exercise = TRUE, exercise.eval = FALSE} -flights_sml <- select(flights, - arr_delay, +flights_sml <- select(flights, + arr_delay, dep_delay, - distance, + distance, air_time ) ``` @@ -63,10 +74,10 @@ flights_sml <- select(flights, The code below creates two new variables with dplyr's `mutate()` function. `mutate()` returns a new data frame that contains the new variables appended to a copy of the original data set. Take a moment to imagine what this will look like, and then click "Run Code" to find out. ```{r mutate1-setup} -flights_sml <- select(flights, - arr_delay, +flights_sml <- select(flights, + arr_delay, dep_delay, - distance, + distance, air_time ) ``` @@ -81,10 +92,10 @@ mutate(flights_sml, Note that when you use `mutate()` you can create multiple variables at once, and you can even refer to variables that are created earlier in the call to create other variables later in the call: ```{r echo = FALSE} -flights_sml <- select(flights, - arr_delay, +flights_sml <- select(flights, + arr_delay, dep_delay, - distance, + distance, air_time ) ``` @@ -123,14 +134,14 @@ transmute(flights, ## Useful mutate functions -You can use any function inside of `mutate()` so long as the function is **vectorised**. A vectorised function takes a vector of values as input and returns a vector with the same number of values as output. +You can use any function inside of `mutate()` so long as the function is **vectorised**. A vectorised function takes a vector of values as input and returns a vector with the same number of values as output. -Over time, I've found that several families of vectorised functions are particularly useful with `mutate()`: +Over time, I've found that several families of vectorised functions are particularly useful with `mutate()`: * **Arithmetic operators**: `+`, `-`, `*`, `/`, `^`. These are all vectorised, using the so called "recycling rules". If one parameter is shorter than the other, it will automatically be repeated multiple times to create a vector of the same length. This is most useful when one of the arguments is a single number: `air_time / 60`, `hours * 60 + minute`, etc. - + * **Modular arithmetic**: `%/%` (integer division) and `%%` (remainder), where `x == y * (x %/% y) + (x %% y)`. Modular arithmetic is a handy tool because it allows you to break integers up into pieces. For example, in the flights dataset, you can compute `hour` and `minute` from `dep_time` with: - + ```{r} transmute(flights, dep_time, @@ -138,21 +149,21 @@ Over time, I've found that several families of vectorised functions are particul minute = dep_time %% 100 ) ``` - + * **Logs**: `log()`, `log2()`, `log10()`. Logarithms are an incredibly useful transformation for dealing with data that ranges across multiple orders of magnitude. They also convert multiplicative relationships to additive, a feature we'll come back to in modelling. - + All else being equal, I recommend using `log2()` because it's easy to interpret: a difference of 1 on the log scale corresponds to doubling on the original scale and a difference of -1 corresponds to halving. * **Offsets**: `lead()` and `lag()` allow you to refer to leading or lagging values. This allows you to compute running differences (e.g. `x - lag(x)`) or find when values change (`x != lag(x))`. 
They are most useful in conjunction with `group_by()`, which you'll learn about shortly. - + ```{r} (x <- 1:10) lag(x) lead(x) ``` - + * **Cumulative and rolling aggregates**: R provides functions for running sums, products, mins and maxes: `cumsum()`, `cumprod()`, `cummin()`, `cummax()`; and dplyr provides `cummean()` for cumulative means. If you need rolling aggregates (i.e. a sum computed over a rolling window), try the RcppRoll package. - + ```{r} x cumsum(x) @@ -161,18 +172,18 @@ Over time, I've found that several families of vectorised functions are particul * **Logical comparisons**, `<`, `<=`, `>`, `>=`, `!=`, which you learned about earlier. If you're doing a complex sequence of logical operations it's often a good idea to store the interim values in new variables so you can check that each step is working as expected. -* **Ranking**: there are a number of ranking functions, but you should start with `min_rank()`. It does the most usual type of ranking (e.g. 1st, 2nd, 2nd, 4th). The default gives smallest values the small ranks; use `desc(x)` to give the largest values the smallest ranks. - +* **Ranking**: there are a number of ranking functions, but you should start with `min_rank()`. It does the most usual type of ranking (e.g. 1st, 2nd, 2nd, 4th). The default gives smallest values the small ranks; use `desc(x)` to give the largest values the smallest ranks. + ```{r} y <- c(1, 2, 2, NA, 3, 4) min_rank(y) min_rank(desc(y)) ``` - + If `min_rank()` doesn't do what you need, look at the variants `row_number()`, `dense_rank()`, `percent_rank()`, `cume_dist()`, `ntile()`. See their help pages for more details. - + ```{r} row_number(y) dense_rank(y) @@ -200,13 +211,13 @@ ggplot(flights, aes(air_time - airtime2)) + geom_histogram() Currently `dep_time` and `sched_dep_time` are convenient to look at, but hard to compute with because they're not really continuous numbers. Convert them to a more convenient representation of number of minutes since midnight. ```{r mutateex1, exercise = TRUE} - + ``` ```{r mutateex1-solution} mutate(flights, dep_time = dep_time %/% 100 * 60 + dep_time %% 100, sched_dep_time = sched_dep_time %/% 100 * 60 + sched_dep_time %% 100) ``` - +
**Hint:** `423 %% 100` returns `23`, `423 %/% 100` returns `4`.
@@ -214,11 +225,11 @@ mutate(flights, dep_time = dep_time %/% 100 * 60 + dep_time %% 100, ```{r mutateex1-check} "Good Job!" ``` - + ### Exercise 2 Compare `air_time` with `arr_time - dep_time`. What do you expect to see? What do you see? How do you explain this? - + ```{r mutateex2, exercise = TRUE} # flights <- mutate(flights, total_time = _____________) # flight_times <- select(flights, air_time, total_time) @@ -233,47 +244,47 @@ filter(flight_times, air_time != total_time) ```{r mutateex2-check} "Good Job! it doesn't make sense to do math with `arr_time` and `dep_time` until you convert the values to minutes past midnight (as you did with `dep_time` and `sched_dep_time` in the previous exercise)." ``` - -### Exercise 3 - + +### Exercise 3 + Compare `dep_time`, `sched_dep_time`, and `dep_delay`. How would you expect those three numbers to be related? ```{r mutateex3, exercise = TRUE} - + ``` - + ### Exercise 4 Find the 10 most delayed flights (`dep_delay`) using a ranking function. How do you want to handle ties? Carefully read the documentation for `min_rank()`. - + ```{r mutateex4, exercise = TRUE} - + ``` ```{r mutateex4-solution} ?min_rank flights <- mutate(flights, delay_rank = min_rank(dep_delay)) filter(flights, delay_rank <= 10) ``` - +
**Hint:** Once you compute a rank, you can filter the data set based on the ranks.
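
As a hedged aside on the tie question, here is how `min_rank()` and `row_number()` differ on a small made-up vector (`d` is purely illustrative):

```{r ranking-ties-sketch, eval = FALSE}
d <- c(10, 20, 20, 30)
min_rank(desc(d))    # 4 2 2 1 -- ties share a rank, so "top 10" can return more than 10 rows
row_number(desc(d))  # 4 2 3 1 -- ties are broken, so filtering on rank <= 10 returns exactly 10
```
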
- + ```{r mutateex4-check} "Excellent! It's not possible to choose exactly 10 flights unless you pick an arbitrary method to choose between ties." ``` - + ### Exercise 5 What does `1:3 + 1:10` return? Why? ```{r mutateex5, exercise = TRUE} - + ``` ```{r mutateex5-solution} 1:3 + 1:10 ``` - +
**Hint:** Remember R's recycling rules.
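
A small optional sketch of the recycling rules with made-up vectors: when the shorter length divides the longer one, R recycles silently; when it does not, R still recycles but warns:

```{r recycling-sketch, eval = FALSE}
1:2 + 1:10  # 2 divides 10: returns 2 4 4 6 6 8 8 10 10 12, no warning
1:3 + 1:10  # 3 does not divide 10: recycled anyway, but with a warning
```
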
@@ -287,5 +298,5 @@ What does `1:3 + 1:10` return? Why? What trigonometric functions does R provide? Hint: look up the help page for `Trig`. ```{r mutateex6, exercise = TRUE} - + ``` \ No newline at end of file diff --git a/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd b/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd index 57706864e..25b9ebe67 100755 --- a/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd +++ b/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd @@ -1,6 +1,6 @@ --- title: "Summarise Tables" -output: +output: learnr::tutorial: progressive: true allow_skip: true @@ -18,7 +18,18 @@ library(tidyverse) library(nycflights13) library(Lahman) -tutorial_options(exercise.timelimit = 60) +tutorial_options( + exercise.timelimit = 60, + # A simple checker function that just returns the message in the check chunk + exercise.checker = function(check_code, ...) { + learnr::feedback( + message = eval(parse(text = check_code)), + correct = logical(0), + type = "info", + location = "append" + ) + } +) knitr::opts_chunk$set(error = TRUE) ``` @@ -26,7 +37,7 @@ knitr::opts_chunk$set(error = TRUE) In this tutorial, you will learn how to summarise a table of data, including: -* How to summarise tables with `summarise()` +* How to summarise tables with `summarise()` * How to recognize the most useful families of functions to combine with `summarise()` * How to combine multiple dplyr operations with the pipe, `%>%` * How to compute counts of observations with `n()` @@ -38,9 +49,9 @@ The readings in this tutorial follow [_R for Data Science_](http://r4ds.had.co.n To practice these skills, we will use the `flights` data set from the nycflights13 package, which you met in [Data Basics](https://learnr-examples.shinyapps.io/ex-data-basics). This data frame comes from the US [Bureau of Transportation Statistics](http://www.transtats.bts.gov/DatabaseInfo.asp?DB_ID=120&Link=0) and contains all `r format(nrow(nycflights13::flights), big.mark = ",")` flights that departed from New York City in 2013. It is documented in `?flights`. -To visualize the data, we will use the ggplot2 package that you met in [Data Visualization Basics](https://rstudio.cloud/learn/primers/1.1). +To visualize the data, we will use the ggplot2 package that you met in [Data Visualization Basics](https://rstudio.cloud/learn/primers/1.1). -I've preloaded the packages for this tutorial with +I've preloaded the packages for this tutorial with ```{r eval = FALSE} library(tidyverse) # loads dplyr, ggplot2, and others @@ -54,7 +65,7 @@ library(nycflights13) `summarise()` collapses a data frame to a single row of summaries. You get to choose how many summaries appear in the row and how they are computed: ```{r summarize} -summarise(flights, delay = mean(dep_delay, na.rm = TRUE), +summarise(flights, delay = mean(dep_delay, na.rm = TRUE), total = sum(dep_delay, na.rm = TRUE)) ``` @@ -87,21 +98,21 @@ summarise(by_day, delay = mean(dep_delay, na.rm = TRUE), ### Exercise 1 Which carrier has the worst delays? Challenge: can you disentangle the effects of bad airports vs. bad carriers? Why/why not? (Hint: think about `flights %>% group_by(carrier, dest) %>% summarise(n())`) - + ```{r summariseex4, exercise = TRUE} - + ``` ```{r summariseex4-solution} -flights %>% - group_by(carrier) %>% +flights %>% + group_by(carrier) %>% summarise(avg_delay = mean(dep_delay, na.rm = TRUE)) %>% - mutate(rank = min_rank(desc(avg_delay))) %>% + mutate(rank = min_rank(desc(avg_delay))) %>% filter(rank == 1) ``` - +
**Hint:** Use `min_rank(desc(avg_delay))` to rank `avg_delay` (for example) such that the largest delay receives rank one.
</div>

```{r summariseex4-check} @@ -111,24 +122,24 @@ flights %>% ### Exercise 2 For each plane, count the number of flights before the first delay of greater than 1 hour. - + ```{r summariseex5, exercise = TRUE} - + ``` ```{r summariseex5-solution} -flights %>% - filter(!is.na(dep_delay)) %>% - group_by(tailnum) %>% +flights %>% + filter(!is.na(dep_delay)) %>% + group_by(tailnum) %>% mutate(big_delay = dep_delay > 60, - before = !cumany(big_delay)) %>% + before = !cumany(big_delay)) %>% summarise(sum(before)) ``` - +
**Hint:** One strategy would be to: -* filter out all rows where `dep_delay` is `NA`. -* Then group by plane, +* filter out all rows where `dep_delay` is `NA`. +* Then group by plane, * create a variable that tests whether each flight was delayed longer than an hour * create a variable that identifies flights that occur before the first big delay with `!cumany()` * sum up the number of trues @@ -153,7 +164,7 @@ Be careful when you progressively roll up summaries: it's OK for sums and counts ### Ungrouping -If you need to remove grouping, and return to operations on ungrouped data, use `ungroup()`. +If you need to remove grouping, and return to operations on ungrouped data, use `ungroup()`. ```{r echo = FALSE} daily <- group_by(flights, year, month, day) @@ -198,52 +209,52 @@ Now let's look at how we prepared the data. There are three steps: 1. Filter to remove noisy points and Honolulu airport, which is almost twice as far away as the next closest airport. -This code is a little frustrating to write because we have to give each intermediate data frame a name, even though we don't care about it. Naming things is hard, so this slows down our analysis. +This code is a little frustrating to write because we have to give each intermediate data frame a name, even though we don't care about it. Naming things is hard, so this slows down our analysis. ### Pipes There's another way to tackle the same problem. We can turn the code into a pipe with the pipe operator, `%>%`: ```{r} -delays <- flights %>% - group_by(dest) %>% +delays <- flights %>% + group_by(dest) %>% summarise( count = n(), dist = mean(distance, na.rm = TRUE), delay = mean(arr_delay, na.rm = TRUE) - ) %>% + ) %>% filter(count > 20, dest != "HNL") ``` -Behind the scenes, `x %>% f(y)` turns into `f(x, y)`, and `x %>% f(y) %>% g(z)` turns into `g(f(x, y), z)` and so on. You can use the pipe to rewrite multiple operations in a way that you can read left-to-right, top-to-bottom. +Behind the scenes, `x %>% f(y)` turns into `f(x, y)`, and `x %>% f(y) %>% g(z)` turns into `g(f(x, y), z)` and so on. You can use the pipe to rewrite multiple operations in a way that you can read left-to-right, top-to-bottom. This focuses on the transformations, not what's being transformed, which makes the code easier to read. You can read it as a series of imperative statements: group, then summarise, then filter. As suggested by this reading, a good way to pronounce `%>%` when reading code is "then". We'll use piping frequently from now on because it considerably improves the readability of code, and we'll come back to it in more detail in [Pipes](). -The pipe is a defining feature of the tidyverse: packages in the tidyverse all contain functions that are designed to work with the pipe. The only exception is ggplot2: it was written before the pipe was discovered. Unfortunately, the next iteration of ggplot2, ggvis, which does use the pipe, isn't quite ready for prime time yet. +The pipe is a defining feature of the tidyverse: packages in the tidyverse all contain functions that are designed to work with the pipe. The only exception is ggplot2: it was written before the pipe was discovered. Unfortunately, the next iteration of ggplot2, ggvis, which does use the pipe, isn't quite ready for prime time yet. ## Useful summary functions {#summarise-funs} ### Aggregating functions -You can get a long way with means and sum; but R provides many other useful functions to use with summary. 
Each of these functions acts as an **aggregating function**: it takes a vector of values and returns a single value. +You can get a long way with means and sum; but R provides many other useful functions to use with summary. Each of these functions acts as an **aggregating function**: it takes a vector of values and returns a single value. Let's demonstrate some of the most useful aggregating functions with this data set, which removes flights that have no delay information (because they were cancelled). ```{r} -not_cancelled <- flights %>% +not_cancelled <- flights %>% filter(!is.na(dep_delay), !is.na(arr_delay)) ``` * **Measures of location**: we've used `mean(x)`, but `median(x)` is also useful. The mean is the sum divided by the length; the median is a value where 50% of `x` is above it, and 50% is below it. - + It's sometimes useful to combine aggregation with logical subsetting. We haven't talked about this sort of subsetting yet, but you'll learn more about it in [Subsetting](). - + ```{r} - not_cancelled %>% - group_by(year, month, day) %>% + not_cancelled %>% + group_by(year, month, day) %>% summarise( avg_delay1 = mean(arr_delay), avg_delay2 = mean(arr_delay[arr_delay > 0]) # the average positive delay @@ -251,90 +262,90 @@ not_cancelled <- flights %>% ``` * **Measures of spread**: `sd(x)`, `IQR(x)`, `mad(x)`. The mean squared deviation, or standard deviation or sd for short, is the standard measure of spread. The interquartile range `IQR()` and median absolute deviation `mad(x)`are robust equivalents that may be more useful if you have outliers. - + ```{r} # Why is distance to some destinations more variable than to others? - not_cancelled %>% - group_by(dest) %>% - summarise(distance_sd = sd(distance)) %>% + not_cancelled %>% + group_by(dest) %>% + summarise(distance_sd = sd(distance)) %>% arrange(desc(distance_sd)) ``` - + * **Measures of rank**: `min(x)`, `quantile(x, 0.25)`, `max(x)`. Quantiles are a generalisation of the median. For example, `quantile(x, 0.25)` will find a value of `x` that is greater than 25% of the values, and less than the remaining 75%. ```{r} # When do the first and last flights leave each day? - not_cancelled %>% - group_by(year, month, day) %>% + not_cancelled %>% + group_by(year, month, day) %>% summarise( first = min(dep_time), last = max(dep_time) ) ``` - + * **Measures of position**: `first(x)`, `nth(x, 2)`, `last(x)`. These work similarly to `x[1]`, `x[2]`, and `x[length(x)]` but let you set a default value if that position does not exist (i.e. you're trying to get the 3rd element from a group that only has two elements). For example, we can find the first and last departure for each day: - + ```{r} - not_cancelled %>% - group_by(year, month, day) %>% + not_cancelled %>% + group_by(year, month, day) %>% summarise( - first_dep = first(dep_time), + first_dep = first(dep_time), last_dep = last(dep_time) ) ``` - + These functions are complementary to filtering on ranks. Filtering gives you all variables, with each observation in a separate row: - + ```{r} - not_cancelled %>% - group_by(year, month, day) %>% - mutate(r = min_rank(desc(dep_time))) %>% + not_cancelled %>% + group_by(year, month, day) %>% + mutate(r = min_rank(desc(dep_time))) %>% filter(r %in% range(r)) ``` * **Counts**: In the next section, you will meet `n()`, which takes no arguments, and returns the size of the current group. You can count other useful quantities as well. To count the number of non-missing values, use `sum(!is.na(x))`. 
To count the number of distinct (unique) values, use `n_distinct(x)`. - + ```{r} # Which destinations have the most carriers? - not_cancelled %>% - group_by(dest) %>% - summarise(carriers = n_distinct(carrier)) %>% + not_cancelled %>% + group_by(dest) %>% + summarise(carriers = n_distinct(carrier)) %>% arrange(desc(carriers)) ``` - + * **Counts and proportions of logical values**: `sum(x > 10)`, `mean(y == 0)`. When used with numeric functions, `TRUE` is converted to 1 and `FALSE` to 0. This makes `sum()` and `mean()` very useful: `sum(x)` gives the number of `TRUE`s in `x`, and `mean(x)` gives the proportion. - + ```{r} # How many flights left before 5am? (these usually indicate delayed # flights from the previous day) - not_cancelled %>% - group_by(year, month, day) %>% + not_cancelled %>% + group_by(year, month, day) %>% summarise(n_early = sum(dep_time < 500)) - + # What proportion of flights are delayed by more than an hour? - not_cancelled %>% - group_by(year, month, day) %>% + not_cancelled %>% + group_by(year, month, day) %>% summarise(hour_perc = mean(arr_delay > 60)) ``` ### Exercise 3 Brainstorm at least 5 different ways to assess the typical delay characteristics of a group of flights. Consider the following scenarios: - + * A flight is 15 minutes early 50% of the time, and 15 minutes late 50% of the time. - + * A flight is always 10 minutes late. * A flight is 30 minutes early 50% of the time, and 30 minutes late 50% of the time. - + * 99% of the time a flight is on time. 1% of the time it's 2 hours late. - + Which is more important: arrival delay or departure delay? - + ```{r summariseex1, exercise = TRUE} - + ``` - +
**Hint:** Consider R's measures of location and measures of spread.
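
One possible (not definitive) starting point for the brainstorm above, assuming the `not_cancelled` data frame defined earlier in this tutorial; grouping by `flight` and the summary names are just illustrative choices:

```{r delay-characteristics-sketch, eval = FALSE}
not_cancelled %>%
  group_by(flight) %>%
  summarise(
    n            = n(),
    median_arr   = median(arr_delay),      # a typical delay
    iqr_arr      = IQR(arr_delay),         # spread around that typical value
    prop_on_time = mean(arr_delay <= 0),   # how often the flight arrives on time
    prop_2hr     = mean(arr_delay >= 120)  # how often it is badly late
  )
```
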
@@ -345,27 +356,27 @@ Which is more important: arrival delay or departure delay? You may have wondered about the `na.rm` argument we used in a previous section. What happens if we don't set it? ```{r} -flights %>% - group_by(year, month, day) %>% +flights %>% + group_by(year, month, day) %>% summarise(mean = mean(dep_delay)) ``` We get a lot of missing values! That's because aggregation functions obey the usual rule of missing values: if there's any missing value in the input, the output will be a missing value. Fortunately, all aggregation functions have an `na.rm` argument which removes the missing values prior to computation: ```{r} -flights %>% - group_by(year, month, day) %>% +flights %>% + group_by(year, month, day) %>% summarise(mean = mean(dep_delay, na.rm = TRUE)) ``` In this case, where missing values represent cancelled flights, we could also tackle the problem by first removing the cancelled flights, as we did to create `not_cancelled`. ```{r} -not_cancelled <- flights %>% +not_cancelled <- flights %>% filter(!is.na(dep_delay), !is.na(arr_delay)) -not_cancelled %>% - group_by(year, month, day) %>% +not_cancelled %>% + group_by(year, month, day) %>% summarise(mean = mean(dep_delay)) ``` @@ -380,41 +391,41 @@ Our definition of cancelled flights (`is.na(dep_delay) | is.na(arr_delay)`) is s Whenever you do any aggregation, it's always a good idea to include either a count (`n()`), or a count of non-missing values (`sum(!is.na(x))`). That way you can check that you're not drawing conclusions based on very small amounts of data. For example, let's look at the planes (identified by their tail number) that have the highest average delays: ```{r} -delays <- not_cancelled %>% - group_by(tailnum) %>% +delays <- not_cancelled %>% + group_by(tailnum) %>% summarise( delay = mean(arr_delay) ) -ggplot(data = delays, mapping = aes(x = delay)) + +ggplot(data = delays, mapping = aes(x = delay)) + geom_freqpoly(binwidth = 10) ``` Wow, there are some planes that have an _average_ delay of 5 hours (300 minutes)! -The story is actually a little more nuanced. We can get more insight if we draw a scatterplot of number of flights vs. average delay. Fill in the blank code below to compute and then plot the number of flights by the mean arrival delay (`arr_delay`). +The story is actually a little more nuanced. We can get more insight if we draw a scatterplot of number of flights vs. average delay. Fill in the blank code below to compute and then plot the number of flights by the mean arrival delay (`arr_delay`). ```{r delays, exercise = TRUE} -# delays <- not_cancelled %>% -# group_by(tailnum) %>% +# delays <- not_cancelled %>% +# group_by(tailnum) %>% # summarise( # delay = _________, # n = n() # ) -# -# ggplot(data = delays, mapping = aes(x = n, y = delay)) + +# +# ggplot(data = delays, mapping = aes(x = n, y = delay)) + # geom_point(alpha = 1/10) ``` ```{r delays-solution} -delays <- not_cancelled %>% - group_by(tailnum) %>% +delays <- not_cancelled %>% + group_by(tailnum) %>% summarise( delay = mean(arr_delay), n = n() ) -ggplot(data = delays, mapping = aes(x = n, y = delay)) + +ggplot(data = delays, mapping = aes(x = n, y = delay)) + geom_point(alpha = 1/10) ``` @@ -425,8 +436,8 @@ Not surprisingly, there is much greater variation in the average delay when ther When looking at this sort of plot, it's often useful to filter out the groups with the smallest numbers of observations, so you can see more of the pattern and less of the extreme variation in the smallest groups. 
This is what the following code does, as well as showing you a handy pattern for integrating ggplot2 into dplyr flows. It's a bit painful that you have to switch from `%>%` to `+`, but once you get the hang of it, it's quite convenient. ```{r echo = FALSE} -delays <- not_cancelled %>% - group_by(tailnum) %>% +delays <- not_cancelled %>% + group_by(tailnum) %>% summarise( delay = mean(arr_delay), n = n() @@ -435,9 +446,9 @@ delays <- not_cancelled %>% ```{r} -delays %>% - filter(n > 25) %>% - ggplot(mapping = aes(x = n, y = delay)) + +delays %>% + filter(n > 25) %>% + ggplot(mapping = aes(x = n, y = delay)) + geom_point(alpha = 1/10) ``` @@ -449,29 +460,29 @@ RStudio tip: a useful keyboard shortcut is Cmd/Ctrl + Shift + P. This resends th ### Sample size, average performance, and rank -There's another common variation of this type of pattern. Let's look at how the average performance of batters in baseball is related to the number of times they're at bat. Here I use data from the __Lahman__ package to compute the batting average (number of hits / number of attempts) of every major league baseball player. +There's another common variation of this type of pattern. Let's look at how the average performance of batters in baseball is related to the number of times they're at bat. Here I use data from the __Lahman__ package to compute the batting average (number of hits / number of attempts) of every major league baseball player. When I plot the skill of the batter (measured by the batting average, `ba`) against the number of opportunities to hit the ball (measured by at bat, `ab`), you see two patterns: 1. As above, the variation in our aggregate decreases as we get more data points. - + 2. There's a positive correlation between skill (`ba`) and opportunities to hit the ball (`ab`). This is because teams control who gets to play, and obviously they'll pick their best players. ```{r} # Convert to a tibble so it prints nicely batting <- as_tibble(Lahman::Batting) -batters <- batting %>% - group_by(playerID) %>% +batters <- batting %>% + group_by(playerID) %>% summarise( ba = sum(H, na.rm = TRUE) / sum(AB, na.rm = TRUE), ab = sum(AB, na.rm = TRUE) ) -batters %>% - filter(ab > 100) %>% +batters %>% + filter(ab > 100) %>% ggplot(mapping = aes(x = ab, y = ba)) + - geom_point() + + geom_point() + geom_smooth(se = FALSE) ``` @@ -482,38 +493,38 @@ You can find a good explanation of this problem at % +not_cancelled %>% count(dest) ``` - + You can optionally provide a weight variable. For example, you could use this to "count" (sum) the total number of miles a plane flew: - + ```{r} -not_cancelled %>% +not_cancelled %>% count(tailnum, wt = distance) ``` ### Exercise 5 Come up with another approach that will give you the same output as `not_cancelled %>% count(dest)` and `not_cancelled %>% count(tailnum, wt = distance)` (without using `count()`). - + ```{r summariseex2, exercise = TRUE} - + ``` ```{r summariseex2-solution} -not_cancelled %>% - group_by(dest) %>% +not_cancelled %>% + group_by(dest) %>% summarise(n = n()) -not_cancelled %>% - group_by(tailnum) %>% +not_cancelled %>% + group_by(tailnum) %>% summarise(n = sum(distance)) ``` - +
**Hint:** Consider the tools at your disposal: `group_by()`, `summarise()`, `n()`, `sum()`, and `?count`.
</div>

```{r summariseex2-check} @@ -525,23 +536,23 @@ not_cancelled %>% What does the `sort` argument to `count()` do. When might you use it? ```{r summariseex6, exercise = TRUE} -?count +?count ``` ### Exercise 7 Look at the number of cancelled flights per day. Is there a pattern? Is the proportion of cancelled flights related to the average delay? - + ```{r summariseex3, exercise = TRUE} # Task 1 # begin with a variable that shows the day of the year -# flights %>% -# mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>% +# flights %>% +# mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>% # create a variable that shows whether a flight is cancelled # group by day # create a summary by counting up the number of flights where cancelled is TRUE # Plot the result against day - + # Task 2 # recreate the grouped data above # create a summary by taking the mean of cancelled variable @@ -550,34 +561,28 @@ Look at the number of cancelled flights per day. Is there a pattern? Is the prop ``` ```{r summariseex3-solution} -flights %>% - mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>% - mutate(cancelled = is.na(dep_delay) | is.na(arr_delay)) %>% - group_by(date) %>% - summarise(n = sum(cancelled)) %>% +flights %>% + mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>% + mutate(cancelled = is.na(dep_delay) | is.na(arr_delay)) %>% + group_by(date) %>% + summarise(n = sum(cancelled)) %>% ggplot(aes(x = date, y = n)) + geom_point() + geom_smooth() - -flights %>% - mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>% - mutate(cancelled = is.na(dep_delay) | is.na(arr_delay)) %>% - group_by(date) %>% - summarise(prop = mean(cancelled), avg_delay = mean(dep_delay, na.rm = TRUE)) %>% + +flights %>% + mutate(date = as.Date(paste(year, month, day, sep = "-"))) %>% + mutate(cancelled = is.na(dep_delay) | is.na(arr_delay)) %>% + group_by(date) %>% + summarise(prop = mean(cancelled), avg_delay = mean(dep_delay, na.rm = TRUE)) %>% ggplot(aes(x = prop, y = avg_delay)) + geom_point() ``` - +
-**Hint:** Don't forget to use `na.rm = TRUE` where appropriate. +**Hint:** Don't forget to use `na.rm = TRUE` where appropriate.
```{r summariseex3-check} "Wow! You did awesome." ``` - - - - - - From 31ab9707983cbdf940e1c20f2c15e7dfa244d1fa Mon Sep 17 00:00:00 2001 From: Garrick Aden-Buie Date: Mon, 31 Jan 2022 10:08:11 -0500 Subject: [PATCH 2/2] Return a list() since feedback() isn't exported --- inst/tutorials/ex-data-filter/ex-data-filter.Rmd | 2 +- inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd | 2 +- inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tutorials/ex-data-filter/ex-data-filter.Rmd b/inst/tutorials/ex-data-filter/ex-data-filter.Rmd index 1f54dd850..215226e9d 100755 --- a/inst/tutorials/ex-data-filter/ex-data-filter.Rmd +++ b/inst/tutorials/ex-data-filter/ex-data-filter.Rmd @@ -21,7 +21,7 @@ tutorial_options( exercise.timelimit = 60, # A simple checker function that just returns the message in the check chunk exercise.checker = function(check_code, ...) { - learnr::feedback( + list( message = eval(parse(text = check_code)), correct = logical(0), type = "info", diff --git a/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd b/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd index 9c7141598..9cf71505b 100755 --- a/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd +++ b/inst/tutorials/ex-data-mutate/ex-data-mutate.Rmd @@ -19,7 +19,7 @@ tutorial_options( exercise.timelimit = 60, # A simple checker function that just returns the message in the check chunk exercise.checker = function(check_code, ...) { - learnr::feedback( + list( message = eval(parse(text = check_code)), correct = logical(0), type = "info", diff --git a/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd b/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd index 25b9ebe67..a3470405e 100755 --- a/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd +++ b/inst/tutorials/ex-data-summarise/ex-data-manip-summarise.Rmd @@ -22,7 +22,7 @@ tutorial_options( exercise.timelimit = 60, # A simple checker function that just returns the message in the check chunk exercise.checker = function(check_code, ...) { - learnr::feedback( + list( message = eval(parse(text = check_code)), correct = logical(0), type = "info",