From f810004ad2f71e1fd472f5d7cc4abb1cdefbcd56 Mon Sep 17 00:00:00 2001
From: Matt Dancho
Date: Sat, 28 Oct 2023 19:56:39 -0400
Subject: [PATCH] plot_anomalies_decomp

---
 NAMESPACE              |   3 +
 R/anomalize-plotting.R | 259 ++++++++++++++++++++++++++++++++++++++++-
 man/plot_anomalies.Rd  |  31 ++++-
 3 files changed, 291 insertions(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index cc0068e0..b76432de 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -73,6 +73,8 @@ S3method(plot_acf_diagnostics,data.frame)
 S3method(plot_acf_diagnostics,grouped_df)
 S3method(plot_anomalies,data.frame)
 S3method(plot_anomalies,grouped_df)
+S3method(plot_anomalies_decomp,data.frame)
+S3method(plot_anomalies_decomp,grouped_df)
 S3method(plot_anomaly_diagnostics,data.frame)
 S3method(plot_anomaly_diagnostics,grouped_df)
 S3method(plot_seasonal_diagnostics,data.frame)
@@ -287,6 +289,7 @@ export(parse_date2)
 export(parse_datetime2)
 export(plot_acf_diagnostics)
 export(plot_anomalies)
+export(plot_anomalies_decomp)
 export(plot_anomaly_diagnostics)
 export(plot_seasonal_diagnostics)
 export(plot_stl_diagnostics)
diff --git a/R/anomalize-plotting.R b/R/anomalize-plotting.R
index 05ad8c0b..c8085ad7 100644
--- a/R/anomalize-plotting.R
+++ b/R/anomalize-plotting.R
@@ -5,7 +5,7 @@
 #' An interactive and scalable function for visualizing anomalies in time series data.
 #' Plots are available in interactive `plotly` (default) and static `ggplot2` format.
 #'
-#' @param .data A `tibble` or `data.frame` that has been anomalized by `anomalize`
+#' @param .data A `tibble` or `data.frame` that has been anomalized by `anomalize()`
 #' @param .date_var A column containing either date or date-time values
 #' @param .facet_vars One or more grouping columns that broken out into `ggplot2` facets.
 #' These can be selected using `tidyselect()` helpers (e.g `contains()`).
@@ -356,3 +356,260 @@ plot_anomalies.grouped_df <- function(
     )
 
 }
+
+# 2.0 PLOT ANOMALIES DECOMP ----
+#' Visualize Anomaly Decomposition
+#'
+#' An interactive and scalable function for visualizing anomaly decomposition in time series data.
+#' Plots are available in interactive `plotly` (default) and static `ggplot2` format.
+#'
+#' @param .data A `tibble` or `data.frame` that has been anomalized by `anomalize()`
+#' @param .date_var A column containing either date or date-time values
+#' @param .facet_vars One or more grouping columns that broken out into `ggplot2` facets.
+#' These can be selected using `tidyselect()` helpers (e.g `contains()`).
+#' @param .facet_ncol Number of facet columns.
+#' @param .facet_nrow Number of facet rows (only used for `.trelliscope = TRUE`)
+#' @param .facet_scales Control facet x & y-axis ranges. Options include "fixed", "free", "free_y", "free_x"
+#' @param .facet_dir The direction of faceting ("h" for horizontal, "v" for vertical). Default is "h".
+#' @param .facet_collapse Multiple facets included on one facet strip instead of
+#' multiple facet strips.
+#' @param .facet_collapse_sep The separator used for collapsing facets.
+#' @param .facet_strip_remove Whether or not to remove the strip and text label for each facet.
+#' @param .line_color Line color.
+#' @param .line_size Line size.
+#' @param .line_type Line type.
+#' @param .line_alpha Line alpha (opacity). Range: (0, 1).
+#' @param .anom_color Color for the anomaly dots
+#' @param .anom_alpha Opacity for the anomaly dots. Range: (0, 1).
+#' @param .anom_size Size for the anomaly dots
+#' @param .ribbon_fill Fill color for the acceptable range
+#' @param .ribbon_alpha Fill opacity for the acceptable range. Range: (0, 1).
+#' @param .legend_show Toggles on/off the Legend
+#' @param .title Plot title.
+#' @param .x_lab Plot x-axis label
+#' @param .y_lab Plot y-axis label
+#' @param .color_lab Plot label for the color legend
+#' @param .interactive If TRUE, returns a `plotly` interactive plot.
+#' If FALSE, returns a static `ggplot2` plot.
+#' @param .trelliscope Returns either a normal plot or a trelliscopejs plot (great for many time series)
+#' Must have `trelliscopejs` installed.
+#' @param .trelliscope_params Pass parameters to the `trelliscopejs::facet_trelliscope()` function as a `list()`.
+#' The only parameters that cannot be passed are:
+#' - `ncol`: use `.facet_ncol`
+#' - `nrow`: use `.facet_nrow`
+#' - `scales`: use `facet_scales`
+#' - `as_plotly`: use `.interactive`
+#'
+#'
+#' @return A `plotly` or `ggplot2` visualization
+#'
+#'
+#' @examples
+#' library(dplyr)
+#'
+#' walmart_sales_weekly %>%
+#'     filter(id %in% c("1_1", "1_3")) %>%
+#'     group_by(id) %>%
+#'     anomalize(Date, Weekly_Sales, .message = FALSE) %>%
+#'     plot_anomalies_decomp(Date, .interactive = FALSE)
+#'
+#' @name plot_anomalies
+#' @export
+plot_anomalies_decomp <- function(
+    .data,
+    .date_var,
+
+    .facet_vars = NULL,
+
+    .facet_scales = "free",
+
+    .line_color = "#2c3e50",
+    .line_size = 0.5,
+    .line_type = 1,
+    .line_alpha = 1,
+
+    .title = "Anomaly Decomposition Plot",
+    .x_lab = "",
+    .y_lab = "",
+    .interactive = TRUE
+) {
+
+    # Validate shared inputs once, before dispatching on the class of .data
+    date_var_expr <- rlang::enquo(.date_var)
+
+    if (!is.data.frame(.data)) {
+        rlang::abort(".data is not a data-frame or tibble. Please supply a data.frame or tibble.")
+    }
+    if (rlang::quo_is_missing(date_var_expr)) {
+        rlang::abort(".date_var is missing. Please supply a date or date-time column.")
+    }
+
+    # Require the same four columns that the data.frame method pivots on, and
+    # name exactly that set (plus whichever are absent) in the error message.
+    required_names <- c("observed", "season", "trend", "remainder")
+    missing_names <- setdiff(required_names, names(.data))
+    if (length(missing_names) > 0) {
+        stop(
+            "Error in plot_anomalies_decomp(): column names are missing. ",
+            "Run `anomalize()` and make sure: observed, season, trend, and remainder are present. ",
+            "Missing: ", paste(missing_names, collapse = ", "),
+            call. = FALSE
+        )
+    }
+
+    UseMethod("plot_anomalies_decomp", .data)
+
+}
+
+# data.frame method: pivots the four decomposition columns long and draws one line panel per component.
+#' @export
+plot_anomalies_decomp.data.frame <- function(
+    .data,
+    .date_var,
+
+    .facet_vars = NULL,
+
+    .facet_scales = "free",
+
+    .line_color = "#2c3e50",
+    .line_size = 0.5,
+    .line_type = 1,
+    .line_alpha = 1,
+
+    .title = "Anomaly Decomposition Plot",
+    .x_lab = "",
+    .y_lab = "",
+    .interactive = TRUE
+) {
+
+    # ---- FORMAT DATA ----
+
+    date_var_expr <- rlang::enquo(.date_var)
+    facets_expr <- rlang::enquo(.facet_vars)
+
+    # Decomposition components; this order becomes the factor level order of `.group`
+    feature_set <- c("observed", "season", "trend", "remainder")
+    .facet_collapse_sep <- " "
+
+    data_formatted <- tibble::as_tibble(.data)
+
+    # Resolve the tidyselect specification in `.facet_vars` to column names
+    facet_names <- names(tidyselect::eval_select(facets_expr, .data))
+
+    # FACET SETUP ----
+    if (length(facet_names) > 0) {
+        # Collapse all facet columns into one factor column, then drop the originals
+        data_formatted <- data_formatted %>%
+            dplyr::ungroup() %>%
+            dplyr::mutate(.facets_collapsed = stringr::str_c(!!! rlang::syms(facet_names),
+                                                             sep = .facet_collapse_sep)) %>%
+            dplyr::mutate(.facets_collapsed = forcats::as_factor(.facets_collapsed)) %>%
+            dplyr::select(-dplyr::all_of(facet_names))
+
+        facet_names <- ".facets_collapsed"
+    }
+
+    # Long format: component name in `.group`, its value in `.group_value`
+    data_formatted <- data_formatted %>%
+        dplyr::ungroup() %>%
+        tidyr::pivot_longer(cols = dplyr::all_of(feature_set),
+                            names_to = ".group", values_to = ".group_value") %>%
+        dplyr::mutate(.group = factor(.group, levels = feature_set))
+
+    # ---- VISUALIZATION ----
+
+    g <- data_formatted %>%
+        ggplot2::ggplot(ggplot2::aes(!! date_var_expr, .group_value)) +
+        ggplot2::labs(x = .x_lab, y = .y_lab, title = .title)
+
+    # Add line
+    g <- g +
+        ggplot2::geom_line(
+            color = .line_color,
+            linewidth = .line_size,
+            linetype = .line_type,
+            alpha = .line_alpha
+        )
+
+    # Add facets: ncol is the number of distinct facet groups; "." means no facet variable
+    if (length(facet_names) == 0) {
+        facet_ncol <- 1
+        facet_groups <- "."
+    } else {
+        facet_ncol <- data_formatted %>%
+            dplyr::distinct(dplyr::pick(dplyr::all_of(facet_names))) %>%
+            nrow()
+        facet_groups <- stringr::str_c(facet_names, collapse = " + ")
+    }
+
+    facet_formula <- stats::as.formula(paste0(".group ~ ", facet_groups))
+
+    g <- g + ggplot2::facet_wrap(facet_formula, ncol = facet_ncol, scales = .facet_scales)
+
+    # Add theme
+    g <- g + theme_tq()
+
+    # Convert to interactive if selected
+    if (.interactive) {
+        return(plotly::ggplotly(g))
+    }
+
+    g
+}
+
+# grouped_df method: ungroups the data and facets by the existing grouping columns.
+#' @export
+plot_anomalies_decomp.grouped_df <- function(
+    .data,
+    .date_var,
+
+    .facet_vars = NULL,
+
+    .facet_scales = "free",
+
+    .line_color = "#2c3e50",
+    .line_size = 0.5,
+    .line_type = 1,
+    .line_alpha = 1,
+
+    .title = "Anomaly Decomposition Plot",
+    .x_lab = "",
+    .y_lab = "",
+    .interactive = TRUE
+) {
+
+    # Tidy Eval Setup
+    group_names <- dplyr::group_vars(.data)
+    facets_expr <- rlang::enquos(.facet_vars)
+
+    # Checks: the existing groups are used for faceting even if .facet_vars was supplied
+    facet_names <- .data %>% dplyr::ungroup() %>% dplyr::select(!!! facets_expr) %>% colnames()
+    if (length(facet_names) > 0) message("plot_anomalies_decomp(...): Groups are previously detected. Grouping by: ",
+                                         stringr::str_c(group_names, collapse = ", "))
+
+    # ---- DATA SETUP ----
+
+    # Ungroup Data
+    data_formatted <- .data %>% dplyr::ungroup()
+
+    # ---- PLOT SETUP ----
+    # all_of() passes the group column names as an explicit tidyselect specification
+    plot_anomalies_decomp.data.frame(
+        .data = data_formatted,
+        .date_var = !! rlang::enquo(.date_var),
+
+        .facet_vars = dplyr::all_of(group_names),
+
+        .facet_scales = .facet_scales,
+        .line_color = .line_color,
+        .line_size = .line_size,
+        .line_type = .line_type,
+        .line_alpha = .line_alpha,
+
+        .title = .title,
+        .x_lab = .x_lab,
+        .y_lab = .y_lab,
+        .interactive = .interactive
+    )
+}
+
diff --git a/man/plot_anomalies.Rd b/man/plot_anomalies.Rd
index 3de5ec5e..1871ef1f 100644
--- a/man/plot_anomalies.Rd
+++ b/man/plot_anomalies.Rd
@@ -2,6 +2,7 @@
 % Please edit documentation in R/anomalize-plotting.R
 \name{plot_anomalies}
 \alias{plot_anomalies}
+\alias{plot_anomalies_decomp}
 \title{Visualize Anomalies for One or More Time Series}
 \usage{
 plot_anomalies(
@@ -33,9 +34,24 @@ plot_anomalies(
   .trelliscope = FALSE,
   .trelliscope_params = list()
 )
+
+plot_anomalies_decomp(
+  .data,
+  .date_var,
+  .facet_vars = NULL,
+  .facet_scales = "free",
+  .line_color = "#2c3e50",
+  .line_size = 0.5,
+  .line_type = 1,
+  .line_alpha = 1,
+  .title = "Anomaly Decomposition Plot",
+  .x_lab = "",
+  .y_lab = "",
+  .interactive = TRUE
+)
 }
 \arguments{
-\item{.data}{A \code{tibble} or \code{data.frame} that has been anomalized by \code{anomalize}}
+\item{.data}{A \code{tibble} or \code{data.frame} that has been anomalized by \code{anomalize()}}
 
 \item{.date_var}{A column containing either date or date-time values}
 
@@ -101,11 +117,16 @@ The only parameters that cannot be passed are:
 }}
 }
 \value{
+A \code{plotly} or \code{ggplot2} visualization
+
 A \code{plotly} or \code{ggplot2} visualization
 }
 \description{
 An interactive and scalable function for visualizing anomalies in time series data.
 Plots are available in interactive \code{plotly} (default) and static \code{ggplot2} format.
+
+An interactive and scalable function for visualizing anomaly decomposition in time series data.
+Plots are available in interactive \code{plotly} (default) and static \code{ggplot2} format.
 }
 \examples{
 library(dplyr)
@@ -116,4 +137,12 @@ walmart_sales_weekly \%>\%
   anomalize(Date, Weekly_Sales) \%>\%
   plot_anomalies(Date, .facet_ncol = 2, .ribbon_alpha = 0.25, .interactive = FALSE)
 
+library(dplyr)
+
+walmart_sales_weekly \%>\%
+  filter(id \%in\% c("1_1", "1_3")) \%>\%
+  group_by(id) \%>\%
+  anomalize(Date, Weekly_Sales) \%>\%
+  plot_anomalies_decomp(Date, .interactive = FALSE)
+
 }