Skip to content

Commit

Permalink
added function to summarize MetaNLP-objects
Browse files Browse the repository at this point in the history
  • Loading branch information
nbruder committed Apr 18, 2024
1 parent f2265b2 commit 905532a
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 0 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Collate:
util.R
delete_functions.R
feature_selection.R
useful_functions.R
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.1
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ export(select_features)
exportMethods(delete_stop_words)
exportMethods(delete_words)
exportMethods(select_features)
exportMethods(summary)
import(methods)
78 changes: 78 additions & 0 deletions R/useful_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Summary of MetaNLP-objects
#'
#' Returns a quick overview of the \eqn{n} most frequent word stems, reported
#' once for all papers and once each for the excluded and included papers.
#'
#' @param object An object of class MetaNLP.
#' @param n Number of most frequent words to be displayed. Must be a single
#' non-negative number. If fewer than \code{n} words are available, all
#' available words are displayed.
#' @param stop_words Boolean to decide whether stop words shall be included in
#' the summary. \code{stop_words = TRUE} means that stop words are included.
#' @param ... Additional parameters for \code{delete_stop_words} (e.g. language
#' of the stop words).
#'
#' @return A list with the elements \code{Total}, \code{Exclude} and
#' \code{Include}. Each element is a \code{noquote} matrix with one column per
#' word and the rows \code{Absolute} (number of occurrences) and
#' \code{Relative} (share of all word occurrences in that group, in percent).
#'
#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv")
#' summary(obj, n = 8)
#' }
#'
#' @rdname summary
#' @export
setMethod("summary", signature("MetaNLP"),
  function(object, n = 5, stop_words = FALSE, ...) {
    if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 0) {
      stop("`n` must be a single non-negative number.", call. = FALSE)
    }

    # word count matrix, with or without stop words
    wcm <- if (stop_words) {
      object@data_frame
    } else {
      delete_stop_words(object, ...)@data_frame
    }

    # Builds the "Absolute"/"Relative" table of the n most frequent words for
    # the given rows of the word count matrix.
    frequency_table <- function(rows) {
      # The first two columns are not word counts and are dropped
      # (presumably the paper id and the decision -- TODO confirm against the
      # MetaNLP constructor).
      counts <- sort(colSums(rows[-c(1, 2)]), decreasing = TRUE)

      # Clamp to the number of available words: indexing with 1:n would pad
      # the result with NA entries if n is too large and would return one
      # word for n = 0.
      top <- counts[seq_len(min(floor(n), length(counts)))]

      # The relative frequency refers to all word occurrences in the group,
      # not only to the displayed ones. A group without any word occurrence
      # is reported as 0 % instead of NaN %.
      denom <- sum(counts)
      share <- if (!is.na(denom) && denom == 0) top * 0 else top / denom * 100

      noquote(rbind("Absolute" = top,
                    "Relative" = paste(round(share, digits = 2), "%")))
    }

    # %in% never yields NA, so rows with a missing decision are left out of
    # both groups (same behaviour as subset()).
    decisions <- wcm[["decision"]]
    excluded <- wcm[decisions %in% "no", , drop = FALSE]
    included <- wcm[decisions %in% "yes", , drop = FALSE]

    # return list with all information
    list("Total" = frequency_table(wcm),
         "Exclude" = frequency_table(excluded),
         "Include" = frequency_table(included))
  }
)
33 changes: 33 additions & 0 deletions man/summary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 905532a

Please sign in to comment.