Skip to content

Commit

Permalink
Changes requested for CRAN-release
Browse files Browse the repository at this point in the history
  • Loading branch information
nbruder committed May 21, 2024
1 parent 15463e6 commit 363ae76
Show file tree
Hide file tree
Showing 17 changed files with 208 additions and 144 deletions.
37 changes: 9 additions & 28 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,15 @@ Package: MetaNLP
Type: Package
Title: Natural Language Processing for Meta Analysis
Version: 0.1.1
Authors@R: c(
person("Nico", "Bruder",
role = c("aut"),
email = "[email protected]"
),
person("Samuel", "Zimmermann",
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0009-0000-4828-9294"),
),
person("Johannes", "Vey",
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-2610-9667"),
),
person("Maximilian", "Pilz",
role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-9685-1613")
),
person(given = "Institute of Medical Biometry - University of Heidelberg",
role = c("cph")
)
)
Description: Given a CSV file with titles and abstracts, the package
creates a word count matrix that is lemmatized and stemmed and can directly
be used to train machine learning methods for automatic title-abstract
screening in the preparation of a meta analysis.
Authors@R: c(person("Nico", "Bruder", role = c("aut"), email = "[email protected]"),
person("Samuel", "Zimmermann", role = c("aut"), email = "[email protected]", comment = c(ORCID = "0009-0000-4828-9294")),
person("Johannes", "Vey", role = c("aut"), email = "[email protected]", comment = c(ORCID = "0000-0002-2610-9667")),
person("Maximilian", "Pilz", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-9685-1613")),
person(given = "Institute of Medical Biometry - University of Heidelberg", role = c("cph")))
Description: Given a CSV file with titles and abstracts, the package creates a
word count matrix that is lemmatized and stemmed and can directly be used to
train machine learning methods for automatic title-abstract screening in the
preparation of a meta analysis.
License: MIT + file LICENSE
Suggests:
knitr,
Expand Down
21 changes: 11 additions & 10 deletions R/MetaNLP.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
#'
#' A \code{MetaNLP} object is the base class of the package \pkg{MetaNLP}.
#' It is initialized by passing the path to a CSV file and constructs
#' a data frame which column names are the words that occur in the titles
#' and abstracts and which cells contain the word counts for each
#' a data frame whose column names are the words that occur in the titles
#' and abstracts and whose cells contain the word counts for each
#' paper.
#'
#' @rdname MetaNLP
setClass("MetaNLP", representation(data_frame = "data.frame"))

#' @param file Either the path to the CSV file or data frame contain
#' @param file Either the path to the CSV file or a data frame containing the
#' abstracts
#' @param bounds An integer vector of length 2. The first value specifies
#' the minimum number of appearances of a word to become a column of the word
#' count matrix, the second value specifies the maximum number.
Expand Down Expand Up @@ -50,17 +51,16 @@ setClass("MetaNLP", representation(data_frame = "data.frame"))
#' The value "maybe" is handled as a "yes"/"include".
#'
#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv")}
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path)
#'
#' @note
#' To ensure correct processing of the data when there are special characters
#' (e.g. "é" or "ü"), make sure that the csv-file is correctly encoded
#' as \code{UTF-8}.
#' The stemming algorithm makes use of the C libstemmer library generated by
#' Snowball. When German texts are stemmed, umlauts are replaced by their
#' non-umlaut equivalent, so "ä" becomes "a" etc. This behaviour cannot be
#' changed.
#' non-umlaut equivalent, so "ä" becomes "a" etc.
#'
#' @rdname MetaNLP
#' @export
Expand Down Expand Up @@ -169,9 +169,10 @@ setMethod("show", signature("MetaNLP"),
#' @param colors Character vector with the colors used in the word cloud
#' @param ... Additional parameters for \link[wordcloud]{wordcloud}
#'
#' @examples \dontrun{
#' obj <- MetaNLP("test_data.csv")
#' plot(obj)}
#' @examples
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path)
#' plt <- plot(obj)
#'
#' @return nothing
#' @export
Expand Down
16 changes: 7 additions & 9 deletions R/delete_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ setGeneric("delete_words", function(object, delete_list) {


#' @examples
#' \dontrun{
#' obj<- MetaNLP("test_data.csv")
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path)
#' del_words <- c("beautiful", "considering", "found")
#' obj <- delete_words(obj, del_words)
#' }
#'
#'
#' @rdname delete_words
#' @export
Expand Down Expand Up @@ -72,10 +72,9 @@ setGeneric("delete_stop_words", function(object, ...) {
})

#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv")
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path)
#' obj <- delete_stop_words(obj, "english")
#' }
#'
#' @rdname delete_stop_words
#' @export
Expand Down Expand Up @@ -106,10 +105,9 @@ setMethod("delete_stop_words", signature("MetaNLP"),
#' special characters anymore.
#'
#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv", language = "french")
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path, language = "french")
#' obj <- replace_special_characters(obj)
#' }
#'
#' @rdname replace_special_characters
#' @export
Expand Down
14 changes: 5 additions & 9 deletions R/feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@
#' parameter \eqn{\lambda}, which determines the weight of the penalty, can
#' either be chosen via cross validation (using \link[glmnet]{cv.glmnet}) or by
#' giving a numeric value.
#' For further documentation and background information on \code{\link{glmnet}},
#' have a look at the
#' \href{https://glmnet.stanford.edu/articles/glmnet.html#logistic-regression-family-binomial}{online documentation}.
#' @importFrom glmnet glmnet
#'
#' @importFrom glmnet glmnet
#'
Expand Down Expand Up @@ -68,12 +64,12 @@ setGeneric("select_features", function(object, ...) {
#' columns will be chosen.
#'
#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv")
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path)
#'
#' obj2 <- select_features(obj, alpha = 0.7)
#' obj2 <- select_features(obj, lambda = "1se")
#'
#' obj <- select_features(obj, alpha = 0.7)
#' obj <- select_features(obj, lambda = "1se", type.measure = "auc")
#' }
#'
#' @rdname select_features
#' @export
Expand Down
40 changes: 16 additions & 24 deletions R/useful_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@
#' @return A list of most frequent words.
#'
#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv")
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path)
#' summary(obj, n = 8)
#' }
#'
#' @rdname summary
#' @export
Expand Down Expand Up @@ -89,8 +88,7 @@ setGeneric("write_csv", function(object, ...) {
#' as a csv-file.
#'
#' @param object An object of class MetaNLP.
#' @param path Path where to save the csv. If no path is set, the csv is saved
#' in the current working directory
#' @param path Path where to save the csv.
#' @param type Specifies if the word count matrix should be saved as
#' "train_wcm.csv" or "test_wcm.csv". If the user wants to use another file name,
#' the whole path including the file name should be given as the \code{path}
Expand All @@ -99,28 +97,25 @@ setGeneric("write_csv", function(object, ...) {
#' as \code{UTF-8}.
#'
#' @details
#' Overall, there are three options to specify the path. By
#' default, no path is set, so the csv is saved as "train_wcm.csv" or
#' "test_wcm.csv" in the current working directory. If a path to a specific
#' folder is given (but the path name does not end with ".csv"), the file is
#' saved in this folder as "train_wcm.csv" or "test_wcm.csv".
#' If a path to a specific folder is given (but the path name does not end with
#' ".csv"), the file is saved in this folder as "train_wcm.csv" or "test_wcm.csv".
#' By providing a path ending with ".csv", the user can override the default
#' naming convention and the file is saved according to this path.
#'
#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv")
#' path <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' obj <- MetaNLP(path)
#' obj2 <- delete_stop_words(obj)
#' write_csv(obj2)
#' write_csv(obj2, path = "foo.csv")
#' }
#' write_path <- tempdir()
#' write_csv(obj2, path = write_path)
#' file.remove(file.path(write_path, "train_wcm.csv"))
#'
#' @return nothing
#'
#' @rdname write_csv
#' @export
setMethod("write_csv", signature("MetaNLP"),
function(object, path = "", type = c("train", "test"), ...) {
function(object, path, type = c("train", "test"), ...) {
lastchar <- 0

# extract data
Expand All @@ -134,13 +129,9 @@ setMethod("write_csv", signature("MetaNLP"),
# create file path
if(lastchar == ".csv") {
path_to_save <- path
} else if(path == ""){
type <- match.arg(type)
path_to_save <- paste0(type, "_wcm.csv")
} else {
type <- match.arg(type)
path_to_save <- file.path(path, paste0(type, "_wcm.csv"))

}

utils::write.csv2(data, file = path_to_save, row.names = FALSE, ...)
Expand All @@ -163,10 +154,11 @@ setMethod("write_csv", signature("MetaNLP"),
#' @param ... Further arguments to \code{MetaNLP}.
#'
#' @examples
#' \dontrun{
#' obj <- MetaNLP("test_data.csv")
#' to_test_obj <- read_test_data(obj, "path/to_test.csv")
#' }
#' path_train <- system.file("extdata", "test_data.csv", package = "MetaNLP", mustWork = TRUE)
#' path_test <- system.file("extdata", "test_data_changed.csv", package = "MetaNLP", mustWork = TRUE)
#' obj_train <- MetaNLP(path_train)
#' obj_test <- MetaNLP(path_test)
#' to_test_obj <- read_test_data(obj_train, obj_test)
#'
#' @return An object of class MetaNLP
#'
Expand Down
Loading

0 comments on commit 363ae76

Please sign in to comment.