imbi-heidelberg · max-pilz · Nov 7, 2024 · Nov 7, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,13 +2,13 @@ Package: MetaNLP
 Type: Package
 Title: Natural Language Processing for Meta Analysis
 Version: 0.1.2.9000
-Authors@R: c(person("Nico", "Bruder", role = c("aut"), email = "[email protected]"),
+Authors@R: c(person("Nico", "Bruder", role = c("aut"), email = "[email protected]", comment = c(ORCID = "0009-0004-9522-2075")),
         person("Samuel", "Zimmermann", role = c("aut"), email = "[email protected]", comment = c(ORCID = "0009-0000-4828-9294")),
         person("Johannes", "Vey", role = c("aut"), email = "[email protected]", comment = c(ORCID = "0000-0002-2610-9667")),
         person("Maximilian", "Pilz", role = c("aut", "cre"), email = "[email protected]", comment = c(ORCID = "0000-0002-9685-1613")),
         person(given = "Institute of Medical Biometry - University of Heidelberg", role = c("cph")))
 Description: Given a CSV file with titles and abstracts, the package creates a
-    word count matrix that is lemmatized and stemmed and can directly be used to
+    document-term matrix that is lemmatized and stemmed and can directly be used to
     train machine learning methods for automatic title-abstract screening in the
     preparation of a meta analysis.
 License: MIT + file LICENSE
@@ -34,7 +34,7 @@ Collate:
     useful_functions.R
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 BugReports: https://github.com/imbi-heidelberg/MetaNLP/issues
 URL: https://github.com/imbi-heidelberg/MetaNLP
 Config/testthat/edition: 3

diff --git a/R/MetaNLP.R b/R/MetaNLP.R
@@ -10,12 +10,12 @@
 "_PACKAGE"
 
 
-#' Create a data frame with word counts
+#' Create a data frame containing a document-term matrix
 #'
 #' A \code{MetaNLP} object is the base class of the package \pkg{MetaNLP}.
 #' It is initialized by passing the path to a CSV file and constructs
 #' a data frame whose column names are the words that occur in the titles
-#' and abstracts and whose cells contain the word counts for each
+#' and abstracts and whose cells contain the word frequencies for each
 #' paper.
 #'
 #' @rdname MetaNLP
@@ -42,7 +42,7 @@ setClass("MetaNLP", representation(data_frame = "data.frame"))
 #'
 #' @details
 #' An object of class \code{MetaNLP} contains a slot data_frame where
-#' the word count data frame is stored.
+#' the document-term matrix is stored as a data frame.
 #' The CSV file must have a column \code{ID} to identify each paper, a column
 #' \code{title} with the corresponding titles of the papers and a column
 #' \code{abstract} which contains the abstracts. If the CSV stores training data,
@@ -196,7 +196,7 @@ setMethod("plot", signature("MetaNLP", y = "missing"),
             # check whether decision column exists and filter data
             if(dec != "total") {
               if(is.null(x@data_frame$decision_)) {
-                warning("Column decision_ does not exist. Word cloud is created by using the whole word count matrix.")
+                warning("Column decision_ does not exist. Word cloud is created by using the whole document-term matrix.")
                 data <- x@data_frame
               }
               else {

diff --git a/R/delete_functions.R b/R/delete_functions.R
@@ -2,7 +2,7 @@
 #'
 #' There can be words that do not offer additional information
 #' in the classification whether a paper should be included or excluded
-#' from a meta-analysis. Thus, such words should not be part of the word count
+#' from a meta-analysis. Thus, such words should not be part of the document-term
 #' matrix. This function allows the user to remove these columns of the
 #' document-term matrix by specifying a vector of words to delete.
 #'
@@ -13,7 +13,7 @@
 #' @details
 #' The words in \code{delete_list} can be given like they appear in the
 #' text. They are lemmatized and stemmed by \code{delete_words} to match the
-#' columns of the word count matrix.
+#' columns of the document-term matrix.
 #'
 #' @export
 setGeneric("delete_words", function(object, delete_list) {
@@ -53,7 +53,7 @@ setMethod("delete_words", signature("MetaNLP", "character"),
 #'
 #' Usually, stop words do not offer useful information in the classification
 #' whether a paper should be included or excluded
-#' from a meta-analysis. Thus, such words should not be part of the word count
+#' from a meta-analysis. Thus, such words should not be part of the document-term
 #' matrix. This function allows the user to automatically delete stop words.
 #'
 #' @param object A MetaNLP object, whose data frame is to be modified.
@@ -94,7 +94,7 @@ setMethod("delete_stop_words", signature("MetaNLP"),
 
 #' Replace special characters in column names
 #'
-#' When using non-english languages, the column names of the word count matrix
+#' When using non-English languages, the column names of the document-term matrix
 #' can contain special characters. These might lead to encoding problems when
 #' this matrix is used to train a machine learning model. This function
 #' automatically replaces all special characters by the nearest equivalent

diff --git a/R/feature_selection.R b/R/feature_selection.R
@@ -1,6 +1,6 @@
 #' Select features via elasticnet regularization
 #'
-#' As the word count matrix quickly grows with an increasing number of abstracts,
+#' As the document-term matrix quickly grows with an increasing number of abstracts,
 #' it can easily reach several thousand columns. Thus, it can be important to
 #' extract the columns that carry most of the information in the decision making
 #' process. This function uses a generalized linear model combined with

diff --git a/R/useful_functions.R b/R/useful_functions.R
@@ -82,14 +82,14 @@ setGeneric("write_csv", function(object, ...) {
 })
 
 
-#' Save the word count matrix
+#' Save the document-term matrix
 #'
-#' This function can be used to save the word count matrix of a MetaNLP object
+#' This function can be used to save the document-term matrix of a MetaNLP object
 #' as a csv-file.
 #'
 #' @param object An object of class MetaNLP.
 #' @param path Path where to save the csv.
-#' @param type Specifies if the word count matrix should be saved as
+#' @param type Specifies if the document-term matrix should be saved as
 #' "train_wcm.csv" or "test_wcm.csv". If the user wants to use another file name,
 #' the whole path including the file name should be given as the \code{path}
 #' argument
@@ -141,12 +141,12 @@ setMethod("write_csv", signature("MetaNLP"),
 #' Read and adapt test data
 #'
 #' This function takes a MetaNLP object (the training data) and the
-#' test data. The function creates the word count matrix from the test data
+#' test data. The function creates the document-term matrix from the test data
 #' and matches the columns of the given training MetaNLP object with the columns
-#' of the test word count matrix. This means that columns, which do appear
-#' in the test word count matrix but not in the training word count matrix are
-#' removed; columns that appear in the training word count matrix but not in the
-#' test word count matrix are added as a column consisting of zeros.
+#' of the test document-term matrix. This means that columns that appear
+#' in the test document-term matrix but not in the training document-term matrix are
+#' removed; columns that appear in the training document-term matrix but not in the
+#' test document-term matrix are added as columns consisting of zeros.
 #'
 #' @param object The MetaNLP object created from the training data.
 #' @param file Either the path to the test data csv, the data frame containing

diff --git a/man/MetaNLP.Rd b/man/MetaNLP.Rd
diff --git a/man/delete_stop_words.Rd b/man/delete_stop_words.Rd
diff --git a/man/delete_words.Rd b/man/delete_words.Rd
diff --git a/man/read_test_data.Rd b/man/read_test_data.Rd
diff --git a/man/replace_special_characters.Rd b/man/replace_special_characters.Rd
diff --git a/man/select_features.Rd b/man/select_features.Rd
diff --git a/man/write_csv.Rd b/man/write_csv.Rd