From 5f56e0baf054a7ee6ee3cfcdb849d44cb0098fb0 Mon Sep 17 00:00:00 2001 From: Christian Bager Bach Houmann Date: Thu, 13 Jun 2024 10:06:21 +0200 Subject: [PATCH 1/3] Spelling corrections --- report_thesis/src/_preamble.tex | 6 +++--- report_thesis/src/sections/appendix/index.tex | 4 ++-- .../sections/background/ensemble_learning_models/etr.tex | 4 ++-- .../background/linear_and_regularization_models/pls.tex | 2 +- .../src/sections/background/preprocessing/kernel_pca.tex | 4 ++-- .../sections/background/preprocessing/power_transform.tex | 2 +- .../experiments/inital_experiments/initial_experiment.tex | 2 +- .../experiments/inital_experiments/initial_results.tex | 2 +- report_thesis/src/sections/introduction.tex | 2 +- .../src/sections/proposed_approach/model_selection.tex | 2 +- .../sections/proposed_approach/optimization_framework.tex | 2 +- report_thesis/src/sections/related_work.tex | 2 +- 12 files changed, 17 insertions(+), 17 deletions(-) diff --git a/report_thesis/src/_preamble.tex b/report_thesis/src/_preamble.tex index ab4080b5..aaa98212 100644 --- a/report_thesis/src/_preamble.tex +++ b/report_thesis/src/_preamble.tex @@ -50,11 +50,11 @@ \title{LASERGAME: Leveraging Advanced Spectroscopy and Ensemble Regression for Geochemical Analysis and Model Evaluation} \author{Christian Bager Bach Houmann} -\email{chouma19@student.aau.dk} +\email{christian@bagerbach.com} \author{Patrick Frostholm Østergaard} -\email{pfas19@student.aau.dk} +\email{ostergaardpatrick@hotmail.com} \author{Ivik Lau Dalgas Hostrup} -\email{ihostr16@student.aau.dk} +\email{ivikhostrup94@gmail.com} \affiliation{ \institution{Aalborg University} \city{Aalborg} diff --git a/report_thesis/src/sections/appendix/index.tex b/report_thesis/src/sections/appendix/index.tex index 2bb14ff5..61180828 100644 --- a/report_thesis/src/sections/appendix/index.tex +++ b/report_thesis/src/sections/appendix/index.tex @@ -57,7 +57,7 @@ \subsection{Cross-Validation Fold Plots for Major Oxides}\label{subsec:cv_plots} \subsection{Initial Experiment: Model Hyperparameters}\label{subsec:initial_experiment_hyperparameters} \begin{table}[!htb] \centering -\caption{Explictly set hyperparameters for the \gls{pls}, \gls{svr}, ridge, \gls{lasso}, \gls{enet}, \gls{rf}, and \gls{etr} models. When not explicitly set, the default hyperparameters provided by the libraries listed in Section~\ref{sec:experimental_setup} are used.} +\caption{Explicitly set hyperparameters for the \gls{pls}, \gls{svr}, ridge, \gls{lasso}, \gls{enet}, \gls{rf}, and \gls{etr} models. When not explicitly set, the default hyperparameters provided by the libraries listed in Section~\ref{sec:experimental_setup} are used.} \begin{tabular}{@{}llp{0.5\textwidth}@{}} \toprule \textbf{Model} & \textbf{Hyperparameter} & \textbf{Value} \\ @@ -114,7 +114,7 @@ \subsection{Initial Experiment: Model Hyperparameters}\label{subsec:initial_expe \begin{table}[!htb] \centering -\caption{Explictly set hyperparameters for the \gls{gbr} and \gls{xgboost} models. When not explicitly set, the default hyperparameters provided by the libraries listed in Section~\ref{sec:experimental_setup} are used. The \gls{ngboost} model does not have any explicitly set hyperparameters.} +\caption{Explicitly set hyperparameters for the \gls{gbr} and \gls{xgboost} models. When not explicitly set, the default hyperparameters provided by the libraries listed in Section~\ref{sec:experimental_setup} are used. 
The \gls{ngboost} model does not have any explicitly set hyperparameters.} \begin{tabular}{@{}llp{0.5\textwidth}@{}} \toprule \textbf{Model} & \textbf{Hyperparameter} & \textbf{Value} \\ diff --git a/report_thesis/src/sections/background/ensemble_learning_models/etr.tex b/report_thesis/src/sections/background/ensemble_learning_models/etr.tex index b1351234..fb9d39fb 100644 --- a/report_thesis/src/sections/background/ensemble_learning_models/etr.tex +++ b/report_thesis/src/sections/background/ensemble_learning_models/etr.tex @@ -10,7 +10,7 @@ \subsubsection{Extra Trees Regressor (ETR)} This further decorrelates the trees, enhancing the model's robustness and reducing the risk of overfitting. By aggregating predictions from multiple trees, the model achieves better generalization and robustness. -For a feature vector $x$, the prediction of a \gls{rf} model can be represeted as an aggregation of the predictions of individual trees: +For a feature vector $x$, the prediction of a \gls{rf} model can be represented as an aggregation of the predictions of individual trees: $$ f(x) = \frac{1}{M} \sum_{m=1}^{M} f_m(x), @@ -22,5 +22,5 @@ \subsubsection{Extra Trees Regressor (ETR)} \gls{etr} extends the \gls{rf} model by introducing additional randomness in the tree-building process, specifically through random feature selection and random split points. While \gls{rf} uses bootstrap sampling and selects the best split from a random subset of features to create a set of diverse samples, \gls{etr} instead selects split points randomly within the chosen features, introducing additional randomness. This process results in even greater variability among the trees, aiming to reduce overfitting and improve the model's robustness. -As a tradeoff, \gls{etr} is less interpretable than a single decision tree, as the added randomness can introduce more bias than \gls{rf}. +As a trade off, \gls{etr} is less interpretable than a single decision tree, as the added randomness can introduce more bias than \gls{rf}. However, it often achieves better generalization performance, especially in high-dimensional or noisy datasets. \ No newline at end of file diff --git a/report_thesis/src/sections/background/linear_and_regularization_models/pls.tex b/report_thesis/src/sections/background/linear_and_regularization_models/pls.tex index d21764c7..cff52ca1 100644 --- a/report_thesis/src/sections/background/linear_and_regularization_models/pls.tex +++ b/report_thesis/src/sections/background/linear_and_regularization_models/pls.tex @@ -1,6 +1,6 @@ \subsubsection{Principal Component Regression (PCR) \& Partial Least Squares (PLS)}\label{subsec:pls} In order to understand \gls{pls}, it is helpful to first consider \gls{pcr}, as \gls{pls} is an extension of \gls{pcr} that aims to address some of its limitations. -We provide provide an overview of both regression techniques based on \citet{James2023AnIS}. +We provide an overview of both regression techniques based on \citet{James2023AnIS}. \gls{pcr} extends \gls{pca} in the context of regression analysis. First, \gls{pca} is performed to identify the $M$ principal components that capture the most variance in the data. 
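As a reading aid for the pls.tex hunk just above, which contrasts PCR (components chosen from the variance of X alone, then ordinary least squares) with PLS (components chosen to also covary with the response), here is a minimal sketch of that distinction. It assumes scikit-learn-style estimators; the synthetic data, component count, and variable names are illustrative assumptions, not the thesis pipeline.

import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 60))          # synthetic stand-in: 100 samples, 60 features
y = X[:, :3] @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=100)

# PCR: unsupervised PCA keeps the M components with the most variance in X,
# then ordinary least squares regresses y on those retained components.
pcr = make_pipeline(PCA(n_components=5), LinearRegression()).fit(X, y)

# PLS: components are chosen to maximize covariance with y as well,
# which is the PCR limitation that PLS is meant to address.
pls = PLSRegression(n_components=5).fit(X, y)

print(pcr.predict(X[:2]), pls.predict(X[:2]).ravel())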
diff --git a/report_thesis/src/sections/background/preprocessing/kernel_pca.tex b/report_thesis/src/sections/background/preprocessing/kernel_pca.tex index 449f7f72..35946800 100644 --- a/report_thesis/src/sections/background/preprocessing/kernel_pca.tex +++ b/report_thesis/src/sections/background/preprocessing/kernel_pca.tex @@ -5,12 +5,12 @@ \subsubsection{Kernel PCA} This mapping enables linear separation of data points in the higher-dimensional space, even if they are not linearly separable in the original space. Similar to \gls{pca}, as described in Section~\ref{subsec:pca}, the goal of \gls{kernel-pca} is to extract the principal components of the data. -Unlike \gls{pca}, \gls{kernel-pca} does not compute the covariance matrix of the data directly, as this is often infeasible for high\\-dimensional datasets. +Unlike \gls{pca}, \gls{kernel-pca} does not compute the covariance matrix of the data directly, as this is often infeasible for high-dimensional datasets. Instead, \gls{kernel-pca} leverages the kernel trick to compute the similarities between data points directly in the original space using a kernel function. This kernel function implicitly computes the dot product in the higher-dimensional feature space without explicitly mapping the data points into that space. That way, \gls{kernel-pca} can capture nonlinear relationships among data points without explicitly transforming them into a higher-dimensional space. By using pairwise similarities to construct a kernel matrix, also referred to as a Gram matrix, \gls{kernel-pca} can perform eigenvalue decomposition. This process allows for the extraction of principal components in the feature space, similar to the approach used in regular \gls{pca}. -However, in \gls{kernel-pca}, the eigenvalue decomposition is performed on the kernel matrix rather than the covariance matrix, resulting in the prinpical components. +However, in \gls{kernel-pca}, the eigenvalue decomposition is performed on the kernel matrix rather than the covariance matrix, resulting in the principal components. These principal components are nonlinear combinations of the original data points, enabling the algorithm to capture complex relationships among data points that are not linearly separable in the original space. \ No newline at end of file diff --git a/report_thesis/src/sections/background/preprocessing/power_transform.tex b/report_thesis/src/sections/background/preprocessing/power_transform.tex index 72491fa5..1242ae24 100644 --- a/report_thesis/src/sections/background/preprocessing/power_transform.tex +++ b/report_thesis/src/sections/background/preprocessing/power_transform.tex @@ -15,7 +15,7 @@ \subsubsection{Power Transformation} $$ where $\lambda$ is the transformation parameter. -$\lambda$ determines the extend and nature of the transformation, where positive values of $\lambda$ apply a power transformation and $\lambda = 0$ applies a logarithmic transformation. +$\lambda$ determines the extent and nature of the transformation, where positive values of $\lambda$ apply a power transformation and $\lambda = 0$ applies a logarithmic transformation. To overcome the limitations of the Box-Cox transformation, \citet{YeoJohnson} introduced a new family of power transformations that can handle both positive and negative values. 
The Yeo-Johnson power transformation is defined as: diff --git a/report_thesis/src/sections/experiments/inital_experiments/initial_experiment.tex b/report_thesis/src/sections/experiments/inital_experiments/initial_experiment.tex index 058cb7dc..149bd5de 100644 --- a/report_thesis/src/sections/experiments/inital_experiments/initial_experiment.tex +++ b/report_thesis/src/sections/experiments/inital_experiments/initial_experiment.tex @@ -7,7 +7,7 @@ \subsection{Initial Experiment Design}\label{sec:initial-experiment} Furthermore, all experiments used our data partitioning and were evaluated using our testing and validation strategy, as described in Section~\ref{subsec:validation_testing_procedures}. To ensure as fair of a comparison between models as possible, all models were trained using as many default hyperparameters as possible, and those hyperparameters that did not have default options were selected based on values found in the literature. However, due to the nature of the neural network models' architecture, some extra time was spent on tuning the models to ensure a fair comparison. -This included using batch normalization for the \gls{cnn} model, as early assesments showed that this was necessary to produce reasonable results. +This included using batch normalization for the \gls{cnn} model, as early assessments showed that this was necessary to produce reasonable results. Finally, we evaluated each model once per oxide given the selected configuration of hyperparameters. As stated, the goal of this experiment was merely to get an initial indication of the performance of the models. diff --git a/report_thesis/src/sections/experiments/inital_experiments/initial_results.tex b/report_thesis/src/sections/experiments/inital_experiments/initial_results.tex index 8e7c0adf..3c5e9b44 100644 --- a/report_thesis/src/sections/experiments/inital_experiments/initial_results.tex +++ b/report_thesis/src/sections/experiments/inital_experiments/initial_results.tex @@ -46,7 +46,7 @@ \subsubsection{Results for Initial Experiment}\label{sec:initial_results} \end{figure*} \begin{table} -\caption{Relative performance of each model compared to the best performing model, measured by normalized RMSECV and multiplied by 100 for percentage. A higher percentage indicates worse performance. The 'Diff. vs Prev.' column shows the difference in performance compared to the next best model, measured in percentage points.} +\caption{Relative performance of each model compared to the best performing model, measured by normalized \gls{rmsecv} and multiplied by 100 for percentage. A higher percentage indicates worse performance. The 'Diff. vs Prev.' column shows the difference in performance compared to the next best model, measured in percentage points.} \begin{tabular}{lrr} \toprule Model & Relative Performance (\%) & Diff. vs Prev. \\ diff --git a/report_thesis/src/sections/introduction.tex b/report_thesis/src/sections/introduction.tex index d25394de..25d115f8 100644 --- a/report_thesis/src/sections/introduction.tex +++ b/report_thesis/src/sections/introduction.tex @@ -61,7 +61,7 @@ \section{Introduction}\label{sec:introduction} Term & Definition \\ \midrule Sample & A physical specimen of rock, soil, or other material collected for scientific analysis.\\ -Location & The specific point on a sample where a LIBS laser is targeted. There are typically multiple locations per sample. \\ +Location & The specific point on a sample where a \gls{libs} laser is targeted. There are typically multiple locations per sample. 
\\ Target & Refers to the variable that a machine learning model is trained to predict. \\ Extreme Concentration Values & The concentration values of oxides in the targets that are significantly higher or lower than the majority of the data. \\ \bottomrule diff --git a/report_thesis/src/sections/proposed_approach/model_selection.tex b/report_thesis/src/sections/proposed_approach/model_selection.tex index 480f2ab9..16cb4933 100644 --- a/report_thesis/src/sections/proposed_approach/model_selection.tex +++ b/report_thesis/src/sections/proposed_approach/model_selection.tex @@ -22,7 +22,7 @@ \subsection{Model and Preprocessing Selection}\label{sec:model_selection} The selected models for experimentation had to be diverse to ensure sufficient breadth in our results, enabling informed decisions about which models to include in the final stacking ensemble pipeline. Additionally, the models had to be suitable for regression tasks. In the absence of research specific to \gls{libs} data, we selected models that have shown promise in other domains. -Our literature review found that a variety of models fit this criteria. +Our literature review found that a variety of models fit these criteria. For example, \citet{andersonPostlandingMajorElement2022} demonstrated that models such as \gls{gbr}, \gls{pls}, \gls{lasso}, and \gls{rf} were each effective at predicting different major oxides from \gls{libs} data. Additionally, \citet{svrforlibs} showed that \gls{svr} outperforms \gls{pls} regression in predicting \ce{Si}, \ce{Ca}, \ce{Mg}, \ce{Fe}, and \ce{Al} using \gls{libs} data. As a result, we included \gls{gbr}, \gls{pls}, \gls{lasso}, \gls{rf}, and \gls{svr} in our experiments. diff --git a/report_thesis/src/sections/proposed_approach/optimization_framework.tex b/report_thesis/src/sections/proposed_approach/optimization_framework.tex index 7d008e7a..85af1de0 100644 --- a/report_thesis/src/sections/proposed_approach/optimization_framework.tex +++ b/report_thesis/src/sections/proposed_approach/optimization_framework.tex @@ -94,7 +94,7 @@ \subsubsection{The Framework} This prevents any form of double preprocessing from occuring, which would lead to potential issues. As mentioned in Section~\ref{subsec:validation_testing_procedures}, we use both cross-validation and a test set to evaluate the model. -This can be seen in line Line~\ref{step:cross_validate} and Lines~\ref{step:train_model} to~\ref{step:evaluate_model}, where cross-validation, training, and evaluation are performed with respect to the current oxide. +This can be seen in Line~\ref{step:cross_validate} and Lines~\ref{step:train_model} to~\ref{step:evaluate_model}, where cross-validation, training, and evaluation are performed with respect to the current oxide. It is important to note that in practice, the model $m$ is being reinstantiated in each iteration of the cross-validation, and again before the model is trained, so no learned parameters are carried over between them. Once a trial is complete, the metrics are returned in Line~\ref{step:return_metrics} to the \texttt{optimize} function in the \nameref{alg:study_function}, which then determines the next steps in the optimization process. 
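The optimization_framework.tex hunk above stresses that the model m is reinstantiated for every cross-validation fold and again before the final fit, so no learned parameters carry over between evaluations. The sketch below illustrates that discipline under stated assumptions: scikit-learn-style estimators, a plain KFold used purely for brevity (the thesis relies on its own data partitioning), and function and variable names that are illustrative only.

import numpy as np
from sklearn.base import clone
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

def evaluate_trial(model, X_train, y_train, X_test, y_test, n_splits=5):
    """Cross-validate with a fresh clone per fold, then refit once for the test metric."""
    fold_rmse = []
    for tr, va in KFold(n_splits=n_splits, shuffle=True, random_state=0).split(X_train):
        m = clone(model)                 # fresh, unfitted copy for this fold
        m.fit(X_train[tr], y_train[tr])
        fold_rmse.append(np.sqrt(mean_squared_error(y_train[va], m.predict(X_train[va]))))

    final = clone(model)                 # reinstantiated again before the final training run
    final.fit(X_train, y_train)
    test_rmse = np.sqrt(mean_squared_error(y_test, final.predict(X_test)))
    return float(np.mean(fold_rmse)), float(test_rmse)

The pair of returned values corresponds loosely to the per-trial cross-validation and test metrics handed back to the optimizer in the passage above.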
diff --git a/report_thesis/src/sections/related_work.tex b/report_thesis/src/sections/related_work.tex index c955ebac..0318cf47 100644 --- a/report_thesis/src/sections/related_work.tex +++ b/report_thesis/src/sections/related_work.tex @@ -17,7 +17,7 @@ \subsection{Machine Learning Models in \gls{libs} Analysis} Their results were evaluated using the \gls{rmse} and \gls{rsd} of predicted versus measured concentrations and showed that the non-linear \gls{svr} model significantly outperformed the linear \gls{pls} regression model at predicting elemental concentrations. The superior performance of \gls{svr} was attributed to its ability to handle non-linearities and matrix effects in the complex geological samples, demonstrating the potential of this machine learning technique for quantitative \gls{libs} analysis in geoscience applications. -\citet{el_haddad_ann_2013} explored the application of \gls{ann} for quantitative analysis of soil samples using \gls{libs}, employing a three-layer perceptron \gls{ann} architecture to address matrix effects and nonlinearities. +\citet{el_haddad_ann_2013} explored the application of \gls{ann} for quantitative analysis of soil samples using \gls{libs}, employing a three-layer perceptron \gls{ann} architecture to address matrix effects and non-linearities. They demonstrated that \gls{ann} is efficient for predicting the concentrations of \ce{Al}, \ce{Ca}, \ce{Cu}, and \ce{Fe}. Incorporating additional spectral lines from other chemical elements, thereby increasing the amount of data input to the model, was also shown to significantly improve predictive accuracy. From 97f301ca4b262486403fa194429bf4c58ae1b87c Mon Sep 17 00:00:00 2001 From: Christian Bager Bach Houmann Date: Thu, 13 Jun 2024 10:08:46 +0200 Subject: [PATCH 2/3] Update report_thesis/src/sections/background/ensemble_learning_models/etr.tex Co-authored-by: Pattrigue <57709490+Pattrigue@users.noreply.github.com> --- .../src/sections/background/ensemble_learning_models/etr.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/report_thesis/src/sections/background/ensemble_learning_models/etr.tex b/report_thesis/src/sections/background/ensemble_learning_models/etr.tex index fb9d39fb..c59ca2a5 100644 --- a/report_thesis/src/sections/background/ensemble_learning_models/etr.tex +++ b/report_thesis/src/sections/background/ensemble_learning_models/etr.tex @@ -22,5 +22,5 @@ \subsubsection{Extra Trees Regressor (ETR)} \gls{etr} extends the \gls{rf} model by introducing additional randomness in the tree-building process, specifically through random feature selection and random split points. While \gls{rf} uses bootstrap sampling and selects the best split from a random subset of features to create a set of diverse samples, \gls{etr} instead selects split points randomly within the chosen features, introducing additional randomness. This process results in even greater variability among the trees, aiming to reduce overfitting and improve the model's robustness. -As a trade off, \gls{etr} is less interpretable than a single decision tree, as the added randomness can introduce more bias than \gls{rf}. +As a trade-off, \gls{etr} is less interpretable than a single decision tree, as the added randomness can introduce more bias than \gls{rf}. However, it often achieves better generalization performance, especially in high-dimensional or noisy datasets. 
\ No newline at end of file From c238e04cb13f5d97e22a26b718e42b83190d0916 Mon Sep 17 00:00:00 2001 From: Christian Bager Bach Houmann Date: Thu, 13 Jun 2024 10:14:12 +0200 Subject: [PATCH 3/3] update cite meaning --- .../src/sections/proposed_approach/testing_validation.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/report_thesis/src/sections/proposed_approach/testing_validation.tex b/report_thesis/src/sections/proposed_approach/testing_validation.tex index 644e9ab0..26c81381 100644 --- a/report_thesis/src/sections/proposed_approach/testing_validation.tex +++ b/report_thesis/src/sections/proposed_approach/testing_validation.tex @@ -212,7 +212,7 @@ \subsubsection{Discussion of Testing and Validation Strategy} In our initial and optimization experiments, we prioritize cross-validation metrics to evaluate the models. This strategy mitigates the risk of overfitting to the test set by avoiding a bias towards lower \gls{rmsep} values. Conversely, for the stacking ensemble experiment, we emphasize test set metrics to comprehensively assess the ensemble's performance, while still considering cross-validation metrics. -This approach aligns with standard machine learning conventions\cite{geronHandsonMachineLearning2023}. +Using cross-validation for initial model selection and tuning experiments aligns with standard machine learning conventions\cite{geronHandsonMachineLearning2023}. In the initial experiment, cross-validation metrics serve as thresholds for model selection. During the optimization phase, only cross-validation metrics guide the search for optimal hyperparameters. For the stacking ensemble experiment, both cross-validation and test set metrics are evaluated, with a primary focus on the \gls{rmsep} metric.
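The testing_validation.tex hunk in this last commit describes leaning on cross-validation metrics while selecting and tuning models, and reserving the held-out test metric (RMSEP) primarily for the final stacking ensemble. The sketch below illustrates that division of responsibilities only; it is not the thesis pipeline, and the base learners, meta-learner, synthetic data, and split sizes are assumptions chosen for brevity.

import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.datasets import make_regression
from sklearn.ensemble import ExtraTreesRegressor, StackingRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.svm import SVR

X, y = make_regression(n_samples=300, n_features=40, noise=0.5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

stack = StackingRegressor(
    estimators=[("pls", PLSRegression(n_components=5)),
                ("svr", SVR(C=10.0)),
                ("etr", ExtraTreesRegressor(n_estimators=100, random_state=0))],
    final_estimator=Ridge(),
)

# Cross-validation metric: used for selection and tuning decisions.
rmsecv = -cross_val_score(stack, X_train, y_train, cv=5,
                          scoring="neg_root_mean_squared_error").mean()

# Held-out test metric: computed once, at the end, for the chosen ensemble.
stack.fit(X_train, y_train)
rmsep = np.sqrt(np.mean((stack.predict(X_test) - y_test) ** 2))
print(f"RMSECV ~ {rmsecv:.3f}, RMSEP ~ {rmsep:.3f}")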