Merge pull request #147 from chhoumann/svr-background
[KB-185] Define SVR
Pattrigue authored May 24, 2024
2 parents 911609a + a746940 commit 5150243
Showing 2 changed files with 67 additions and 4 deletions.
10 changes: 9 additions & 1 deletion report_thesis/src/references.bib
@@ -460,4 +460,12 @@ @online{PDSGeoscienceNode
urldate = {2023-09-01},
langid = {english},
note = {Last~Accessed: 2023-09-01}
}

@article{druckerSVR,
title = {Support {Vector} {Regression} {Machines}},
abstract = {A new regression technique based on Vapnik's concept of support vectors is introduced. We compare support vector regression (SVR) with a committee regression technique (bagging) based on regression trees and ridge regression done in feature space. On the basis of these experiments, it is expected that SVR will have advantages in high dimensionality space because SVR optimization does not depend on the dimensionality of the input space.},
language = {en},
author = {Drucker, Harris and Burges, Christopher J. C. and Kaufman, Linda and Smola, Alex J. and Vapnik, Vladimir},
file = {Drucker et al. - Support Vector Regression Machines.pdf:C\:\\Users\\Patrick\\Zotero\\storage\\RZEXUIXV\\Drucker et al. - Support Vector Regression Machines.pdf:application/pdf},
}
61 changes: 58 additions & 3 deletions report_thesis/src/sections/background.tex
@@ -47,6 +47,8 @@ \subsubsection{Quantile Transformer}

\subsubsection{Principal Component Analysis (PCA)}
\gls{pca} is a dimensionality reduction technique that transforms a set of possibly correlated variables into a smaller set of uncorrelated variables called \textit{principal components}.
We give an overview of the \gls{pca} algorithm based on \citet{James2023AnIS}.

First, each variable's mean is subtracted from the data matrix $\mathbf{X}$ so that the data is centered at the origin:

$$
@@ -94,11 +96,11 @@ \subsubsection{Kernel PCA}
% \citet{scholkopftKPCA}

\subsection{Overview of Core Models}
In this section, we provide an overview and definitions of \gls{pls}, \gls{svr}, \gls{etr}, \gls{gbr}, and \gls{xgboost}.
These models form the basis of the final architecture of our proposed pipeline, detailed further in Section~\ref{sec:methodology}.

\subsubsection{Partial Least Squares (PLS)}
Having previously introduced \gls{pca}, we now describe \gls{pls} based on \citet{James2023AnIS}.
In order to understand \gls{pls}, it is helpful to first consider \gls{pcr}, as \gls{pls} is an extension of \gls{pcr} that aims to address some of its limitations.

\gls{pcr} extends \gls{pca} in the context of regression analysis.
@@ -148,6 +150,59 @@ \subsubsection{Partial Least Squares (PLS)}
The components are then used to predict the target variable by fitting a linear model via least squares regression.
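For illustration, once the $M$ components have been constructed, this final step amounts to fitting a model of the form

$$
\hat{y}_i = \hat{\theta}_0 + \sum_{m=1}^{M} \hat{\theta}_m z_{im},
$$

where $z_{im}$ denotes the score of the $i$-th observation on the $m$-th component and $\hat{\theta}_0, \dots, \hat{\theta}_M$ are the coefficients estimated by least squares; this notation is introduced here only to sketch the idea.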

\subsubsection{Support Vector Regression (SVR)}
\gls{svr} extends the principles of \gls{svm} to regression problems.
We therefore provide an overview of \gls{svm}s based on \citet{James2023AnIS} before discussing \gls{svr}s.

\gls{svm} is a supervised learning algorithm used primarily for classification tasks.
A core concept in \gls{svm} is the \textit{hyperplane}.
Generally, a hyperplane is a flat affine subspace whose dimension is one less than that of its ambient space.
This means that in a two-dimensional space, a hyperplane is a line, while in a three-dimensional space, it is a plane, and so on.
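More concretely, a hyperplane in a $p$-dimensional space can be described as the set of points $\mathbf{x}$ satisfying an affine equation of the form

$$
\mathbf{w} \cdot \mathbf{x} + b = 0,
$$

where $\mathbf{w}$ is a vector orthogonal to the hyperplane and $b$ is an offset term; the notation mirrors that used for \gls{svr} below. Points for which $\mathbf{w} \cdot \mathbf{x} + b$ is positive lie on one side of the hyperplane and points for which it is negative lie on the other, which is what allows a hyperplane to act as a decision boundary.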

\gls{svm} is built on the idea of finding the hyperplane that best separates the data points into different classes.
This hyperplane is chosen to maximize the margin, which is the distance between the hyperplane and the nearest data point from either class.
The observations that lie on the margin boundary or within the margin are called \textit{support vectors}, as they ``support'' the margin and thereby the decision boundary.
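As an illustrative sketch, assuming two classes encoded as $y_i \in \{-1, +1\}$ and a linearly separable training set, the maximal margin hyperplane can be found by solving

$$
\max_{\mathbf{w}, b, M} \; M \quad \text{subject to} \quad \|\mathbf{w}\| = 1 \quad \text{and} \quad y_i (\mathbf{w} \cdot \mathbf{x}_i + b) \geq M \;\; \text{for all } i,
$$

where $M$ is the margin defined above. In practice, slack variables are added to this problem so that some observations may violate the margin, yielding the soft-margin classifier.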

Building on this discussion of \gls{svm}, we now introduce \gls{svr} based on \citet{druckerSVR}.

\gls{svr} aims to fit a function that predicts continuous values rather than finding the hyperplane that best separates data points.
Instead of using a hyperplane to separate the data, \gls{svr} uses two parallel hyperplanes to define a margin within which the function should lie.
This margin is often referred to as the $\epsilon$-\textit{tube}, where $\epsilon$ is a hyperparameter that defines the width of the tube.
The goal is to find a function $f(\mathbf{x})$ such that as many data points as possible lie within this tube, i.e., deviate from $f(\mathbf{x})$ by at most $\epsilon$.
$f(\mathbf{x})$ is typically defined as a linear function of the form:

$$
f(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b,
$$

where:

\begin{itemize}
\item $\mathbf{w}$ is the weight vector,
\item $\mathbf{x}$ is the input vector, and
\item $b$ is the bias term.
\end{itemize}

The two parallel hyperplanes at a distance $\epsilon$ from $f(\mathbf{x})$ are defined as:

$$
\begin{aligned}
f^+(\mathbf{x}) &= \mathbf{w} \cdot \mathbf{x} + b + \epsilon, \\
f^-(\mathbf{x}) &= \mathbf{w} \cdot \mathbf{x} + b - \epsilon,
\end{aligned}
$$

or, more succinctly:

$$
\begin{aligned}
f^+(\mathbf{x}) &= f(\mathbf{x}) + \epsilon, \\
f^-(\mathbf{x}) &= f(\mathbf{x}) - \epsilon,
\end{aligned}
$$

where $f^+(\mathbf{x})$ and $f^-(\mathbf{x})$ are the upper and lower bounds of the $\epsilon$-insensitive tube, respectively.
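Deviations that fall outside these bounds can be penalized through the $\epsilon$-insensitive loss, which may be sketched as

$$
L_\epsilon\left(y, f(\mathbf{x})\right) = \max\left(0, \; |y - f(\mathbf{x})| - \epsilon\right),
$$

so that points inside the tube incur no loss, while points outside it are penalized linearly by their distance to the nearest bound.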

The optimization problem in \gls{svr} is to find the coefficients $\mathbf{w}$ and $b$ that minimize the norm of $\mathbf{w}$ (i.e., keep the regression function as flat as possible) while ensuring that most data points lie within the $\epsilon$-tube.
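As a sketch of this optimization problem, introducing slack variables $\xi_i$ and $\xi_i^*$ (not part of the formulation above) to allow some points to fall outside the tube, one common soft-margin formulation can be written as

$$
\begin{aligned}
\min_{\mathbf{w},\, b,\, \xi,\, \xi^*} \quad & \frac{1}{2} \|\mathbf{w}\|^2 + C \sum_{i=1}^{n} \left( \xi_i + \xi_i^* \right) \\
\text{subject to} \quad & y_i - (\mathbf{w} \cdot \mathbf{x}_i + b) \leq \epsilon + \xi_i, \\
& (\mathbf{w} \cdot \mathbf{x}_i + b) - y_i \leq \epsilon + \xi_i^*, \\
& \xi_i, \xi_i^* \geq 0 \quad \text{for all } i,
\end{aligned}
$$

where $C$ is a regularization hyperparameter that controls the trade-off between the flatness of $f(\mathbf{x})$ and the degree to which deviations larger than $\epsilon$ are tolerated.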

\subsubsection{Extra Trees Regressor (ETR)}

@@ -156,4 +211,4 @@ \subsubsection{Gradient Boosting Regressor (GBR)}
\subsubsection{XGBoost}

\subsection{Stacking Ensemble}
% \citet{pavlyshenko2018stacking}
