From cfb6bc451e392b6f2f967670f46a02e294c0cf3a Mon Sep 17 00:00:00 2001
From: Matteo Bongiovanni <40599507+MatBon01@users.noreply.github.com>
Date: Sun, 27 Aug 2023 23:42:51 +0100
Subject: [PATCH] Fix maths in background (#102)

* Write introduction for database representation

* Proof read section on database representation

* Define an indexed table

* Define functions in map needed for indexed table

* Fix table reference error

* Improve description of merge

* Fix spacings in merge function

* Add spacings to merge

* Add words to dictionary
---
 report/.hunspell                             |  1 +
 report/background/databaserepresentation.tex | 64 +++++++++++++++++++-
 report/background/utils.sty                  |  2 +
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/report/.hunspell b/report/.hunspell
index 3f65f30..223c2f3 100644
--- a/report/.hunspell
+++ b/report/.hunspell
@@ -265,3 +265,4 @@ Wazir
 Bina
 Vishnuram
 Elysia
+indexedTableRelAlgOps
diff --git a/report/background/databaserepresentation.tex b/report/background/databaserepresentation.tex
index 40ff58d..ca711a9 100644
--- a/report/background/databaserepresentation.tex
+++ b/report/background/databaserepresentation.tex
@@ -1,7 +1,15 @@
 \section{Evolution of database representation}\label{sec:background:dbrep}
+This section introduces the mathematical framework presented in \relalg{} for
+defining the relational model. It is useful to help understand the structure in
+\fref{chap:database}.
+
 \subsection{Bags}
-\paragraph{Characteristics of a database}We expect our database approximation to not be ordered and admit multiplicities and a finite bag of values is one of the simplest constructions that does so. Like a finite set, a bag contains a collection of unordered values. However, unlike a set, bags can contain duplicate elements \cite{RelationalAlgebraByWayOfAdjunctions}.  This multiplicity is key for processing non-idempotent aggregations. For instance, if summing up the ages of a database of people, without admitting multiplicity we would only sum each unique age once.
-\subparagraph{Generalisation}Furthermore, going forward we generalise to bags of any types instead of the classical ``bags of records''. This also allows us to deal with intermediate tables that contain non-record values.
+\paragraph{Characteristics of a database} We expect our database approximation to be unordered and to admit multiplicities; a finite bag of values is one of the simplest constructions that does so. Like a finite set, a bag contains a collection of unordered values. However, unlike a set, a bag can contain duplicate elements. This multiplicity is key for processing non-idempotent aggregations. For instance, when summing the ages in a database of people, without multiplicities we would only count each distinct age once.
+
+\subparagraph{Generalisation} Furthermore, going forward we generalise to bags
+of any type instead of the classical ``bags of records''. This also allows us
+to deal with intermediate tables that contain non-record values which, again,
+may be useful for describing intermediate states of aggregations or projections.
 
 In \fref{tab:BagRelAlgOps} we summarise the implementation of relational algebra operators with bags
 as their bulk type \cite{RelationalAlgebraByWayOfAdjunctions}.
@@ -19,7 +27,7 @@ \subsection{Bags}
         aggregation in monoid $\monoid{M}$ & $reduce\ \monoid{M}$\\
     \end{tabular}
     \caption{Relational algebra operators implemented for bags}
-    \label{tab:BagRelAlgOps}
+    \label{tab:BagRelAlgOps}
 \end{table}
 
 \subsection{Indexed tables}
@@ -29,6 +37,7 @@ \subsection{Indexed tables}
 \theoremstyle{definition}\newtheorem*{ppfuncdef}{Point-preserving function}
 \theoremstyle{definition}\newtheorem*{mapdef}{Map}
 \theoremstyle{definition}\newtheorem*{finitemapdef}{Finite map}
+\theoremstyle{definition}\newtheorem*{indexedtabledef}{Indexed table}
 \begin{psetdef}\label{def:pset}
   A pointed set $\pset{A}{a}$ is a set $A$ with a distinguished element $a \in A$.
 \end{psetdef}
@@ -47,3 +56,52 @@ \subsection{Indexed tables}
   A finite map of type \finitemap{\keyset}{\valset} is a map where only a finite number of keys are mapped to $null_\valset$ (where $null_\valset$ is the distinguished element of \valset). 
 \end{finitemapdef}
 The advantage of using a finite map in a database is to allow aggregation.
+Furthermore, we note the following
+isomorphisms~\cite{RelationalAlgebraByWayOfAdjunctions}, where $1$ is the unit
+type, which is trivially pointed.
+
+\begin{equation*}
+\begin{split}
+    & empty: 1 \rightarrow \finitemap{\keyset}{1} \\
+    & empty = \lambda\:k \rightarrow () \\
+    \\
+    & merge: \finitemap{\keyset}{\valset_1} \times \finitemap{\keyset}{\valset_2}
+             \rightarrow \finitemap{\keyset}{\valset_1 \times \valset_2} \\
+    & merge\ (s,\;t) = \lambda\:k \rightarrow (s\:k,\;t\:k) \\
+\end{split}
+\end{equation*}
+
+The functions above tell us how to create an empty map and how to combine two
+maps over the same keys. As you can see, $empty$ returns a function that maps
+any key to the unit value $()$, the distinguished element of $1$. This is to be
+expected as there are no values in an empty map. More interestingly, $merge$
+takes a pair of maps and returns a map that sends each key to a pair of values,
+each of which holds the result of looking the key up in the respective map.
+Mapping a function over these pairs then yields unions and joins of tables.
+
+We now have the correct machinery to define an indexed table.
+
+\begin{indexedtabledef}
+    An indexed table \indexedTable{\keyset}{\valset} is simply \finitemap{\keyset}{(\bag{\valset})}.
+\end{indexedtabledef}
+
+With the definitions above in place, \fref{tab:indexedTableRelAlgOps} gives a
+summary of the relational algebra operations implemented with indexed tables as
+the bulk type~\cite{RelationalAlgebraByWayOfAdjunctions}.
+
+\begin{table}[h]
+    \centering
+    \begin{tabular}{r|l}
+        \keyset{}-indexed table of \valset{} values & \indexedTable{\keyset}{\valset} \\
+        empty table & $empty$ \\
+        singleton table $(k, v)$ & $k \mapsto \lbag v \rbag$ \\
+        union of tables & $\finitemap{\keyset}{(\uplus)}\ \cdot\ merge$ \\
+        projection $\projsymb{f}$ & $\finitemap{\keyset}{(\finitebag{f})}$ \\
+        selection $\selectsymb{p}$ & $\finitemap{\keyset}{(filter\ p)}$ \\
+        aggregation in monoid $\monoid{M}$ & $\finitemap{\keyset}{(reduce\
+        \monoid{M})}$\\
+        natural join & $\finitemap{\keyset}{(\times)}\ \cdot\ merge$ \\
+    \end{tabular}
+    \caption{Relational algebra operators implemented for indexed tables}
+    \label{tab:indexedTableRelAlgOps}
+\end{table}
diff --git a/report/background/utils.sty b/report/background/utils.sty
index 1f2fa92..0c018fe 100644
--- a/report/background/utils.sty
+++ b/report/background/utils.sty
@@ -26,6 +26,7 @@
 
 % Bag
 \newcommand{\bag}[1]{\ensuremath{\mathrm{Bag}\ #1}}
+\newcommand{\finitebag}[1]{\ensuremath{\mathrm{Bag}_*\ #1}}
 \newcommand{\emptybag}{\ensuremath{\emptyset}}
 \newcommand{\singletonbag}{\ensuremath{single}}
 \newcommand{\bagunion}[2]{\ensuremath{#1 \uplus #2}}
@@ -41,6 +42,7 @@
 \newcommand{\valset}{\ensuremath{\mathrm{V}}}
 \newcommand{\map}[2]{\ensuremath{\mathrm{Map}\ #1\;#2}}
 \newcommand{\finitemap}[2]{\ensuremath{\mathrm{Map}_*\ #1\;#2}}
+\newcommand{\indexedTable}[2]{\ensuremath{\mathrm{Table}\ #1\;#2}}
 
 % Relational model
 \newcommand{\database}[1]{\ensuremath{\mathtt{#1}}}