diff --git a/report/background/databaserepresentation.tex b/report/background/databaserepresentation.tex index 7ec0742..14b63ce 100644 --- a/report/background/databaserepresentation.tex +++ b/report/background/databaserepresentation.tex @@ -56,3 +56,6 @@ \subsection{Indexed tables} The advantage of using a finite map in a database is to allow aggregation. \todo{Understand why only semi-monoidal} \todo{Introduced indexed tables} +\paragraph{Useful functions}{} \todo{Explain all the functions needed, such as +merge\label{sec:finitemapfuncs}} + diff --git a/report/background/final.tex b/report/background/final.tex index fb9ab4c..07e2880 100644 --- a/report/background/final.tex +++ b/report/background/final.tex @@ -14,4 +14,8 @@ \chapter{Background} Note 1: Often the terms Background, Literature Review, Related Work and State of the Art are used interchangeably. Note 2: Keyword search is wonderful, but you need the right Keywords. Note 2: IEEExplore, ACM Digital Library and Science Direct may require you to be on the College network to download the PDF versions of papers. If at home, use VPN. -\end{comment} \ No newline at end of file +\end{comment} + +% Currently add in only to stop reference error - not ready for submission +\input{background/relationalmodel} +\input{background/databaserepresentation} diff --git a/report/background/utils.sty b/report/background/utils.sty index f166424..e711019 100644 --- a/report/background/utils.sty +++ b/report/background/utils.sty @@ -26,6 +26,7 @@ % Pointed set \newcommand{\pset}[2]{\ensuremath{\left(#1,\,#2\right)}} +\newcommand{\pointed}[1]{\ensuremath{\mathit{Pointed}\ #1}} % For code % Maps \newcommand{\keyset}{\ensuremath{\mathrm{K}}} @@ -43,3 +44,7 @@ \newcommand{\natjoin}[2]{\ensuremath{\relation{#1}\bowtie\relation{#2}}} \newcommand{\thetajoin}[3]{\ensuremath{\relation{#2}\bowtie_{#1}\relation{#3}}} \newcommand{\equijoin}[4]{\ensuremath{\relation{#1}\,{}_{#2}\!\bowtie_{\:#4}\relation{#3}}} + +% Temporary function to show italicised code similar to maths +% TODO: Change +\newcommand{\mathcodefunc}[1]{\ensuremath{\mathit{#1}}} diff --git a/report/bibs/combined.bib b/report/bibs/combined.bib index 664b0cd..0d57a39 100644 --- a/report/bibs/combined.bib +++ b/report/bibs/combined.bib @@ -161,3 +161,11 @@ @misc{HDBC title={HDBC-2.4.0.4: Haskell Database Connectivity}, url={https://hackage.haskell.org/package/HDBC-2.4.0.4/docs/Database-HDBC.html#g:2}, } +% Check the referencing style (webpage) +@misc{Prelude, + title={Standard types, classes and related functions}, + volume={2023}, + number={April 29th}, + url={https://hackage.haskell.org/package/base-4.18.0.0/docs/Prelude.html#g:3} +} + diff --git a/report/project/benchmark/implementation.tex b/report/project/benchmark/implementation.tex index c102a77..3266207 100644 --- a/report/project/benchmark/implementation.tex +++ b/report/project/benchmark/implementation.tex @@ -5,6 +5,22 @@ \section{Implementation} \cite{RelationalAlgebraByWayOfAdjunctionsPrototypeImplementation}. \todo{Write what I contributed} +\subsection{Interface design} +Sharing many similarities with \cite{RelationalAlgebraByWayOfAdjunctions}, there +is a considerable design decision that most functions are uncurried. Although +seeming unintuitive, it follows more strictly the mathematical definitions of +the operations, for instance the Cartesian Product on bags $(\times): \bag{a} +\times \bag{b} \to \bag{\left(a \times b\right)}$. + +In an implementation perspective, however, it helps unify many of the other +functions. You will notice that the \mathcodefunc{merge} function in +\cite{RelationalAlgebraByWayOfAdjunctions} as seen in \fref{sec:finitemapfuncs} +\todo{Actually write this section} has the final type \bag{\left(a \times + b\right)} and so by writing these other operations to work on pairs allows +greater synergy between the interface. This could easily be allowed using +Haskell's built in functions\cite{Prelude} such as \texttt{uncurry} but I felt +as though it created a clunkier interface. + \subsection{Pointed sets} This section describes the types that declare instances of the \texttt{PointedSet} type class. @@ -19,7 +35,7 @@ \subsection{Pointed sets} using pattern matching in order to avoid the \texttt{Eq} constraint on the container type of the Bag (i.e. \texttt{Eq a => Bag a}). This instance is key for the indexing operation $ix$. The function $ix$ has a type $ix :: -\bag{(k, v)} \to \map{k}{(\bag{v})}$, coupled with the requirement $(Pointed v) +\bag{(k, v)} \to \map{k}{(\bag{v})}$, coupled with the requirement $(\pointed{v}) => \map{k}{v}$ the necessity of the implementation becomes clear and it is important to note that when indexing the bag of key-value pairs, if a key has no value, it is the "null" value for \texttt{Bag} that is used and not for $v$ diff --git a/src/Data/Bag.hs b/src/Data/Bag.hs index ce436e8..1136af8 100644 --- a/src/Data/Bag.hs +++ b/src/Data/Bag.hs @@ -29,7 +29,7 @@ instance Monad Bag where Bag xs >>= k = Bag (xs >>= (elements . k)) instance Semigroup (Bag a) where - (<>) = Data.Bag.union + (<>) = curry Data.Bag.union instance Monoid (Bag a) where mempty = Data.Bag.empty @@ -46,16 +46,16 @@ instance Pointed.PointedSet (Bag a) where empty :: Bag a empty = Bag [] -union :: Bag a -> Bag a -> Bag a -b1 `union` b2 = Bag (elements b1 ++ elements b2) +union :: (Bag a, Bag a) -> Bag a +union (b1, b2) = Bag (elements b1 ++ elements b2) reduceBag :: CMonoid m => Bag m -> m -- Reduce a bag of ms into an m (e.g. a bag of bags into a bag) reduceBag = mconcat . elements -- Cartesian product -cp :: Bag a -> Bag b -> Bag (a, b) -cp (Bag xs) (Bag ys) = Bag [(x, y) | x <- xs, y <- ys] +cp :: (Bag a, Bag b) -> Bag (a, b) +cp (Bag xs, Bag ys) = Bag [(x, y) | x <- xs, y <- ys] -- Create singleton bag single :: a -> Bag a diff --git a/src/Database/Bag.hs b/src/Database/Bag.hs index 5d60668..d50b834 100644 --- a/src/Database/Bag.hs +++ b/src/Database/Bag.hs @@ -11,10 +11,10 @@ empty = Bag.empty single :: a -> Table a single = Bag.single -union :: Table a -> Table a -> Table a +union :: (Table a, Table a) -> Table a union = Bag.union -cp :: Table a -> Table b -> Table (a, b) +cp :: (Table a, Table b) -> Table (a, b) cp = Bag.cp -- TODO:: check @@ -30,8 +30,8 @@ select p = Bag.Bag . filter p . Bag.elements aggregate :: CMonoid a => Table a -> a aggregate = Bag.reduceBag -equijoinWithCp :: Eq c => (a -> c) -> (b -> c) -> Table a -> Table b -> Table (a, b) -equijoinWithCp fa fb as bs = select equality (cp as bs) +equijoinWithCp :: Eq c => (a -> c) -> (b -> c) -> (Table a, Table b) -> Table (a, b) +equijoinWithCp fa fb = select equality . cp where equality (a, b) = fa a == fb b diff --git a/src/Database/IndexedTable.hs b/src/Database/IndexedTable.hs index 2ccda5c..676ca81 100644 --- a/src/Database/IndexedTable.hs +++ b/src/Database/IndexedTable.hs @@ -11,7 +11,7 @@ singleton :: (Map.Key k) => (k, v) -> Map.Map k (Bag.Bag v) singleton (k, v) = Map.single (k, Bag.single v) union :: (Map.Key k) => Map.Map k (Bag.Bag v) -> Map.Map k (Bag.Bag v) -> Map.Map k (Bag.Bag v) -union t1 t2 = (fmap (uncurry Bag.union) . Map.merge) (t1, t2) +union t1 t2 = (fmap Bag.union . Map.merge) (t1, t2) projection :: (Map.Key k) => (v -> w) -> Map.Map k (Bag.Bag v) -> Map.Map k (Bag.Bag w) projection = fmap . fmap @@ -23,5 +23,5 @@ aggregation :: (Map.Key k, CMonoid m) => Map.Map k (Bag.Bag m) -> Map.Map k m aggregation = fmap Bag.reduceBag -- Joins on common keys -naturalJoin :: (Map.Key k) => Map.Map k (Bag.Bag v) -> Map.Map k (Bag.Bag w) -> Map.Map k (Bag.Bag (v, w)) -naturalJoin t1 t2 = fmap (uncurry Bag.cp) (Map.merge (t1 , t2)) +naturalJoin :: (Map.Key k) => (Map.Map k (Bag.Bag v), Map.Map k (Bag.Bag w)) -> Map.Map k (Bag.Bag (v, w)) +naturalJoin = fmap Bag.cp . Map.merge diff --git a/test/Data/BagSpec.hs b/test/Data/BagSpec.hs index 89f5125..8aa98f5 100644 --- a/test/Data/BagSpec.hs +++ b/test/Data/BagSpec.hs @@ -62,7 +62,7 @@ spec = do describe "Data.Bag.union" $ do it "returns the union of both bags" $ do - (Bag.Bag ['a', 'c', 'a'] `Bag.union` Bag.Bag ['c', 'd']) `shouldBe` Bag.Bag ['a', 'c', 'a', 'c', 'd'] + Bag.union (Bag.Bag ['a', 'c', 'a'], Bag.Bag ['c', 'd']) `shouldBe` Bag.Bag ['a', 'c', 'a', 'c', 'd'] describe "Data.Bag Semigroup" $ do it "has an associative operator <>" $ do @@ -89,9 +89,9 @@ spec = do (getProduct . Bag.reduceBag) (Bag.Bag [1, 2, 3, 4] :: Bag.Bag (Product Int)) `shouldBe` 24 describe "Data.Bag.cp" $ do it "correctly can calculate the cartesian product of two bags" $ do - Bag.cp b1 b2 `shouldBe` Bag.Bag [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'b'), ('b', 'c'), ('b', 'd'), ('c', 'b'), ('c', 'c'), ('c', 'd')] + Bag.cp (b1, b2) `shouldBe` Bag.Bag [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'b'), ('b', 'c'), ('b', 'd'), ('c', 'b'), ('c', 'c'), ('c', 'd')] it "correctly deals with the empty bag in a cartesian product" $ do - Bag.cp b1 (Bag.empty :: Bag.Bag Int) `shouldBe` (Bag.empty :: Bag.Bag (Char, Int)) + Bag.cp (b1, (Bag.empty :: Bag.Bag Int)) `shouldBe` (Bag.empty :: Bag.Bag (Char, Int)) describe "Data.Bag.single" $ do it "creates a singleton bag" $ do Bag.single 'a' `shouldBe` Bag.Bag ['a'] diff --git a/test/Database/BagSpec.hs b/test/Database/BagSpec.hs index 72adbcc..46501d7 100644 --- a/test/Database/BagSpec.hs +++ b/test/Database/BagSpec.hs @@ -75,18 +75,18 @@ spec = do DB.single 5 `shouldBe` Bag.Bag [5] describe "Database.Bag union" $ do it "uses the bag union to calculate the union" $ do - Bag.Bag [1, 2, 3] `DB.union` Bag.Bag [3, 4, 5] `shouldBe` Bag.Bag [1, 2, 3, 3, 4, 5] + DB.union (Bag.Bag [1, 2, 3], Bag.Bag [3, 4, 5]) `shouldBe` Bag.Bag [1, 2, 3, 3, 4, 5] it "can correctly deal with one empty table in the union" $ do - Bag.Bag [1, 2, 3] `DB.union` DB.empty `shouldBe` Bag.Bag [1, 2, 3] + DB.union (Bag.Bag [1, 2, 3], DB.empty) `shouldBe` Bag.Bag [1, 2, 3] it "can find the union of two empty tables" $ do - (DB.empty :: DB.Table Char) `DB.union` DB.empty `shouldBe` (DB.empty :: DB.Table Char) + DB.union ((DB.empty :: DB.Table Char), DB.empty) `shouldBe` (DB.empty :: DB.Table Char) describe "Database.Bag cp" $ do it "correctly can calculate the cartesian product of two tables" $ do - Bag.Bag [1, 2] `DB.cp` Bag.Bag [3, 4] `shouldBe` Bag.Bag [(1, 3), (1, 4), (2, 3), (2, 4)] + DB.cp (Bag.Bag [1, 2], Bag.Bag [3, 4]) `shouldBe` Bag.Bag [(1, 3), (1, 4), (2, 3), (2, 4)] it "returns an empty table when one table is empty" $ do - Bag.Bag [1, 2] `DB.cp` (DB.empty :: DB.Table Char) `shouldBe` (DB.empty :: DB.Table (Int, Char)) + DB.cp (Bag.Bag [1, 2], (DB.empty :: DB.Table Char)) `shouldBe` (DB.empty :: DB.Table (Int, Char)) it "returns an empty table when both tables are empty" $ do - (DB.empty :: DB.Table Bool) `DB.cp` (DB.empty :: DB.Table Char) `shouldBe` (DB.empty :: DB.Table (Bool, Char)) + DB.cp ((DB.empty :: DB.Table Bool), (DB.empty :: DB.Table Char)) `shouldBe` (DB.empty :: DB.Table (Bool, Char)) describe "Database.Bag neutral" $ do -- TODO:: add more tests when understood it "returns a bag with the unit element" $ do DB.neutral `shouldBe` Bag.Bag [()] @@ -107,11 +107,11 @@ spec = do DB.aggregate (Bag.Bag [1, 1, 1, 1, 2] :: Bag.Bag (Sum Int)) `shouldBe` 6 describe "Database.Bag equijoinWithCp" $ do it "can join two tables with at most one matching element" $ do - equijoinWithCp invoiceId orderId orderPrices1 orderItems1 `shouldBe` orderJoin1 + equijoinWithCp invoiceId orderId (orderPrices1, orderItems1) `shouldBe` orderJoin1 it "can join two tables with more than one matching elements,\ \ only multiple in one table" $ do - equijoinWithCp invoiceId orderId orderPrices3 orderItems3 `shouldBe` orderJoin3 + equijoinWithCp invoiceId orderId (orderPrices3, orderItems3) `shouldBe` orderJoin3 it "can join two tables with no matching elements" $ do - equijoinWithCp invoiceId orderId orderPrices2 orderItems2 `shouldBe` Bag.empty + equijoinWithCp invoiceId orderId (orderPrices2, orderItems2) `shouldBe` Bag.empty it "can join two tables with multiple elements in both tables" $ do - equijoinWithCp lastName lastName people people `shouldBe` lastNameJoin + equijoinWithCp lastName lastName (people, people) `shouldBe` lastNameJoin diff --git a/test/Database/IndexedTableSpec.hs b/test/Database/IndexedTableSpec.hs index a522c6c..1199058 100644 --- a/test/Database/IndexedTableSpec.hs +++ b/test/Database/IndexedTableSpec.hs @@ -46,4 +46,4 @@ spec = do Table.aggregation (Map.Lone (Bag.Bag [Any True, Any True, Any False])) `shouldBe` Map.Lone (Any True) describe "natural join" $ do it "is a local cartesian product" $ do - Table.naturalJoin (Map.Lone (Bag.Bag [1, 2])) (Map.Lone (Bag.Bag [2, 3])) `shouldBe` Map.Lone (Bag.Bag [(1, 2), (1, 3), (2, 2), (2, 3)]) + Table.naturalJoin (Map.Lone (Bag.Bag [1, 2]), Map.Lone (Bag.Bag [2, 3])) `shouldBe` Map.Lone (Bag.Bag [(1, 2), (1, 3), (2, 2), (2, 3)])