Skip to content

Commit

Permalink
Implement indexed equijoin (#43)
Browse files Browse the repository at this point in the history
* Implement indexing bags

* Describe what () key is

* Define product of pointed set

* Implement indexing and merging of arrays

* Change reminder to test more complicated mapping

* Implement lookup

* Implement indexing

* Implement unindexing of map

* Test reduction of Array map

* Test indexing with repeated keys

* Add indexedEquijoin to database
  • Loading branch information
MatBon01 authored May 2, 2023
1 parent 17e86e4 commit eb13653
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ library
other-extensions:
InstanceSigs, TypeFamilies, FlexibleContexts,
FlexibleInstances
build-depends: base >=4.16.4.0
build-depends:
base >=4.16.4.0,
array >= 0.5.4.0
hs-source-dirs: src
default-language: Haskell2010

Expand Down Expand Up @@ -69,5 +71,9 @@ test-suite spec
Data.KeySpec,
Database.BagSpec,
Database.IndexedTableSpec
build-depends: base >=4.16.4.0, hspec ^>=2.10, a-deeper-dive-into-relational-algebra-by-way-of-adjunctions
build-depends:
base >=4.16.4.0,
hspec ^>=2.10,
a-deeper-dive-into-relational-algebra-by-way-of-adjunctions,
array >= 0.5.4.0
build-tool-depends: hspec-discover:hspec-discover == 2.*
22 changes: 21 additions & 1 deletion src/Data/Key.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ module Data.Key where
import Data.PointedSet
import Data.Bag
import Data.CMonoid
import Data.Word
import Data.Array
import qualified Data.Bifunctor as Bifunctor

class (Functor (Map k)) => Key k where
data Map k :: * -> *
Expand All @@ -25,7 +28,7 @@ class (Functor (Map k)) => Key k where
reduce :: (PointedSet v, CMonoid v) => Map k v -> v
reduce = reduceBag . cod

instance Key () where
instance Key () where -- unit type
newtype Map () v = Lone v deriving (Show, Eq)

empty = Lone Data.PointedSet.null
Expand All @@ -40,3 +43,20 @@ instance Key () where

instance Functor (Map ()) where
fmap f (Lone v) = Lone (f v)

instance Key Word16 where -- constant type (array indexed by 16 bit word)
newtype Map Word16 v = A (Array Word16 v) deriving (Eq, Show)
empty = A (accumArray (curry snd) Data.PointedSet.null (0, 2^16-1) [])
isEmpty (A a) = all isNull (elems a)
single (k, v) = A (accumArray (curry snd) Data.PointedSet.null (0, 2^16-1) [(k, v)])
merge (A a1, A a2) = A (listArray (0, 2^16 - 1) (zip (elems a1) (elems a2)))
dom (A a) = Bag [ k | (k, v) <- assocs a, not (isNull v) ]
cod (A a) = Bag [ v | (k, v) <- assocs a, not (isNull v) ]
lookup (A a) = (!) a
index kvps = A (accumArray (curry Data.Bag.union) Data.Bag.empty (0, 2^16-1) vals)
where
vals = (elements . fmap (Bifunctor.second Data.Bag.single)) kvps
unindex (A a) = Bag [ (k, v) | (k, vs) <- assocs a, v <- elements vs ]

instance Functor (Map Word16) where
fmap f (A a) = A (fmap f a)
4 changes: 4 additions & 0 deletions src/Data/PointedSet.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ class PointedSet a where
instance PointedSet (Maybe a) where
null = Nothing
isNull = isNothing

instance (PointedSet v1, PointedSet v2) => (PointedSet (v1, v2)) where
null = (Data.PointedSet.null, Data.PointedSet.null)
isNull (v1, v2) = isNull v1 && isNull v2
20 changes: 16 additions & 4 deletions src/Database/Bag.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
module Database.Bag where

import qualified Data.Bag as Bag
import Database.IndexedTable
import Data.CMonoid
import Data.Key

type Table = Bag.Bag

Expand All @@ -19,7 +21,7 @@ cp = Bag.cp

-- TODO:: check
neutral :: Table ()
neutral = single ()
neutral = Database.Bag.single ()

project :: (a -> b) -> Table a -> Table b
project = fmap
Expand All @@ -30,9 +32,19 @@ select p = Bag.Bag . filter p . Bag.elements
aggregate :: CMonoid a => Table a -> a
aggregate = Bag.reduceBag

equijoinWithCp :: Eq c => (a -> c) -> (b -> c) -> (Table a, Table b) -> Table (a, b)
equijoinWithCp fa fb = select equality . cp
productEquijoin :: Eq c => (a -> c) -> (b -> c) -> (Table a, Table b) -> Table (a, b)
productEquijoin fa fb = select equality . cp
where
equality (a, b) = fa a == fb b


indexBy :: (Key k) => (a -> k) -> Table a -> Map k (Table a)
indexBy keyProj = index . fmap (\x -> (keyProj x, x))

indexedEquijoin :: (Key k) => (a -> k) -> (b -> k) -> (Table a, Table b) -> Table (a, b)
-- t1, t2 Bags
-- if1, if2 are indexing functions
indexedEquijoin if1 if2 (t1, t2) = (reduce . fmap cp . merge) (it1, it2)
where
-- Indexed table 1 and 2
it1 = indexBy if1 t1
it2 = indexBy if2 t2
68 changes: 65 additions & 3 deletions test/Data/KeySpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import Test.Hspec
import Data.Key
import qualified Data.Bag as Bag
import qualified Data.PointedSet as Pointed
import Data.Array
import Data.Word

spec :: Spec
spec = do
Expand Down Expand Up @@ -33,11 +35,11 @@ spec = do
it "can lookup elements" $ do
Data.Key.lookup (Lone (Just True)) () `shouldBe` Just True
it "can index a bag of pairs without multiplicity of keys" $ do
index (Bag.Bag [((), Just 'r')]) `shouldBe` Lone (Bag.Bag [Just 'r'])
Data.Key.index (Bag.Bag [((), Just 'r')]) `shouldBe` Lone (Bag.Bag [Just 'r'])
it "can index an empty bag, with PointedSet null value as the null bag" $ do
index (Bag.empty :: Bag.Bag ((), Int)) `shouldBe` Lone (Pointed.null :: Bag.Bag Int)
Data.Key.index (Bag.empty :: Bag.Bag ((), Int)) `shouldBe` Lone (Pointed.null :: Bag.Bag Int)
it "can index a bag of pairs with multiplicity" $ do
index (Bag.Bag [((), 'a'), ((), 'b'), ((), 'c')]) `shouldBe` Lone (Bag.Bag ['a', 'b', 'c'])
Data.Key.index (Bag.Bag [((), 'a'), ((), 'b'), ((), 'c')]) `shouldBe` Lone (Bag.Bag ['a', 'b', 'c'])
it "can unindex a map with no multiplicities" $ do
unindex (Lone (Bag.Bag [1])) `shouldBe` Bag.Bag [((), 1)]
it "can unindex an empty map" $ do
Expand All @@ -46,3 +48,63 @@ spec = do
unindex (Lone (Bag.Bag [True, True, False])) `shouldBe` Bag.Bag [((), True), ((), True), ((), False)]
it "can reduce a map correctly" $ do
reduce (Lone (Bag.Bag ['a'])) `shouldBe` Bag.Bag ['a']
describe "Map Word16" $ do
it "correctly produces an empty map" $ do
(empty :: Map Word16 (Maybe Int)) `shouldBe` A (accumArray (curry snd) Nothing (0, 2^16 - 1) [])
it "can correctly detect an empty map" $ do
isEmpty (A (accumArray (curry snd) Nothing (0, 2^16 - 1) []) :: Map Word16 (Maybe Int)) `shouldBe` True
it "detects its created empty map as empty" $ do
isEmpty (empty :: Map Word16 (Maybe Int)) `shouldBe` True
it "correctly produces a singleton with a non null value" $ do
single (5, Just 'c') `shouldBe` A (accumArray (curry snd) Nothing (0, 2^16 - 1) [(5, Just 'c')])
it "correctly produces a singleton with a null value" $ do
single (5, (Nothing :: Maybe Int)) `shouldBe` A (accumArray (curry snd) Nothing (0, 2^16 - 1) [(5, (Nothing :: Maybe Int))])
it "correctly merges two singletons with different indices" $ do
merge ((single (5, Just 'd')), (single (6, Just 'e'))) `shouldBe` A (accumArray (curry snd) (Nothing, Nothing) (0, 2^16 - 1) [(5, (Just 'd', Nothing)), (6, (Nothing, Just 'e'))])
it "correctly merges two singletons with the same index" $ do
merge ((single (5, Just "one")), (single (5, Just "two"))) `shouldBe` A (accumArray (curry snd) (Nothing, Nothing) (0, 2^16 - 1) [(5, (Just "one", Just "two"))])
it "correctly merges with one null value in the key" $ do
merge ((single (5, Just "one")), (single (5, (Nothing :: Maybe String)))) `shouldBe` A (accumArray (curry snd) (Nothing, Nothing) (0, 2^16 - 1) [(5, (Just "one", (Nothing :: Maybe String)))])
it "merging with two empty values produces an empty map" $ do
merge ((single (5, (Nothing :: Maybe String))), (single (5, (Nothing :: Maybe String)))) `shouldBe` (empty :: Map Word16 (Maybe String, Maybe String))
it "can correctly unmerge a map" $ do
unmerge (A (accumArray (curry snd) (Nothing, Nothing) (0, 2^16 - 1) [(5, (Just "one", Just "two")), (6, (Just "three", Just "four"))])) `shouldBe` (A (accumArray (curry snd) Nothing (0, 2^16 - 1) [(5, Just "one"), (6, Just "three")]), A (accumArray (curry snd) Nothing (0, 2^16 - 1) [(5, Just "two"), (6, Just "four")]))
it "can correctly unmerge an empty map" $ do
unmerge (empty :: Map Word16 (Maybe String, Maybe Bool)) `shouldBe` (empty :: Map Word16 (Maybe String), empty :: Map Word16 (Maybe Bool))
it "can correctly find the domain of a map" $ do
dom (A (accumArray (curry snd) Nothing (0, 2^16 - 1) [(5, Just 'd'), (6, Just 'e')])) `shouldBe` Bag.Bag [5, 6]
it "can correctly find the domain of an empty map" $ do
dom (empty :: Map Word16 (Maybe String)) `shouldBe` (Bag.empty :: Bag.Bag Word16)
it "can correctly find the domain of a map with only null values" $ do
dom (single (5, Nothing)) `shouldBe` (Bag.empty :: Bag.Bag Word16)
it "can correctly find the codomain of a map" $ do
cod (A (accumArray (curry snd) Nothing (0, 2^16 - 1) [(5, Just 'd'), (6, Just 'e')])) `shouldBe` Bag.Bag [Just 'd', Just 'e']
it "can correctly find the codomain of an empty map" $ do
cod (empty :: Map Word16 (Maybe Bool)) `shouldBe` (Bag.empty :: Bag.Bag (Maybe Bool))
it "can correctly find the codomain of a map with only null elements" $ do
cod (single ((5 :: Word16), (Nothing :: Maybe Int))) `shouldBe` (Bag.empty :: Bag.Bag (Maybe Int))
it "can lookup a singleton with a present key" $ do
Data.Key.lookup (single (5 :: Word16, Just 3)) 5 `shouldBe` Just 3
it "can lookup a singleton with a key not present" $ do
Data.Key.lookup (single (5 :: Word16, Just 3)) 4 `shouldBe` (Pointed.null :: Maybe Int)
it "can lookup an empty map" $ do
Data.Key.lookup (empty :: Map Word16 (Bag.Bag Int)) 2 `shouldBe` (Pointed.null :: Bag.Bag Int)
it "can create a map from a bag of key value pairs with distinct indices" $ do
Data.Key.index (Bag.Bag [(1, Just '1'), (2, Just '2')]) `shouldBe` A (accumArray (curry snd) (Bag.empty :: Bag.Bag (Maybe Char)) (0, 2^16 - 1) [(1, Bag.single (Just '1')), (2, Bag.single (Just '2'))])
it "can create a map from a bag of key value pairs with shared indices" $ do
Data.Key.index (Bag.Bag [(1, Just '1'), (2, Just '2'), (1, Just '3'), (2, Just '4')]) `shouldBe` A (accumArray (curry snd) (Bag.empty :: Bag.Bag (Maybe Char)) (0, 2^16 - 1) [(1, Bag.Bag [Just '1', Just '3']), (2, Bag.Bag [Just '2', Just '4'])])
it "can index an empty bag" $ do
Data.Key.index (Bag.empty :: Bag.Bag (Word16, Maybe Char)) `shouldBe` (empty :: Map Word16 (Bag.Bag (Maybe Char)))
--
it "can unindex a map with singleton bag values" $ do
Data.Key.unindex (A (accumArray (curry snd) (Bag.empty :: Bag.Bag (Maybe Char)) (0, 2^16 - 1) [(1, Bag.single (Just '1')), (2, Bag.single (Just '2'))])) `shouldBe` Bag.Bag [(1, Just '1'), (2, Just '2')]
it "can unindex a map with bags with multiple values" $ do
Data.Key.unindex (A (accumArray (curry snd) (Bag.empty :: Bag.Bag (Maybe Char)) (0, 2^16 - 1) [(1, Bag.Bag [Just '1', Just '1']), (2, Bag.Bag [Just '2', Just '4'])])) `shouldBe` (Bag.Bag [(1, Just '1'), (2, Just '2'), (1, Just '1'), (2, Just '4')])
it "can index unindex an empty map" $ do
Data.Key.unindex (empty :: Map Word16 (Bag.Bag (Maybe Char))) `shouldBe` (Bag.empty :: Bag.Bag (Word16, Maybe Char))
it "can reduce a map with singleton bags" $ do
Data.Key.reduce (A (accumArray (curry snd) (Bag.empty :: Bag.Bag Bool) (0, 2^16 - 1) [(1, Bag.single (True)), (2, Bag.single (False))])) `shouldBe` Bag.Bag [True, False]
it "can reduce a map with multiple value bags bags" $ do
Data.Key.reduce (A (accumArray (curry snd) (Bag.empty :: Bag.Bag Int) (0, 2^16 - 1) [(1, Bag.Bag [1, 2]), (2, Bag.Bag [2, 5, 6])])) `shouldBe` Bag.Bag [1, 2, 2, 5, 6]
it "can reduce an empty map" $ do
(Data.Key.empty :: Map Word16 (Bag.Bag Int)) `shouldBe` (empty :: Map Word16 (Bag.Bag Int))
9 changes: 9 additions & 0 deletions test/Data/PointedSetSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,12 @@ spec = do
Pointed.isNull (Nothing :: Maybe Int) `shouldBe` True
it "does not interpret Just as Null" $ do
Pointed.isNull (Just 3) `shouldBe` False
describe "Product PointedSet" $ do
it "has (null, null) as its null value" $ do
(Pointed.null :: (Maybe Int, Maybe Char)) `shouldBe` (Nothing, Nothing)
it "detects (null, null) as null" $ do
Pointed.isNull ((Nothing :: Maybe Int), (Nothing :: Maybe Char)) `shouldBe` True
it "does not detect a pair as null if its first element is not null" $ do
Pointed.isNull ((Just 3), (Nothing :: Maybe Char)) `shouldBe` False
it "does not detect a pair as null if its second element is not null" $ do
Pointed.isNull ((Nothing :: Maybe Int), (Just 'a')) `shouldBe` False
46 changes: 40 additions & 6 deletions test/Database/BagSpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import Test.Hspec
import qualified Database.Bag as DB
import qualified Data.Bag as Bag
import Data.Monoid
import Database.Bag (equijoinWithCp)
import Database.Bag (productEquijoin, indexedEquijoin)
import Data.Key as Map
import Data.Array
import Data.Word

type Name = String
data Person = Person {firstName :: Name, lastName :: Name} deriving (Show, Eq)
Expand All @@ -16,6 +19,15 @@ lastNameJoin = Bag.Bag
, (Person "Jane" "Doe", Person "John" "Doe")
, (Person "John" "Smith", Person "John" "Smith") ]

data NameNum = NN {name :: Name, nums :: Int} deriving (Show, Eq)
namenums = Bag.Bag [NN "John" 12, NN "Jane" 12, NN "John" 18]
namenumjoins = Bag.Bag
[ (NN "John" 12, NN "John" 12)
, (NN "John" 12, NN "Jane" 12)
, (NN "Jane" 12, NN "Jane" 12)
, (NN "Jane" 12, NN "John" 12)
, (NN "John" 18, NN "John" 18) ]

type OrderId = Int
type OrderPrice = Float
type Item = String
Expand Down Expand Up @@ -57,6 +69,12 @@ orderItems3 = Bag.Bag
, OrderItem "Milk" 2 2
, OrderItem "Banana" 2 12
, OrderItem "Chocolate" 3 5]
orderItems3kvps =
[ (1, Bag.Bag [OrderItem "Apple" 1 11, OrderItem "Orange" 1 5, OrderItem "Peach" 1 12])
, (2, Bag.Bag [OrderItem "Milk" 2 2, OrderItem "Banana" 2 12])
, (3, Bag.Bag [OrderItem "Chocolate" 3 5])
]
orderItems3Array = A (accumArray (curry Bag.union) (Bag.empty :: Bag.Bag OrderItem) (0, 2^16 - 1) orderItems3kvps)
orderJoin3 = Bag.Bag -- orderPrices join OrderItems by id
[ (OrderInvoice 1 25.50, OrderItem "Apple" 1 11)
, (OrderInvoice 1 25.50, OrderItem "Orange" 1 5)
Expand Down Expand Up @@ -105,13 +123,29 @@ spec = do
(getAny . DB.aggregate) (Bag.Bag [Any True, Any False]) `shouldBe` True
it "can correctly aggregate a table with multiplicities" $ do
DB.aggregate (Bag.Bag [1, 1, 1, 1, 2] :: Bag.Bag (Sum Int)) `shouldBe` 6
describe "Database.Bag equijoinWithCp" $ do
describe "Database.Bag productEquijoin" $ do
it "can join two tables with at most one matching element" $ do
productEquijoin invoiceId orderId (orderPrices1, orderItems1) `shouldBe` orderJoin1
it "can join two tables with more than one matching elements,\
\ only multiple in one table" $ do
productEquijoin invoiceId orderId (orderPrices3, orderItems3) `shouldBe` orderJoin3
it "can join two tables with no matching elements" $ do
productEquijoin invoiceId orderId (orderPrices2, orderItems2) `shouldBe` Bag.empty
it "can join two tables with multiple elements in both tables" $ do
productEquijoin lastName lastName (people, people) `shouldBe` lastNameJoin
describe "indexBy" $ do
it "can correctly index with trivial key" $ do
DB.indexBy (const ()) people `shouldBe` Map.Lone people
it "can correctly index an empty bag" $ do
DB.indexBy (const ()) (DB.empty :: DB.Table Int) `shouldBe` (Map.empty :: Map () (Bag.Bag Int))
it "can correctly index a bag with a repeated index" $ do
(DB.indexBy (fromIntegral . orderId) orderItems3 :: Map.Map Word16 (Bag.Bag OrderItem)) `shouldBe` orderItems3Array
it "can join two tables with at most one matching element" $ do
equijoinWithCp invoiceId orderId (orderPrices1, orderItems1) `shouldBe` orderJoin1
indexedEquijoin (fromIntegral . invoiceId :: OrderInvoice -> Word16) (fromIntegral . orderId) (orderPrices1, orderItems1) `shouldBe` orderJoin1
it "can join two tables with more than one matching elements,\
\ only multiple in one table" $ do
equijoinWithCp invoiceId orderId (orderPrices3, orderItems3) `shouldBe` orderJoin3
indexedEquijoin (fromIntegral . invoiceId :: OrderInvoice -> Word16) (fromIntegral . orderId) (orderPrices3, orderItems3) `shouldBe` orderJoin3
it "can join two tables with no matching elements" $ do
equijoinWithCp invoiceId orderId (orderPrices2, orderItems2) `shouldBe` Bag.empty
indexedEquijoin (fromIntegral . invoiceId :: OrderInvoice -> Word16) (fromIntegral . orderId) (orderPrices2, orderItems2) `shouldBe` Bag.empty
it "can join two tables with multiple elements in both tables" $ do
equijoinWithCp lastName lastName (people, people) `shouldBe` lastNameJoin
indexedEquijoin (fromIntegral . nums :: NameNum -> Word16) (fromIntegral . nums) (namenums, namenums) `shouldBe` namenumjoins

0 comments on commit eb13653

Please sign in to comment.