From 7f9a305bd282839250ca1ff624b867d595d7c09c Mon Sep 17 00:00:00 2001 From: David van Balen Date: Fri, 24 May 2024 11:04:37 +0200 Subject: [PATCH] fix linear index in workstealing --- .../Array/Accelerate/LLVM/Native/CodeGen.hs | 15 +---- .../Accelerate/LLVM/Native/CodeGen/Loop.hs | 26 ++++++-- .../Array/Accelerate/LLVM/Native/Compile.hs | 2 +- .../Array/Accelerate/LLVM/Native/Execute.hs | 2 +- .../Array/Accelerate/LLVM/Native/Operation.hs | 1 + accelerate-llvm-native/test/nofib/Main.hs | 66 ++++++++++++++++--- 6 files changed, 84 insertions(+), 28 deletions(-) diff --git a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen.hs b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen.hs index 80f38af77..9f3611c25 100644 --- a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen.hs +++ b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen.hs @@ -166,17 +166,7 @@ codegen name env (Clustered c b) args = (\(OP_Pair i l) -> fmap (toR . snd) . runStateT (body (d, l, i:outerI)) . fromR) -flipShape :: forall sh. ShapeR sh -> Operands sh -> Operands sh -flipShape shr = multidim shr . reverse . multidim' shr -multidim :: ShapeR sh -> [Operands Int] -> Operands sh -multidim ShapeRz [] = OP_Unit -multidim (ShapeRsnoc shr) (i:is) = OP_Pair (multidim shr is) i -multidim _ _ = error "shouldn't have trusted me" - -multidim' :: ShapeR sh -> Operands sh -> [Operands Int] -multidim' ShapeRz OP_Unit = [] -multidim' (ShapeRsnoc shr) (OP_Pair sh i) = i : multidim' shr sh -- We use some unsafe coerces in the context of the accumulators. -- Some, in this function, are very local. Others, like in evalOp, @@ -280,10 +270,11 @@ instance EvalOp NativeOp where i <- intOfIndex shr2 sh' sh2 readBuffer tp TypeInt (aprjBuffer (unsafeCoerce buf) gamma) (op TypeInt i) | otherwise = pure CN - readInput tp _ (TupRsingle buf) gamma a (_,i,_) = -- assuming no bp, and I'll just make a read at every depth? + readInput tp _ (TupRsingle buf) gamma a (_,i,_) = error "here" + -- assuming no bp, and I'll just make a read at every depth? -- lift $ CJ . ir tp <$> readBuffer tp TypeInt (aprjBuffer (unsafeCoerce buf) gamma) (op TypeInt i) -- second attempt, the above segfaults: never read instead - pure CN + -- pure CN -- also segfaults :( {- weird: this implies that a is a `IsUnit`, but it happens on Int error $ show tp <> case buf of diff --git a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen/Loop.hs b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen/Loop.hs index 76b143d51..a5804cd65 100644 --- a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen/Loop.hs +++ b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/CodeGen/Loop.hs @@ -82,25 +82,29 @@ imapNestFromTo shr start end extent body = loopWorkFromTo :: ShapeR sh -> Operands sh -> Operands sh -> Operands sh -> TypeR s -> (LoopWork sh (StateT (Operands s) (CodeGen Native)),StateT (Operands s) (CodeGen Native) ()) -> StateT (Operands s) (CodeGen Native) () loopWorkFromTo shr start end extent tys (loopwork,finish) = do - linix <- lift (intOfIndex shr extent start) + -- linix <- lift (intOfIndex shr (flipShape shr extent) (flipShape shr start)) + let linix = liftInt 0 loopWorkFromTo' shr start end extent linix [] tys loopwork finish loopWorkFromTo' :: ShapeR sh -> Operands sh -> Operands sh -> Operands sh -> Operands Int -> [Operands Int] -> TypeR s -> LoopWork sh (StateT (Operands s) (CodeGen Native)) -> StateT (Operands s) (CodeGen Native) () loopWorkFromTo' ShapeRz OP_Unit OP_Unit OP_Unit _ _ _ LoopWorkZ = pure () -loopWorkFromTo' (ShapeRsnoc shr) (OP_Pair start' start) (OP_Pair end' end) (OP_Pair extent' _) linix ixs tys (LoopWorkSnoc lw foo) = do +loopWorkFromTo' (ShapeRsnoc shr) (OP_Pair start' start) (OP_Pair end' end) (OP_Pair extent' _) linixprev ixs tys (LoopWorkSnoc lw foo) = do + linix <- lift $ add numType start linixprev StateT $ \s -> ((),) <$> Loop.iter (TupRpair typerInt typerInt) tys (OP_Pair start linix) s - (\(OP_Pair i _) -> lt singleType i end) + (\(OP_Pair i _) -> lt singleType i end + ) (\(OP_Pair i l) -> OP_Pair <$> add numType (constant typerInt 1) i <*> add numType (constant typerInt 1) l) (\(OP_Pair i l) -> execStateT $ do - recurlinix <- lift $ mul numType l $ firstOrZero shr extent' + recurlinix <- lift $ mul numType l $ firstOrZero shr extent' -- if extent' is empty, this won't be used anyway loopWorkFromTo' shr start' end' extent' recurlinix (i:ixs) tys lw - foo l (i : ixs)) + foo l (i : ixs) + ) firstOrZero :: ShapeR sh -> Operands sh -> Operands Int @@ -381,6 +385,18 @@ data LoopWork sh m where -> LoopWork (sh, Int) m +flipShape :: forall sh. ShapeR sh -> Operands sh -> Operands sh +flipShape shr = multidim shr . reverse . multidim' shr + + +multidim :: ShapeR sh -> [Operands Int] -> Operands sh +multidim ShapeRz [] = OP_Unit +multidim (ShapeRsnoc shr) (i:is) = OP_Pair (multidim shr is) i +multidim _ _ = error "shouldn't have trusted me" + +multidim' :: ShapeR sh -> Operands sh -> [Operands Int] +multidim' ShapeRz OP_Unit = [] +multidim' (ShapeRsnoc shr) (OP_Pair sh i) = i : multidim' shr sh ---- debugging tools ---- putchar :: Operands Int -> CodeGen Native (Operands Int) diff --git a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Compile.hs b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Compile.hs index 8706f0cef..5def25766 100644 --- a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Compile.hs +++ b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Compile.hs @@ -93,7 +93,7 @@ compile uid name module' = do withNativeTargetMachine $ \machine -> withTargetLibraryInfo triple $ \libinfo -> do -- dump llvm - hPutStrLn stderr . T.unpack . decodeUtf8 =<< moduleLLVMAssembly mdl + -- hPutStrLn stderr . T.unpack . decodeUtf8 =<< moduleLLVMAssembly mdl optimiseModule datalayout (Just machine) (Just libinfo) mdl diff --git a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute.hs b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute.hs index c2661c243..68ce8ab3e 100644 --- a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute.hs +++ b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Execute.hs @@ -56,7 +56,7 @@ import Control.Concurrent.MVar import System.IO -- change for debugging schedule execution -don't :: IO a -> IO () +don't :: IO () -> IO () don't f = return () diff --git a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs index 2ef62bb3d..d000a9a90 100644 --- a/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs +++ b/accelerate-llvm-native/src/Data/Array/Accelerate/LLVM/Native/Operation.hs @@ -274,6 +274,7 @@ instance MakesILP NativeOp where ( inputConstraints l lIns <> ILP.c (InDir l) .==. ILP.c (OutDir l) <> ILP.c (InDims l) .==. int 1 .+. ILP.c (OutDims l) + <> foldMap (\lin -> fused lin l .==. int 1) lIns <> inrankifmanifest (ShapeRsnoc shr) l) (defaultBounds l) diff --git a/accelerate-llvm-native/test/nofib/Main.hs b/accelerate-llvm-native/test/nofib/Main.hs index 685c98270..5dca7e766 100644 --- a/accelerate-llvm-native/test/nofib/Main.hs +++ b/accelerate-llvm-native/test/nofib/Main.hs @@ -32,11 +32,54 @@ import Control.Concurrent -- import Quickhull main :: IO () main = do - let xs = fromList (Z :. 5) [1 :: Int ..] - let ys = map (+1) $ - use xs - let f = map (*2) - let program = awhile (map (A.>0) . asnd) (\(T2 a b) -> T2 (f a) (map (\x -> x - 1) b)) (T2 ys $ unit $ constant (100000 :: Int)) + -- let xs = fromList (Z :. 5 :. 10) [1 :: Int ..] + -- let ys = map (+1) $ + -- use xs + -- let f = map (*2) + -- let program = awhile (map (A.>0) . asnd) (\(T2 a b) -> T2 (f a) (map (\x -> x - 1) b)) (T2 ys $ unit $ constant (100000 :: Int)) + + -- putStrLn "generate" + -- let zs = generate (Z_ ::. 256 ::. 256) $ \idx -> if Prelude.foldl1 (||) $ Prelude.map (== idx) [I2 (constant a) (constant b) | a <- [205..209], b <- [200..210]] + -- then 1 + -- else if Prelude.foldl1 (||) $ Prelude.map (== idx) [I2 (constant a) (constant b) | a <- [250..259], b <- [250..260]] + -- then -1 + -- else 0 :: Exp Double + + -- putStrLn $ test @UniformScheduleFun @NativeKernel zs + -- print $ run @Native zs + + -- let negatives = [ + -- I3 211 154 98, + -- I3 102 138 112, + -- I3 101 156 59, + -- I3 17 205 32, + -- I3 92 63 205, + -- I3 199 7 203, + -- I3 250 170 157, + -- I3 82 184 255, + -- I3 154 162 36, + -- I3 223 42 240] + -- positives = [ + -- I3 57 120 167, + -- I3 5 118 175, + -- I3 176 246 164, + -- I3 45 194 234, + -- I3 212 7 248, + -- I3 115 123 207, + -- I3 202 83 209, + -- I3 203 18 198, + -- I3 243 172 14, + -- I3 54 209 40] + + let zs = generate (Z_ ::. constant 15 ::. constant 15 ::. constant 11) $ \(I3 x y z) -> T3 x y z + -- cond (Prelude.foldl1 (||) $ Prelude.map (== idx) negatives) + -- (-1) + -- $ cond (Prelude.foldl1 (||) $ Prelude.map (==idx) positives) + -- 1 + -- 0 :: Exp Double + -- let zs' = zs $ use $ fromList Z [11 :: Int] + putStrLn $ test @UniformScheduleFun @NativeKernel zs + print $ run @Native zs -- putStrLn "scan:" -- let f = @@ -85,10 +128,15 @@ main = do -- print $ runN @Native f xs -- print $ runN @Native (f ys) - putStrLn "fold:" - let f = fold1 (+) ys - putStrLn $ test @UniformScheduleFun @NativeKernel f - print $ run @Native f + -- putStrLn "fold:" + -- let f = fold1 (+) ys + -- putStrLn $ test @UniformScheduleFun @NativeKernel f + -- print $ run @Native f + + -- putStrLn "stencil:" + -- let f = stencil (\(a :: Exp Int,b,c) -> a+b+c) mirror ys + -- putStrLn $ test @UniformScheduleFun @NativeKernel f + -- print $ run @Native f -- putStrLn "scan:" -- let f = scanl1 (+) ys