diff --git a/src/Data/Array/Accelerate.hs b/src/Data/Array/Accelerate.hs index 7fafc1120..f87ccb8e1 100644 --- a/src/Data/Array/Accelerate.hs +++ b/src/Data/Array/Accelerate.hs @@ -430,7 +430,8 @@ module Data.Array.Accelerate ( CShort, CUShort, CInt, CUInt, CLong, CULong, CLLong, CULLong, CChar, CSChar, CUChar, - test, testWithObjective, module Data.Array.Accelerate.Backend + test, testWithObjective, testBench, + module Data.Array.Accelerate.Backend ) where import Data.Array.Accelerate.Backend @@ -453,7 +454,7 @@ import Data.Array.Accelerate.Language import Data.Array.Accelerate.Pattern import Data.Array.Accelerate.Pattern.TH import Data.Array.Accelerate.Prelude -import Data.Array.Accelerate.Trafo (test, testWithObjective) +import Data.Array.Accelerate.Trafo (test, testWithObjective, testBench) import Data.Array.Accelerate.Pretty () -- show instances import Data.Array.Accelerate.Smart import Data.Array.Accelerate.Sugar.Array ( Array, Arrays, Scalar, Vector, Matrix, Segments, fromFunction, fromFunctionM, toList, fromList ) diff --git a/src/Data/Array/Accelerate/Trafo.hs b/src/Data/Array/Accelerate/Trafo.hs index cf4da5ee7..4235a7079 100644 --- a/src/Data/Array/Accelerate/Trafo.hs +++ b/src/Data/Array/Accelerate/Trafo.hs @@ -33,7 +33,7 @@ module Data.Array.Accelerate.Trafo ( Function, EltFunctionR, convertExp, convertFun, - test, testWithObjective, convertAccWithObj, convertAfunWithObj, convertAccBench, convertAfunBench, + test, testWithObjective, testBench, convertAccWithObj, convertAfunWithObj, convertAccBench, convertAfunBench, ) where import Data.Array.Accelerate.Sugar.Array ( ArraysR ) @@ -129,6 +129,48 @@ testWithObjective obj f schedule = convertScheduleFun @sched @kernel slvpartitioned +testBench + :: forall sched kernel f. (Afunction f, DesugarAcc (KernelOperation kernel), Operation.SimplifyOperation (KernelOperation kernel), Operation.SLVOperation (KernelOperation kernel), Partitioning.MakesILP (KernelOperation kernel), Pretty.PrettyOp (KernelOperation kernel), Pretty.PrettyKernel kernel, IsSchedule sched, IsKernel kernel, Pretty.PrettySchedule sched, Operation.ShrinkArg (Partitioning.BackendClusterArg (KernelOperation kernel))) + => Benchmarking + -> f + -> String +testBench bench f + = "OriginalAcc:\n" + ++ Pretty.renderForTerminal (Pretty.prettyPreOpenAfun configPlain prettyOpenAcc Empty original) + ++ "\n\nDesugared OperationAcc:\n" + ++ Pretty.renderForTerminal (Pretty.prettyAfun desugared) + ++ "\n\nSimplified OperationAcc:\n" + ++ Pretty.renderForTerminal (Pretty.prettyAfun operation) + ++ "\n\nPartitionedAcc:\n" + ++ Pretty.renderForTerminal (Pretty.prettyAfun partitioned) + ++ "\nSLV'd PartitionedAcc:\n" + ++ Pretty.renderForTerminal (Pretty.prettyAfun slvpartitioned) + ++ "\n\nSchedule:\n" + ++ Pretty.renderForTerminal (Pretty.prettySchedule schedule) + where + operation + = + Operation.simplifyFun $ + Operation.stronglyLiveVariablesFun $ + Operation.simplifyFun $ + desugared + desugared = + desugarAfun @(KernelOperation kernel) + $ original + original = + LetSplit.convertAfun + $ Sharing.convertAfunWith defaultOptions f + + partitioned = + Operation.simplifyFun $ + NewNewFusion.convertAccBenchF bench operation + + slvpartitioned = + Operation.simplifyFun $ + Operation.stronglyLiveVariablesFun partitioned + + schedule = convertScheduleFun @sched @kernel slvpartitioned + @@ -163,12 +205,13 @@ convertAccWith config convertAccBench :: forall sched kernel arrs. - (DesugarAcc (KernelOperation kernel), Operation.SLVOperation (KernelOperation kernel), Operation.SimplifyOperation (KernelOperation kernel), Partitioning.MakesILP (KernelOperation kernel), Pretty.PrettyOp (KernelOperation kernel), IsSchedule sched, IsKernel kernel, Operation.NFData' (Graph.BackendClusterArg (KernelOperation kernel)), Operation.ShrinkArg (Partitioning.BackendClusterArg (KernelOperation kernel))) + (Pretty.PrettySchedule sched, Pretty.PrettyKernel kernel, DesugarAcc (KernelOperation kernel), Operation.SLVOperation (KernelOperation kernel), Operation.SimplifyOperation (KernelOperation kernel), Partitioning.MakesILP (KernelOperation kernel), Pretty.PrettyOp (KernelOperation kernel), IsSchedule sched, IsKernel kernel, Operation.NFData' (Graph.BackendClusterArg (KernelOperation kernel)), Operation.ShrinkArg (Partitioning.BackendClusterArg (KernelOperation kernel))) => NewNewFusion.Benchmarking -> Acc arrs -> sched kernel () (ScheduleOutput sched (DesugaredArrays (ArraysR arrs)) -> ()) convertAccBench b - = phase' "codegen" rnfSchedule convertSchedule + = --(\s -> Debug.Trace.trace (Pretty.renderForTerminal (Pretty.prettySchedule s)) s) . + phase' "codegen" rnfSchedule convertSchedule . phase "partition-live-vars" (Operation.simplify . Operation.stronglyLiveVariables) . phase "array-fusion" (Operation.simplify . NewNewFusion.convertAccBench b) . phase "operation-live-vars" (Operation.simplify . Operation.stronglyLiveVariables) @@ -179,12 +222,13 @@ convertAccBench b convertAfunBench :: forall sched kernel f. - (Afunction f, DesugarAcc (KernelOperation kernel), Operation.SLVOperation (KernelOperation kernel), Operation.SimplifyOperation (KernelOperation kernel), Partitioning.MakesILP (KernelOperation kernel), Pretty.PrettyOp (KernelOperation kernel), IsSchedule sched, IsKernel kernel, Operation.NFData' (Graph.BackendClusterArg (KernelOperation kernel)), Operation.ShrinkArg (Partitioning.BackendClusterArg (KernelOperation kernel))) + (Pretty.PrettySchedule sched, Pretty.PrettyKernel kernel, Afunction f, DesugarAcc (KernelOperation kernel), Operation.SLVOperation (KernelOperation kernel), Operation.SimplifyOperation (KernelOperation kernel), Partitioning.MakesILP (KernelOperation kernel), Pretty.PrettyOp (KernelOperation kernel), IsSchedule sched, IsKernel kernel, Operation.NFData' (Graph.BackendClusterArg (KernelOperation kernel)), Operation.ShrinkArg (Partitioning.BackendClusterArg (KernelOperation kernel))) => NewNewFusion.Benchmarking -> f -> sched kernel () (Scheduled sched (DesugaredAfun (ArraysFunctionR f))) convertAfunBench b - = phase' "codegen" rnfSchedule convertScheduleFun + = --(\s -> Debug.Trace.trace (Pretty.renderForTerminal (Pretty.prettySchedule s)) s) . + phase' "codegen" rnfSchedule convertScheduleFun . phase "partition-live-vars" (Operation.simplifyFun . Operation.stronglyLiveVariablesFun) . phase "array-fusion" (Operation.simplifyFun . NewNewFusion.convertAccBenchF b) . phase "operation-live-vars" (Operation.simplifyFun . Operation.stronglyLiveVariablesFun) diff --git a/src/Data/Array/Accelerate/Trafo/Partitioning/ILP.hs b/src/Data/Array/Accelerate/Trafo/Partitioning/ILP.hs index 4a3fc1e74..c43b48406 100644 --- a/src/Data/Array/Accelerate/Trafo/Partitioning/ILP.hs +++ b/src/Data/Array/Accelerate/Trafo/Partitioning/ILP.hs @@ -118,7 +118,7 @@ greedyFusion' k1 k2 s obj acc = fusedAcc go n info -- loop over all fusible edges. Try to set the current one to fused, if there's still a legal solution, keep it fused and continue. | n >= nedges = info | otherwise = let - i:->j = graph^.fusibleEdges&Set.elemAt n + i:->j = graph^.fusibleEdges&Set.elemAt (nedges - n - 1) info'' = info&constr<>~(fused i j .==. int 0) in go (n+1) $ if check info'' then info'' else info check :: Information op -> Bool diff --git a/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/MIP.hs b/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/MIP.hs index 5177fff29..3c830d9f4 100644 --- a/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/MIP.hs +++ b/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/MIP.hs @@ -11,6 +11,7 @@ {-# LANGUAGE ViewPatterns #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE InstanceSigs #-} +{-# LANGUAGE LambdaCase #-} module Data.Array.Accelerate.Trafo.Partitioning.ILP.MIP ( -- Exports default paths to 6 solvers, as well as an instance to ILPSolver for all of them @@ -44,7 +45,7 @@ instance (MakesILP op, MIP.IsSolver s IO) => ILPSolver s op where solve s (ILP dir obj constr bnds n) = makeSolution names . addZeroes problem <$> MIP.solve s options problem where options = def { MIP.solveTimeLimit = Just 60 - , MIP.solveLogger = const (pure ()) --putStrLn . ("AccILPSolver: " ++) + , MIP.solveLogger = const $ pure () -- putStrLn . ("AccILPSolver: " ++) , MIP.solveErrorLogger = putStrLn . ("AccILPSolverError: " ++) } --, MIP.solveCondensedSolution = False } @@ -53,7 +54,13 @@ instance (MakesILP op, MIP.IsSolver s IO) => ILPSolver s op where <$> (Problem (Just "AccelerateILP") <$> (mkFun dir <$> expr n obj) <*> cons n constr <*> pure [] <*> pure []) <*> (bounds bnds >>= finishBounds) <*> vartypes -- If any variables are not given a type, they won't get fixed by `solveCondensedSoluton` (and I'm also not sure whether Integer is the default). - (problem, (names,_)) = runState stateProblem ((mempty, mempty),"") + (problem', (names,_)) = runState stateProblem ((mempty, mempty),"") + -- add empty constraints for variables that are only in bounds + problem = case problem' of + Problem name o c _ _ b t -> Problem name o (c <> [MIP.constExpr (negate $ fromIntegral n) MIP..<=. MIP.varExpr x | x <- M.keys b, x `notElem` varsOf c]) [] [] b t + varsOf :: [MIP.Constraint Scientific] -> [MIP.Var] + varsOf = concatMap $ concatMap (\(Term _ vs)->vs) . (\(Expr ts)->ts) . MIP.constrExpr + mkFun Maximise = ObjectiveFunction (Just "AccelerateObjective") OptMax mkFun Minimise = ObjectiveFunction (Just "AccelerateObjective") OptMin diff --git a/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/Solve.hs b/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/Solve.hs index cd5e6f221..aa5e41340 100644 --- a/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/Solve.hs +++ b/src/Data/Array/Accelerate/Trafo/Partitioning/ILP/Solve.hs @@ -43,6 +43,7 @@ data Objective | ArrayReadsWrites | IntermediateArrays | FusedEdges + | Everything deriving (Show, Bounded, Enum) @@ -89,6 +90,7 @@ makeILP obj (Info ArrayReadsWrites -> (Minimise, numberOfArrayReadsWrites) IntermediateArrays -> (Minimise, numberOfManifestArrays) FusedEdges -> (Minimise, numberOfUnfusedEdges) + Everything -> (Minimise, numberOfClusters .+. numberOfArrayReadsWrites) -- arrayreadswrites already indictly includes everything else -- objective function that maximises the number of edges we fuse, and minimises the number of array reads if you ignore horizontal fusion @@ -146,6 +148,7 @@ makeILP obj (Info numberOfClusters = c (Other "maximumClusterNumber") -- removing this from myConstraints makes the ILP slightly smaller, but disables the use of this cost function numberOfClustersConstraint = case obj of NumClusters -> foldMap (\l -> pi l .<=. numberOfClusters) nodes + Everything -> foldMap (\l -> pi l .<=. numberOfClusters) nodes _ -> mempty -- attempt at execpi: