Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function for generating Float/Double in [0,1] range #149

Merged
merged 11 commits into from
Jun 15, 2020
54 changes: 54 additions & 0 deletions bench/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
module Main (main) where

import Control.Monad
import Control.Monad.State.Strict
import Data.Int
import Data.Proxy
import Data.Typeable
Expand Down Expand Up @@ -185,6 +186,59 @@ main = do
!range = (1, n - 1)
in pureUniformRBench @Natural range sz
]
, bgroup "floating"
[ bgroup "IO"
[ bench "uniformFloat01M" $ nfIO $ runStateGenT_ (mkStdGen 1337) $ \g ->
replicateM_ sz $ do !_ <- uniformFloat01M g
Comment on lines +191 to +192
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to make this more generic, i.e. to run all benchmarks with runStateGenT_ rather than just this specific one? I would really like us to have a full benchmark matrix.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surely it's possible. I however would prefer to do it in separate PR in order to one thing at a time

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. Filed #161.

return ()
, bench "uniformFloatPositive01M" $ nfIO $ runStateGenT_ (mkStdGen 1337) $ \g ->
replicateM_ sz $ do !_ <- uniformFloatPositive01M g
return ()
, bench "uniformDouble01M" $ nfIO $ runStateGenT_ (mkStdGen 1337) $ \g ->
replicateM_ sz $ do !_ <- uniformDouble01M g
return ()
, bench "uniformDoublePositive01M" $ nfIO $ runStateGenT_ (mkStdGen 1337) $ \g ->
replicateM_ sz $ do !_ <- uniformDoublePositive01M g
return ()
]
--
, bgroup "St"
[ bench "uniformFloat01M" $ nf
(\n -> runStateGen_ (mkStdGen 1337) $ \g -> replicateM_ n $ do !_ <- uniformFloat01M g
return ()
) sz
, bench "uniformFloatPositive01M" $ nf
(\n -> runStateGen_ (mkStdGen 1337) $ \g -> replicateM_ n $ do !_ <- uniformFloatPositive01M g
return ()
) sz
, bench "uniformDouble01M" $ nf
(\n -> runStateGen_ (mkStdGen 1337) $ \g -> replicateM_ n $ do !_ <- uniformDouble01M g
return ()
) sz
, bench "uniformDoublePositive01M" $ nf
(\n -> runStateGen_ (mkStdGen 1337) $ \g -> replicateM_ n $ do !_ <- uniformDoublePositive01M g
return ()
) sz
]
, bgroup "pure"
[ let !stdGen = mkStdGen 1337
in bench "uniformFloat01M" $ nf
(genMany (runState $ uniformFloat01M (StateGenM @StdGen)) stdGen)
sz
, let !stdGen = mkStdGen 1337
in bench "uniformFloatPositive01M" $ nf
(genMany (runState $ uniformFloatPositive01M (StateGenM @StdGen)) stdGen)
sz
, let !stdGen = mkStdGen 1337
in bench "uniformDouble01M" $ nf
(genMany (runState $ uniformDouble01M (StateGenM @StdGen)) stdGen)
sz
, let !stdGen = mkStdGen 1337
in bench "uniformDoublePositive01M" $ nf
(genMany (runState $ uniformDoublePositive01M (StateGenM @StdGen)) stdGen)
sz
]
]
, bgroup "ShortByteString"
[ env (pure genLengths) $ \ ~(ns, gen) ->
bench "genShortByteString" $
Expand Down
1 change: 1 addition & 0 deletions random.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -168,5 +168,6 @@ benchmark bench
build-depends:
base -any,
gauge >=0.2.3 && <0.3,
mtl,
random -any,
splitmix >=0.1 && <0.2
64 changes: 48 additions & 16 deletions src/System/Random/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ module System.Random.Internal
, Uniform(..)
, UniformRange(..)
, uniformByteString
, uniformDouble01M
, uniformDoublePositive01M
, uniformFloat01M
, uniformFloatPositive01M

-- * Generators for sequences of pseudo-random bytes
, genShortByteStringIO
Expand Down Expand Up @@ -709,34 +713,62 @@ instance UniformRange Bool where
-- | See /Floating point number caveats/ in "System.Random.Stateful".
instance UniformRange Double where
uniformRM (l, h) g = do
w64 <- uniformWord64 g
let x = word64ToDoubleInUnitInterval w64
x <- uniformDouble01M g
return $ (h - l) * x + l

-- | Turns a given uniformly distributed 'Word64' value into a uniformly
-- distributed 'Double' value in the range [0, 1].
word64ToDoubleInUnitInterval :: Word64 -> Double
word64ToDoubleInUnitInterval w64 = d / m
-- | Generates uniformly distributed 'Double' in the range \([0, 1]\).
-- Numbers are generated by generating uniform 'Word64' and dividing
-- it by \(2^{64}\). It's used to implement 'UniformR' instance for
-- 'Double'.
--
-- @since 1.2.0
uniformDouble01M :: StatefulGen g m => g -> m Double
uniformDouble01M g = do
w64 <- uniformWord64 g
return $ fromIntegral w64 / m
where
d = fromIntegral w64 :: Double
m = fromIntegral (maxBound :: Word64) :: Double
{-# INLINE word64ToDoubleInUnitInterval #-}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you run the benchmarks on this? I have a feeling that uniformDouble01M might need to be INLINE to avoid a regression, but I'm not sure.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. No change at all. I played a bit with strictness but GHC but I wasn't able to find any measurable change.


-- | Generates uniformly distributed 'Double' in the range
-- \((0, 1]\). Number is generated by adding \(2^{-32}/2\) to the
-- evaluation result of 'uniformDouble01M'.
--
-- @since 1.2.0
uniformDoublePositive01M :: StatefulGen g m => g -> m Double
uniformDoublePositive01M g = (+ d) <$> uniformDouble01M g
where
-- We add small constant to shift generated value from zero. It's
-- selected as 1/2 of smallest possible nonzero value
d = 2.710505431213761e-20 -- 2**(-65)
Comment on lines +741 to +743
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the largest value that can be added to 1.0 such that the result is still 1.0. There are many smaller values that also have this property and that would also result in never getting 0 as a result of this function. My guess is that the goal here was to avoid "excessively small values" rather than just avoiding zero, is that correct? If so, I would suggest extending the description a little to justify why this is 2^-64 and not e.g. 5e-324, which would also avoid getting zero but would not exclude very small numbers from being returned.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It' good question.

No. It's 1/2 of smallest non zero value that this algorithm could generate. Largest ε that 1+ε=1 is 2^{-53}. Choice is somewhat arbitrary. Another possibility is to pick ε*2^{-64}. That is largest number that will leave all other values unchanged.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, thanks for the correction.

I guess the question is: under what circumstances do people want to avoid getting a zero here, and under those circumstances, would they also want to avoid getting extremely small values?

My default here would be to exclude only the zero. Everything else should have a good reason IMO, since excluding more numbers results in a larger effect on the outcome distribution.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When result is fed into function that isn't defined for 0. Most common choices are: log 1/x^n. For log anything nonzero goes. For inverse x it's better to get some distance between underflow and 1/2^64. Denormalized numbers are sort of useless here since 1/x will yield same ∞ as zero.

ε*2^{-64}. looks like good choice since it doesn't change rest of distribution and still quite far from underflows

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1/2 of smallest possible nonzero value

I don't think that's right. The smallest positive double, or the smallest absolute value that is non-zero, is 5e-324. So "1/2 of smallest possible nonzero value" would be 2.5e-324.

I don't have a very strong opinion on what this number should be, but the comment should be updated.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I can explain that what @Shimuuar suggests is actually very good. Suppose you have floating point numbers with only 2 bits for the mantissa and 2 bits for the exponent then using the proposed algorithm:

word32ToFloatInUnitInterval :: Int -> Float
word32ToFloatInUnitInterval w32 = f / m
  where
    f = fromIntegral w32 :: Float
    m = fromIntegral (maxBound :: Int) :: Float

and manually calculating for our tiny floating point values

map ((/4) . fromIntegral) [0,1,2,3]
[0.0,0.25,0.5,0.75]

The smallest value the RNG algorithm can generate is 0.25 and half of this is 0.125 so we will get

map ((+(0.25/2)) . (/4) . fromIntegral) [0,1,2,3]
[0.125,0.375,0.625,0.875]

which has the nice property that it is symmetrical in the interval. N.B. that these really are available in the floating point representation

normals :: [Double]
normals = [ 1/2^e * (1 + m0 * (1/2^1) + m1 * (1/2^2)) | e <- [2,1], m0 <- [0,1], m1 <- [0,1] ]
[0.25,0.3125,0.375,0.4375,0.5,0.625,0.75,0.875]

so no rounding is happening (that was what I was worried about).

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course this nice property doesn't really manifest itself with Double and even Float - you would have to sample a lot of Double to notice the mean is now correct :)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay that clears it up.

1/2 of smallest possible nonzero value

"smallest possible" here does not refer to the IEEE 754 encoding but to the division algorithm we use to generate doubles. Cool. Let's put that in the comment.

For concreteness' sake, here is my suggestion:

Add a small constant shift away from zero. This constant is 1/2 of the smallest positive value generated by 'uniformDouble01M'.

(And something similar for Float).


-- | See /Floating point number caveats/ in "System.Random.Stateful".
instance UniformRange Float where
uniformRM (l, h) g = do
w32 <- uniformWord32 g
let x = word32ToFloatInUnitInterval w32
x <- uniformFloat01M g
return $ (h - l) * x + l

-- | Turns a given uniformly distributed 'Word32' value into a uniformly
-- distributed 'Float' value in the range [0,1].
word32ToFloatInUnitInterval :: Word32 -> Float
word32ToFloatInUnitInterval w32 = f / m
-- | Generates uniformly distributed 'Float' in the range \([0, 1]\).
-- Numbers are generated by generating uniform 'Word32' and dividing
-- it by \(2^{32}\). It's used to implement 'UniformR' instance for 'Float'
--
-- @since 1.2.0
uniformFloat01M :: StatefulGen g m => g -> m Float
uniformFloat01M g = do
w32 <- uniformWord32 g
return $ fromIntegral w32 / m
where
f = fromIntegral w32 :: Float
m = fromIntegral (maxBound :: Word32) :: Float
{-# INLINE word32ToFloatInUnitInterval #-}

-- | Generates uniformly distributed 'Float' in the range
-- \((0, 1]\). Number is generated by adding \(2^{-33}\) to
-- the evaluation result of 'uniformFloat01M'.
--
-- @since 1.2.0
uniformFloatPositive01M :: StatefulGen g m => g -> m Float
uniformFloatPositive01M g = (+ d) <$> uniformFloat01M g
where
-- See uniformDoublePositive01M
d = 1.1641532182693481e-10 -- 2**(-33)

-- The two integer functions below take an [inclusive,inclusive] range.
randomIvalIntegral :: (RandomGen g, Integral a) => (a, a) -> g -> (a, g)
Expand Down
4 changes: 4 additions & 0 deletions src/System/Random/Stateful.hs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ module System.Random.Stateful
, genShortByteStringIO
, genShortByteStringST
, uniformByteString
, uniformDouble01M
, uniformDoublePositive01M
, uniformFloat01M
, uniformFloatPositive01M

-- * Appendix

Expand Down