diff --git a/CHANGELOG.md b/CHANGELOG.md index f205976..2a6df14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,22 @@ Changelog ========= +1.2.0 +----- + +#### Front End +- `VectorTile` now holds a lazy `Map` internally, instead of a `Vector`. Use +the `ix` Lens, giving it a layer name, to hone in on individual Layers +quickly. +- Users can optionally use the new `fromProtobuf` function directly from +`Geography.VectorTile.Protobuf` if they wish to be semantically explicit +about the backend conversion. Otherwise, they can still use the top-level +`tile` function. + +#### Back End +- Reworked internals for Protobuf conversion. Most code moved to an +`Internal` module. + 1.1.1 ----- - Removed the `StrictData` pragma. Turns out laziness is faster. diff --git a/Geography/VectorTile/Protobuf/Internal.hs b/Geography/VectorTile/Protobuf/Internal.hs index 91f532f..4430656 100644 --- a/Geography/VectorTile/Protobuf/Internal.hs +++ b/Geography/VectorTile/Protobuf/Internal.hs @@ -89,9 +89,11 @@ class Protobuffable a where toProtobuf :: a -> Protobuf a instance Protobuffable VT.VectorTile where - fromProtobuf = fmap (VT.VectorTile . V.fromList) . mapM fromProtobuf . getField . _layers + fromProtobuf raw = do + ls <- mapM fromProtobuf . getField $ _layers raw + pure . VT.VectorTile . M.fromList $ map (\l -> (VT._name l, l)) ls - toProtobuf vt = RawVectorTile { _layers = putField . V.toList . V.map toProtobuf $ VT._layers vt } + toProtobuf vt = RawVectorTile { _layers = putField . map toProtobuf . M.elems $ VT._layers vt } instance Protobuffable VT.Layer where fromProtobuf l = do diff --git a/Geography/VectorTile/VectorTile.hs b/Geography/VectorTile/VectorTile.hs index 8939b81..dd84548 100644 --- a/Geography/VectorTile/VectorTile.hs +++ b/Geography/VectorTile/VectorTile.hs @@ -45,15 +45,13 @@ import Geography.VectorTile.Geometry --- --- | A high-level representation of a Vector Tile. At its simplest, a tile --- is just a list of `Layer`s. 
--- --- There is potential to implement `_layers` as a `M.Map`, with its String-based --- `name` as a key. -newtype VectorTile = VectorTile { _layers :: V.Vector Layer } deriving (Eq,Show,Generic) +-- | A high-level representation of a Vector Tile. Implemented internally +-- as a `M.Map`, so that access to individual layers can be fast if you +-- know the layer names ahead of time. +newtype VectorTile = VectorTile { _layers :: M.Map Text Layer } deriving (Eq,Show,Generic) --- | > Lens' VectorTile (Vector Layer) -layers :: Functor f => (V.Vector Layer -> f (V.Vector Layer)) -> VectorTile -> f VectorTile +-- | > Lens' VectorTile (Map Text Layer) +layers :: Functor f => (M.Map Text Layer -> f (M.Map Text Layer)) -> VectorTile -> f VectorTile layers f v = VectorTile <$> f (_layers v) {-# INLINE layers #-} diff --git a/README.md b/README.md index c3ff683..ad37069 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,8 @@ You can run the benchmarks with `stack bench`, provided you have [the stack tool](http://docs.haskellstack.org/en/stable/README/). The following results are from a 2016 Lenovo ThinkPad Carbon X1 with an Intel Core i7 processor, comparing this library with a [Python library of similar -functionality](https://github.com/mapzen/mapbox-vector-tile). +functionality](https://github.com/mapzen/mapbox-vector-tile). All +benchmarking code is available in the `bench` directory. *Note: 1 ms = 1000 μs* @@ -83,9 +84,9 @@ functionality](https://github.com/mapzen/mapbox-vector-tile). 
| | One Point | One LineString | One Polygon | roads.mvt (40kb, 15 layers) | --- | --- | --- | --- | --- | -| CPython 3.5.2 | 59 μs | 69 μs | 82 μs | 73 ms | -| PyPy 5.3 | 115 μs | 213 μs | 212 μs | 11.4 ms | -| Haskell | 3.6 μs | 4.7 μs | 5.7 μs | 16.6 ms +| CPython 3.5.2 | 63 μs | 70 μs | 84 μs | 76 ms | +| PyPy 5.3 | 116 μs | 210 μs | 211 μs | 12 ms | +| Haskell | 3.6 μs | 5 μs | 5.8 μs | 17.1 ms *The Haskell times are measuring data evaluation to their Normal Form (fully evaluated form).* @@ -94,22 +95,22 @@ evaluated form).* ##### Encoding -| | One Point | One LineString | One Polygon | roads.mvt (40kb, 15 layers) +| | One Point | One LineString | One Polygon | roads.mvt | --- | --- | --- | --- | --- | -| CPython 3.5.2 | 212 μs | 268 μs | 667 μs | N/A | -| Haskell | 3.1 μs | 3.8 μs | 4.5 μs | 10 ms +| CPython 3.5.2 | 218 μs | 278 μs | 703 μs | N/A | +| Haskell | 3.2 μs | 4.4 μs | 5 μs | 11.1 ms *Certain encoding benchmarks for Python were not possible.* -##### Data Access (Fetching all Layer names) +##### Data Access (Fetching first Polygon) -| | One Point | One LineString | One Polygon | roads.mvt (40kb, 15 layers) -| --- | --- | --- | --- | --- | -| CPython 3.5.2 | 60 μs | 69 μs | 83 μs | 73 ms | -| PyPy 5.3 | 162 μs | 124 μs | 103 μs | 7.7 ms | -| Haskell | 3.1 μs | 3.4 μs | 3.5 μs | 6.5 ms +| | One Polygon | roads.mvt (`water` layer) +| --- | --- | --- | +| CPython 3.5.2 | 84 μs | 78 ms | +| PyPy 5.3 | 31 μs | 7.9 ms | +| Haskell | 3.4 μs | 6.8 ms | -*The operation being benchmarked is `ByteString -> [Text]`, meaning we +*The operation being benchmarked is `ByteString -> Polygon`, meaning we include the decoding time to account for speed gains afforded by laziness.* ##### Conclusions diff --git a/bench/Bench.hs b/bench/Bench.hs index 5707b1a..8a4d4f8 100644 --- a/bench/Bench.hs +++ b/bench/Bench.hs @@ -1,11 +1,14 @@ +{-# LANGUAGE OverloadedStrings #-} + module Main where import Control.Monad ((>=>)) import Criterion.Main import qualified Data.ByteString as 
BS +import qualified Data.Map.Lazy as M import Data.Text (Text) import Geography.VectorTile -import qualified Geography.VectorTile.Protobuf as R +import Geography.VectorTile.Geometry (Polygon) import Lens.Micro import Lens.Micro.Platform () -- Instances only. @@ -17,10 +20,10 @@ main = do ls <- BS.readFile "test/linestring.mvt" pl <- BS.readFile "test/polygon.mvt" rd <- BS.readFile "test/roads.mvt" - let op' = fromRight $ R.decode op >>= R.tile - ls' = fromRight $ R.decode ls >>= R.tile - pl' = fromRight $ R.decode pl >>= R.tile - rd' = fromRight $ R.decode rd >>= R.tile + let op' = fromRight $ decode op >>= tile + ls' = fromRight $ decode ls >>= tile + pl' = fromRight $ decode pl >>= tile + rd' = fromRight $ decode rd >>= tile defaultMain [ bgroup "Decoding" [ bgroup "onepoint.mvt" $ decodes op , bgroup "linestring.mvt" $ decodes ls @@ -40,22 +43,30 @@ main = do , bench "One Polygon" $ nf layerNames pl , bench "roads.mvt" $ nf layerNames rd ] + , bgroup "First Polygon" + [ bench "One Polygon" $ nf (firstPoly "OnePolygon") pl {- polygon.mvt, as in bench/pybench.py -} + , bench "roads.mvt - water layer" $ nf (firstPoly "water") rd + ] ] ] decodes :: BS.ByteString -> [Benchmark] -decodes bs = [ bench "Raw.VectorTile" $ nf R.decode bs - , bench "VectorTile" $ nf (R.decode >=> R.tile) bs +decodes bs = [ bench "Raw.VectorTile" $ nf decode bs + , bench "VectorTile" $ nf (decode >=> tile) bs ] encodes :: VectorTile -> [Benchmark] -encodes vt = [ bench "Raw.VectorTile" $ nf R.untile vt - , bench "ByteString" $ nf (R.encode . R.untile) vt +encodes vt = [ bench "Raw.VectorTile" $ nf untile vt + , bench "ByteString" $ nf (encode . untile) vt ] layerNames :: BS.ByteString -> [Text] -layerNames mvt = t ^.. layers . each . name - where t = fromRight $ R.decode mvt >>= R.tile +layerNames mvt = M.keys $ _layers t + where t = fromRight $ decode mvt >>= tile + +firstPoly :: Text -> BS.ByteString -> Maybe Polygon +firstPoly ln mvt = r ^? _Right . layers . ix ln . polygons . _head . geometries . 
_head + where r = decode mvt >>= tile fromRight :: Either a b -> b fromRight (Right b) = b diff --git a/bench/pybench.py b/bench/pybench.py index 812bf2b..fe0a861 100644 --- a/bench/pybench.py +++ b/bench/pybench.py @@ -16,6 +16,10 @@ def layerNames(data): decoded = mapbox_vector_tile.decode(data, y_coord_down=True) return list(decoded.keys()) +def firstPoly(data, layerName): + decoded = mapbox_vector_tile.decode(data, y_coord_down=True) + return decoded[layerName]['features'][0]['geometry'][0] + # Benchmark the decoding process. def benchDecode(file): print('Benchmarking {}'.format(file)) @@ -52,6 +56,20 @@ def benchFetch(file): print('Average: {} ms'.format(1000 * the_time / iters)) +def benchPoly(file, layerName): + print('Benchmarking {} - {}'.format(file, layerName)) + + # Exclude the IO from the benchmark timing. + with open(file, 'rb') as f: + data = f.read() + + iters = 100 + wrapped = wrapper(firstPoly, data, layerName) + the_time = timeit.timeit(wrapped, number=iters) + + print('Average: {} ms'.format(1000 * the_time / iters)) + + print('*** DECODING ***') benchDecode('test/onepoint.mvt') @@ -101,5 +119,7 @@ def benchFetch(file): benchFetch('test/linestring.mvt') benchFetch('test/polygon.mvt') benchFetch('test/roads.mvt') +benchPoly('test/polygon.mvt', 'OnePolygon') +benchPoly('test/roads.mvt', 'water') print("\nDone") diff --git a/demo/Demo.hs b/demo/Demo.hs index 68ecfc7..4bed394 100644 --- a/demo/Demo.hs +++ b/demo/Demo.hs @@ -10,17 +10,27 @@ import qualified Data.Vector as V --- +get :: FilePath -> IO (Either Text VectorTile) +get fp = do + mvt <- BS.readFile fp + pure $ decode mvt >>= tile + -- | Read in raw protobuf data and decode it into a high-level type. roads :: IO (Either Text VectorTile) -roads = do - mvt <- BS.readFile "test/roads.mvt" - pure $ decode mvt >>= tile +roads = get "test/roads.mvt" + + +onePolygon :: IO (Either Text VectorTile) +onePolygon = get "test/polygon.mvt" layerNames :: Traversal' VectorTile Text layerNames = layers . 
traverse . name -- (\r -> sum $ r ^.. _Right . layers . traverse . polygons . traverse . geometries . to V.length) <$> roads +-- Find the first Polygon from the `water` layer. +-- (\r -> r ^? _Right . layers . ix "water" . polygons . _head . geometries . _head) <$> roads + -- POINTS: 76 -- LINESTRINGS: 576 -- POLYGONS: 555 diff --git a/vectortiles.cabal b/vectortiles.cabal index 39120de..fa3cffc 100644 --- a/vectortiles.cabal +++ b/vectortiles.cabal @@ -92,6 +92,7 @@ benchmark vectortiles-bench , microlens >= 0.4 && < 0.5 , microlens-platform >= 0.3 && < 0.4 , text + , containers hs-source-dirs: bench main-is: Bench.hs