diff --git a/.gitignore b/.gitignore index 346cc90..42b31db 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ TAGS *.backup /.cabal-sandbox cabal.sandbox.config +cabal.project.local countries.ttl *.prof bench/MainCriterion diff --git a/.hlint.yaml b/.hlint.yaml new file mode 100644 index 0000000..4057568 --- /dev/null +++ b/.hlint.yaml @@ -0,0 +1,65 @@ +# HLint configuration file +# https://github.com/ndmitchell/hlint +########################## + +# This file contains a template configuration file, which is typically +# placed as .hlint.yaml in the root of your project + + +# Specify additional command line arguments +# +# - arguments: [--color, --cpp-simple, -XQuasiQuotes] + + +# Control which extensions/flags/modules/functions can be used +# +# - extensions: +# - default: false # all extension are banned by default +# - name: [PatternGuards, ViewPatterns] # only these listed extensions can be used +# - {name: CPP, within: CrossPlatform} # CPP can only be used in a given module +# +# - flags: +# - {name: -w, within: []} # -w is allowed nowhere +# +# - modules: +# - {name: [Data.Set, Data.HashSet], as: Set} # if you import Data.Set qualified, it must be as 'Set' +# - {name: Control.Arrow, within: []} # Certain modules are banned entirely +# +# - functions: +# - {name: unsafePerformIO, within: []} # unsafePerformIO can only appear in no modules + + +# Add custom hints for this project +# +# Will suggest replacing "wibbleMany [myvar]" with "wibbleOne myvar" +# - error: {lhs: "wibbleMany [x]", rhs: wibbleOne x} + + +# Turn on hints that are off by default +# +# Ban "module X(module X) where", to require a real export list +# - warn: {name: Use explicit module export list} +# +# Replace a $ b $ c with a . b $ c +# - group: {name: dollar, enabled: true} +# +# Generalise map to fmap, ++ to <> +- group: {name: generalise, enabled: true} + + +# Ignore some builtin hints +# - ignore: {name: Use let} +# - ignore: {name: Use const, within: SpecialModule} # Only within certain modules + + +# Define some custom infix operators +# - fixity: infixr 3 ~^#^~ + + +# To generate a suitable file for HLint do: +# $ hlint --default > .hlint.yaml + +- ignore: {name: Eta reduce} +- ignore: {name: Redundant bracket} +- ignore: {name: Reduce duplication} +- ignore: {name: Use camelCase} diff --git a/.travis.yml b/.travis.yml index 500904b..6d809b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,10 +81,6 @@ matrix: compiler: ": #stack 7.10.3" addons: {apt: {packages: [libgmp-dev]}} - - env: BUILD=stack ARGS="--resolver lts-7" - compiler: ": #stack 8.0.1" - addons: {apt: {packages: [libgmp-dev]}} - - env: BUILD=stack ARGS="--resolver lts-9" compiler: ": #stack 8.0.2" addons: {apt: {packages: [libgmp-dev]}} @@ -97,6 +93,10 @@ matrix: compiler: ": #stack 8.4.3" addons: {apt: {packages: [libgmp-dev]}} + - env: BUILD=stack ARGS="--resolver lts-13" + compiler: ": #stack 8.6.5" + addons: {apt: {packages: [libgmp-dev]}} + # Nightly builds are allowed to fail # - env: BUILD=stack ARGS="--resolver nightly" # compiler: ": #stack nightly" diff --git a/bench/MainCriterion.hs b/bench/MainCriterion.hs index ffa8d67..6077c0c 100644 --- a/bench/MainCriterion.hs +++ b/bench/MainCriterion.hs @@ -4,6 +4,7 @@ module Main where import Prelude hiding (readFile) +import Data.Semigroup (Semigroup(..)) import Criterion import Criterion.Types import Criterion.Main @@ -18,21 +19,15 @@ import Control.DeepSeq (NFData) -- $ gzip -d bills.099.actions.rdf.gz parseXmlRDF :: Rdf a => T.Text -> RDF a -parseXmlRDF s = - let (Right rdf) = parseString (XmlParser Nothing Nothing) s - in rdf +parseXmlRDF = either (error . show) id . parseString (XmlParser Nothing Nothing) {-# INLINE parseXmlRDF #-} parseNtRDF :: Rdf a => T.Text -> RDF a -parseNtRDF s = - let (Right rdf) = parseString NTriplesParser s - in rdf +parseNtRDF = either (error . show) id . parseString NTriplesParser {-# INLINE parseNtRDF #-} parseTtlRDF :: Rdf a => T.Text -> RDF a -parseTtlRDF s = - let (Right rdf) = parseString (TurtleParser Nothing Nothing) s - in rdf +parseTtlRDF = either (error . show) id . parseString (TurtleParser Nothing Nothing) {-# INLINE parseTtlRDF #-} queryGr :: Rdf a => (Maybe Node,Maybe Node,Maybe Node,RDF a) -> [Triple] @@ -48,15 +43,19 @@ main :: IO () main = defaultMainWith (defaultConfig {resamples = 100}) [ env + -- [FIXME] Do not rely on system's defaults to read files. (do fawltyContentTurtle <- readFile "data/ttl/fawlty1.ttl" fawltyContentNTriples <- readFile "data/nt/all-fawlty-towers.nt" - rdf1' <- parseFile (XmlParser Nothing Nothing) xmlFile - rdf2' <- parseFile (XmlParser Nothing Nothing) xmlFile - let rdf1 = either (error . show) id rdf1' :: RDF TList + xmlContent <- readFile xmlFile + let rdf1' = parseString (XmlParser Nothing Nothing) xmlContent + rdf2' = parseString (XmlParser Nothing Nothing) xmlContent + rdf3' =parseString (XmlParser Nothing Nothing) xmlContent + rdf1 = either (error . show) id rdf1' :: RDF TList rdf2 = either (error . show) id rdf2' :: RDF AdjHashMap + rdf3 = either (error . show) id rdf3' :: RDF AlgebraicGraph triples = triplesOf rdf1 - return (rdf1, rdf2, triples, fawltyContentNTriples, fawltyContentTurtle)) $ - \ ~(triplesList, adjMap, triples, fawltyContentNTriples, fawltyContentTurtle) -> + return (rdf1, rdf2, rdf3, triples, fawltyContentNTriples, fawltyContentTurtle, xmlContent)) $ + \ ~(triplesList, adjMap, algGraph, triples, fawltyContentNTriples, fawltyContentTurtle, xmlContent) -> bgroup "rdf4h" [ bgroup @@ -81,42 +80,56 @@ main = defaultMainWith let res = parseString (TurtleParserCustom Nothing Nothing Attoparsec) t :: Either ParseFailure (RDF TList) in either (error . show) id res ) fawltyContentTurtle + , bench "xml-xmlbf" $ + nf (\t -> + let res = parseString (XmlParser Nothing Nothing) t :: Either ParseFailure (RDF TList) + in either (error . show) id res + ) xmlContent + , bench "xml-xht" $ + nf (\t -> + let res = parseString (XmlParserHXT Nothing Nothing) t :: Either ParseFailure (RDF TList) + in either (error . show) id res + ) xmlContent ] , bgroup "query" - (queryBench "TList" triplesList ++ - queryBench "AdjHashMap" adjMap - -- queryBench "SP" mapSP ++ queryBench "HashSP" hashMapSP + (queryBench "TList" triplesList <> + queryBench "AdjHashMap" adjMap <> + queryBench "AlgebraicGraph" algGraph + -- queryBench "SP" mapSP <> queryBench "HashSP" hashMapSP ) , bgroup "select" - (selectBench "TList" triplesList ++ - selectBench "AdjHashMap" adjMap - -- selectBench "SP" mapSP ++ selectBench "HashSP" hashMapSP + (selectBench "TList" triplesList <> + selectBench "AdjHashMap" adjMap <> + selectBench "AlgebraicGraph" algGraph + -- selectBench "SP" mapSP <> selectBench "HashSP" hashMapSP ) , bgroup "add-remove-triples" - (addRemoveTriples "TList" triples (empty :: RDF TList) triplesList - ++ addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap + (addRemoveTriples "TList" triples (empty :: RDF TList) triplesList <> + addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap <> + addRemoveTriples "AlgebraicGraph" triples (empty :: RDF AlgebraicGraph) algGraph ) , bgroup "count_triples" [ bench "TList" (nf (length . triplesOf) triplesList) , bench "AdjHashMap" (nf (length . triplesOf) adjMap) + , bench "AlgebraicGraph" (nf (length . triplesOf) algGraph) ] ] ] selectBench :: Rdf a => String -> RDF a -> [Benchmark] selectBench label gr = - [ bench (label ++ " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) - , bench (label ++ " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) - , bench (label ++ " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) - , bench (label ++ " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) - , bench (label ++ " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) - , bench (label ++ " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) - , bench (label ++ " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) + [ bench (label <> " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) + , bench (label <> " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) + , bench (label <> " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) + , bench (label <> " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) + , bench (label <> " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) + , bench (label <> " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) + , bench (label <> " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) ] subjSelect, predSelect, objSelect, selectNothing :: Maybe (Node -> Bool) @@ -133,25 +146,25 @@ queryNothing = Nothing queryBench :: Rdf a => String -> RDF a -> [Benchmark] queryBench label gr = - [ bench (label ++ " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) - , bench (label ++ " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) - , bench (label ++ " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) - , bench (label ++ " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) - , bench (label ++ " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) - , bench (label ++ " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) - , bench (label ++ " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) + [ bench (label <> " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) + , bench (label <> " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) + , bench (label <> " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) + , bench (label <> " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) + , bench (label <> " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) + , bench (label <> " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) + , bench (label <> " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) ] -addRemoveTriples :: (NFData a,NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] +addRemoveTriples :: (NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] addRemoveTriples lbl triples emptyGr populatedGr = - [ bench (lbl ++ "-add-triples") $ nf addTriples (triples,emptyGr) - , bench (lbl ++ "-remove-triples") $ nf removeTriples (triples,populatedGr) + [ bench (lbl <> "-add-triples") $ nf addTriples (triples,emptyGr) + , bench (lbl <> "-remove-triples") $ nf removeTriples (triples,populatedGr) ] addTriples :: Rdf a => (Triples,RDF a) -> RDF a addTriples (triples,emptyGr) = - foldr (\t g -> addTriple g t) emptyGr triples + foldr (flip addTriple) emptyGr triples removeTriples :: Rdf a => (Triples,RDF a) -> RDF a removeTriples (triples,populatedGr) = - foldr (\t g -> removeTriple g t) populatedGr triples + foldr (flip removeTriple) populatedGr triples diff --git a/data/ttl/conformance/test-00.out b/data/ttl/conformance/test-00.out index 1209817..7f8b717 100644 --- a/data/ttl/conformance/test-00.out +++ b/data/ttl/conformance/test-00.out @@ -1 +1 @@ -_:genid1 . +_:genid1 . diff --git a/examples/ESWC.hs b/examples/ESWC.hs index b0b8f39..2ef8cc7 100644 --- a/examples/ESWC.hs +++ b/examples/ESWC.hs @@ -13,8 +13,8 @@ heldByProp = "swc:heldBy" eswcCommitteeMembers :: RDF TList -> [T.Text] eswcCommitteeMembers graph = let triples = query graph (Just (unode eswcCommitteeURI)) (Just (unode heldByProp)) Nothing - memberURIs = map objectOf triples - in map + memberURIs = fmap objectOf triples + in fmap (\memberURI -> let (LNode (PlainL (firstName::T.Text))) = objectOf $ head $ query graph (Just memberURI) (Just (unode "foaf:firstName")) Nothing @@ -22,7 +22,7 @@ eswcCommitteeMembers graph = objectOf $ head $ query graph (Just memberURI) (Just (unode "foaf:lastName")) Nothing in (T.append firstName (T.append (T.pack " ") lastName))) memberURIs - + main :: IO () main = do result <- parseURL diff --git a/examples/ParseURLs.hs b/examples/ParseURLs.hs index bbfc286..8986ebe 100644 --- a/examples/ParseURLs.hs +++ b/examples/ParseURLs.hs @@ -11,7 +11,7 @@ timBernersLee :: IO () timBernersLee = do Right (rdf::RDF TList) <- parseURL (XmlParser Nothing Nothing) "http://www.w3.org/People/Berners-Lee/card.rdf" let ts = query rdf (Just (UNode "http://www.w3.org/2011/Talks/0331-hyderabad-tbl/data#talk")) (Just (UNode "dct:title")) Nothing - let talks = map (\(Triple _ _ (LNode (PlainL s))) -> s) ts + let talks = fmap (\(Triple _ _ (LNode (PlainL s))) -> s) ts print talks main :: IO () diff --git a/rdf-tests b/rdf-tests index e24f243..280e9de 160000 --- a/rdf-tests +++ b/rdf-tests @@ -1 +1 @@ -Subproject commit e24f243f79087a61a1b1aa72f5c7c27470155c33 +Subproject commit 280e9de3aaefa6b292a151bd455204d49a0c09db diff --git a/rdf4h.cabal b/rdf4h.cabal index 0b0ec3b..c5dc581 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -19,18 +19,32 @@ cabal-version: >= 1.8 build-type: Simple category: RDF stability: Experimental -tested-with: GHC==7.10.2, GHC==8.0.2 +tested-with: GHC==7.10.3, GHC==8.0.2, GHC==8.8.2, GHC==8.4.3, GHC==8.6.5 extra-tmp-files: test extra-source-files: examples/ParseURLs.hs , examples/ESWC.hs + +source-repository head + type: git + location: https://github.com/robstewart57/rdf4h.git + + +flag dev + description: Developer build + manual: True + default: False + + library + hs-source-dirs: src exposed-modules: Data.RDF , Data.RDF.IRI , Data.RDF.Namespace , Data.RDF.Types , Data.RDF.Query , Data.RDF.Graph.AdjHashMap + , Data.RDF.Graph.AlgebraicGraph , Data.RDF.Graph.TList , Text.RDF.RDF4H.TurtleParser , Text.RDF.RDF4H.TurtleSerializer @@ -38,6 +52,7 @@ library , Text.RDF.RDF4H.NTriplesSerializer , Text.RDF.RDF4H.XmlParser , Text.RDF.RDF4H.XmlParserHXT + , Text.RDF.RDF4H.XmlParser.Identifiers , Text.RDF.RDF4H.ParserUtils build-depends: attoparsec , base >= 4.8.0.0 @@ -48,6 +63,7 @@ library , HTTP >= 4000.0.0 , hxt >= 9.3.1.2 , text >= 1.2.1.0 + , algebraic-graphs >= 0.3 && < 0.5 , unordered-containers , hashable , deepseq @@ -65,8 +81,12 @@ library if !impl(ghc >= 8.0) build-depends: semigroups == 0.18.* - hs-source-dirs: src - ghc-options: -Wall -funbox-strict-fields + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields + executable rdf4h main-is: src/Rdf4hParseMain.hs @@ -78,11 +98,17 @@ executable rdf4h if impl(ghc < 7.6) build-depends: ghc-prim - ghc-options: -Wall -funbox-strict-fields + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields + test-suite test-rdf4h type: exitcode-stdio-1.0 main-is: Test.hs + hs-source-dirs: testsuite/tests other-modules: Data.RDF.PropertyTests Data.RDF.GraphImplTests Data.RDF.IRITests @@ -90,9 +116,9 @@ test-suite test-rdf4h Text.RDF.RDF4H.XmlParser_Test W3C.Manifest W3C.NTripleTest + W3C.TurtleTest W3C.RdfXmlTest W3C.W3CAssertions - ghc-options: -Wall -fno-warn-orphans -funbox-strict-fields build-depends: base >= 4.8.0.0 && < 6 , rdf4h , tasty @@ -108,9 +134,15 @@ test-suite test-rdf4h if impl(ghc < 7.6) build-depends: ghc-prim + if !impl(ghc >= 8.0) + build-depends: semigroups == 0.18.* + + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields - other-modules: W3C.TurtleTest - hs-source-dirs: testsuite/tests benchmark rdf4h-bench type: exitcode-stdio-1.0 @@ -121,8 +153,12 @@ benchmark rdf4h-bench criterion, rdf4h, text >= 1.2.1.0 - ghc-options: -Wall -source-repository head - type: git - location: https://github.com/robstewart57/rdf4h.git + if !impl(ghc >= 8.0) + build-depends: semigroups == 0.18.* + + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields diff --git a/src/Data/RDF.hs b/src/Data/RDF.hs index ee68d6a..2173371 100644 --- a/src/Data/RDF.hs +++ b/src/Data/RDF.hs @@ -15,6 +15,7 @@ module Data.RDF ( -- * RDF type class instances module Data.RDF.Graph.TList, module Data.RDF.Graph.AdjHashMap, + module Data.RDF.Graph.AlgebraicGraph, -- module Data.RDF.Graph.HashMapSP, -- module Data.RDF.Graph.MapSP, @@ -30,6 +31,7 @@ module Data.RDF ( import Data.RDF.Namespace import Data.RDF.Graph.TList import Data.RDF.Graph.AdjHashMap +import Data.RDF.Graph.AlgebraicGraph -- import Data.RDF.Graph.HashMapSP -- import Data.RDF.Graph.MapSP import Text.RDF.RDF4H.NTriplesSerializer diff --git a/src/Data/RDF/Graph/AdjHashMap.hs b/src/Data/RDF/Graph/AdjHashMap.hs index 4c93c9f..1148725 100644 --- a/src/Data/RDF/Graph/AdjHashMap.hs +++ b/src/Data/RDF/Graph/AdjHashMap.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections, GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -13,11 +12,11 @@ module Data.RDF.Graph.AdjHashMap (AdjHashMap) where import Prelude hiding (pred) +import Data.Semigroup ((<>)) import Data.List import Data.Binary (Binary) import Data.RDF.Types import Data.RDF.Query -import Data.RDF.Namespace import Data.Hashable () import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HashMap @@ -103,16 +102,16 @@ instance Rdf AdjHashMap where -- show (AdjHashMap ((spoMap, _), _, _)) = -- let ts = concatMap (uncurry tripsSubj) subjPredMaps -- where subjPredMaps = HashMap.toList spoMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts -showGraph' :: RDF AdjHashMap -> [Char] +showGraph' :: RDF AdjHashMap -> String showGraph' ((AdjHashMap ((spoMap, _), _, _))) = let ts = concatMap (uncurry tripsSubj) subjPredMaps where subjPredMaps = HashMap.toList spoMap - in concatMap (\t -> show t ++ "\n") ts + in concatMap (\t -> show t <> "\n") ts -- instance Show (RDF AdjHashMap) where --- show gr = concatMap (\t -> show t ++ "\n") (triplesOf gr) +-- show gr = concatMap (\t -> show t <> "\n") (triplesOf gr) -- some convenience type alias for readability @@ -132,7 +131,7 @@ prefixMappings' (AdjHashMap (_, _, pms)) = pms addPrefixMappings' :: RDF AdjHashMap -> PrefixMappings -> Bool -> RDF AdjHashMap addPrefixMappings' (AdjHashMap (ts, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in AdjHashMap (ts, baseURL, merge pms pms') empty' :: RDF AdjHashMap diff --git a/src/Data/RDF/Graph/AlgebraicGraph.hs b/src/Data/RDF/Graph/AlgebraicGraph.hs new file mode 100644 index 0000000..9d2c16f --- /dev/null +++ b/src/Data/RDF/Graph/AlgebraicGraph.hs @@ -0,0 +1,113 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} -- [TODO] Remove when the missing NFData instance is added to Alga. +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE EmptyDataDecls #-} + + +module Data.RDF.Graph.AlgebraicGraph + ( AlgebraicGraph + ) where + + +import Data.Semigroup (Semigroup(..)) +import Control.DeepSeq (NFData(..)) +import Data.Binary +import Data.RDF.Namespace +import Data.RDF.Query +import Data.RDF.Types (RDF, Rdf(..), BaseUrl, Triples, Triple(..), Node, Subject, Predicate, Object, NodeSelector) +import qualified Algebra.Graph.Labelled as G +import Data.HashSet (HashSet) +import qualified Data.HashSet as HS +import GHC.Generics + + +data AlgebraicGraph deriving (Generic) + +instance Binary AlgebraicGraph +instance NFData AlgebraicGraph + +data instance RDF AlgebraicGraph = AlgebraicGraph + { _graph :: G.Graph (HashSet Node) Node + , _baseUrl :: Maybe BaseUrl + , _prefixMappings :: PrefixMappings + } deriving (Generic, NFData) + +-- [TODO] Remove this orphan instance when the missing NFData instance is added to Alga. +instance (NFData e, NFData a) => NFData (G.Graph e a) where + rnf G.Empty = () + rnf (G.Vertex x ) = rnf x + rnf (G.Connect e x y) = e `seq` rnf x `seq` rnf y + +instance Rdf AlgebraicGraph where + baseUrl = _baseUrl + prefixMappings = _prefixMappings + addPrefixMappings = addPrefixMappings' + empty = empty' + mkRdf = mkRdf' + addTriple = addTriple' + removeTriple = removeTriple' + triplesOf = triplesOf' + uniqTriplesOf = triplesOf' + select = select' + query = query' + showGraph = showGraph' + +toEdge :: Triple -> (HashSet Predicate, Subject, Object) +toEdge (Triple s p o) = (HS.singleton p, s, o) + +toTriples :: (HashSet Predicate, Subject, Object) -> Triples +toTriples (ps, s, o) = [Triple s p o | p <- HS.toList ps] + +showGraph' :: RDF AlgebraicGraph -> String +showGraph' r = concatMap (\t -> show t ++ "\n") (expandTriples r) + +addPrefixMappings' :: RDF AlgebraicGraph -> PrefixMappings -> Bool -> RDF AlgebraicGraph +addPrefixMappings' (AlgebraicGraph g baseURL pms) pms' replace = + let merge = if replace then flip (<>) else (<>) + in AlgebraicGraph g baseURL (merge pms pms') + +empty' :: RDF AlgebraicGraph +empty' = AlgebraicGraph G.empty mempty (PrefixMappings mempty) + +mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AlgebraicGraph +mkRdf' ts baseURL pms = + let g = G.edges . fmap toEdge $ ts + in AlgebraicGraph g baseURL pms + +addTriple' :: RDF AlgebraicGraph -> Triple -> RDF AlgebraicGraph +addTriple' (AlgebraicGraph g baseURL pms) (Triple s p o) = + let g' = G.edge (HS.singleton p) s o + in AlgebraicGraph (G.overlay g g') baseURL pms + +removeTriple' :: RDF AlgebraicGraph -> Triple -> RDF AlgebraicGraph +removeTriple' (AlgebraicGraph g baseURL pms) (Triple s p o) = + let ps = G.edgeLabel s o g + g' + | HS.null ps = g + | elem p ps = G.replaceEdge (HS.delete p ps) s o g + | otherwise = g + in AlgebraicGraph g' baseURL pms + +triplesOf' :: RDF AlgebraicGraph -> Triples +triplesOf' (AlgebraicGraph g _ _) = mconcat $ toTriples <$> G.edgeList g + +select' :: RDF AlgebraicGraph -> NodeSelector -> NodeSelector -> NodeSelector -> Triples +select' r Nothing Nothing Nothing = triplesOf r +select' (AlgebraicGraph g _ _) s p o = let (res, _, _) = G.foldg e v c g in res + where + e = (mempty, mempty, mempty) + v x = (mempty, s ?? x, o ?? x) + (??) f x' = let xs = HS.singleton x' in maybe xs (`HS.filter` xs) f + c ps (ts1, ss1, os1) (ts2, ss2, os2) = (ts3, ss3, os3) + where + ss3 = ss1 <> ss2 + os3 = os1 <> os2 + ts3 + | HS.null ps' = ts1 <> ts2 + | otherwise = ts1 <> ts2 <> [Triple s' p' o' | s' <- HS.toList ss3, p' <- HS.toList ps', o' <- HS.toList os3] + ps' = maybe ps (`HS.filter` ps) p + +query' :: RDF AlgebraicGraph -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples +query' r Nothing Nothing Nothing = triplesOf r +query' r s p o = select r ((==) <$> s) ((==) <$> p) ((==) <$> o) diff --git a/src/Data/RDF/Graph/HashMapSP.hs b/src/Data/RDF/Graph/HashMapSP.hs index 4bc6a95..2893fc7 100644 --- a/src/Data/RDF/Graph/HashMapSP.hs +++ b/src/Data/RDF/Graph/HashMapSP.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections, GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} -- |A graph implementation mapping (S,P) pairs to O, backed by 'Data.Map'. @@ -11,6 +10,7 @@ module Data.RDF.Graph.HashMapSP (HashSP) where import Prelude hiding (pred) +import Data.Semigroup ((<>)) import Control.DeepSeq (NFData) import Data.RDF.Types import Data.RDF.Query @@ -47,15 +47,15 @@ instance Rdf HashSP where -- instance Show (HashSP) where -- show (HashSP (tsMap,_,_)) = -- let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts -showGraph' :: RDF HashSP -> [Char] +showGraph' :: RDF HashSP -> String showGraph' (HashSP (tsMap,_,_)) = - let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap - in concatMap (\t -> show t ++ "\n") ts - + let ts = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . HashMap.toList) tsMap + in concatMap (\t -> show t <> "\n") ts + -- instance Show (HashSP) where --- show gr = concatMap (\t -> show t ++ "\n") (triplesOf gr) +-- show gr = concatMap (\t -> show t <> "\n") (triplesOf gr) type SPMap = HashMap (Subject,Predicate) [Object] @@ -67,7 +67,7 @@ prefixMappings' (HashSP (_, _, pms)) = pms addPrefixMappings' :: RDF HashSP -> PrefixMappings -> Bool -> RDF HashSP addPrefixMappings' (HashSP (tsMap, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in HashSP (tsMap, baseURL, merge pms pms') empty' :: RDF HashSP @@ -77,10 +77,10 @@ mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF HashSP mkRdf' triples baseURL pms = HashSP (tsMap, baseURL, pms) where tsMap = sortAndGroup triples - sortAndGroup xs = HashMap.fromListWith (++) [((s,p), [o]) | Triple s p o <- xs] + sortAndGroup xs = HashMap.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF HashSP -> Triples -triplesOf' (HashSP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap +triplesOf' (HashSP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . HashMap.toList) tsMap uniqTriplesOf' :: RDF HashSP -> Triples uniqTriplesOf' = nub . expandTriples @@ -93,47 +93,47 @@ select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) Nothing Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter oSelector oList) <> ts select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF HashSP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -144,42 +144,42 @@ query' (HashSP (tsMap,_,_)) Nothing (Just p) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts query' (HashSP (tsMap,_,_)) Nothing Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter (== o) oList) <> ts query' (HashSP (tsMap,_,_)) Nothing (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts -- optimal pattern for this RDF HashSP instance query' (HashSP (tsMap,_,_)) (Just s) (Just p) Nothing = - (map (Triple s p) . HashMap.lookupDefault [] (s,p)) tsMap + (fmap (Triple s p) . HashMap.lookupDefault [] (s,p)) tsMap query' (HashSP (tsMap,_,_)) (Just s) Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/MapSP.hs b/src/Data/RDF/Graph/MapSP.hs index 9b4f58b..036047e 100644 --- a/src/Data/RDF/Graph/MapSP.hs +++ b/src/Data/RDF/Graph/MapSP.hs @@ -3,8 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} @@ -73,12 +71,12 @@ instance Rdf SP where -- instance Show SP where -- show (SP (tsMap,_,_)) = -- let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts showGraph' :: RDF SP -> String showGraph' (SP (tsMap,_,_)) = - let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap - in concatMap (\t -> show t ++ "\n") ts + let ts = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . Map.toList) tsMap + in concatMap (\t -> show t <> "\n") ts type SPMap = Map (Subject,Predicate) [Object] @@ -100,10 +98,10 @@ mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF SP mkRdf' triples baseURL pms = SP (tsMap, baseURL, pms) where tsMap = sortAndGroup triples - sortAndGroup xs = Map.fromListWith (++) [((s,p), [o]) | Triple s p o <- xs] + sortAndGroup xs = Map.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF SP -> Triples -triplesOf' (SP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap +triplesOf' (SP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . Map.toList) tsMap uniqTriplesOf' :: RDF SP -> Triples uniqTriplesOf' = nub . expandTriples @@ -116,47 +114,47 @@ select' (SP (tsMap,_,_)) Nothing (Just pSelector) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) Nothing Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter oSelector oList) <> ts select' (SP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF SP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -167,42 +165,42 @@ query' (SP (tsMap,_,_)) Nothing (Just p) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts query' (SP (tsMap,_,_)) Nothing Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter (== o) oList) <> ts query' (SP (tsMap,_,_)) Nothing (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts -- optimal pattern for this SP instance query' (SP (tsMap,_,_)) (Just s) (Just p) Nothing = - (map (Triple s p) . Map.findWithDefault [] (s,p)) tsMap + (fmap (Triple s p) . Map.findWithDefault [] (s,p)) tsMap query' (SP (tsMap,_,_)) (Just s) Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/TList.hs b/src/Data/RDF/Graph/TList.hs index 7c76435..bffa82a 100644 --- a/src/Data/RDF/Graph/TList.hs +++ b/src/Data/RDF/Graph/TList.hs @@ -2,7 +2,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE EmptyDataDecls #-} @@ -20,6 +19,7 @@ module Data.RDF.Graph.TList (TList) where import Prelude +import Data.Semigroup ((<>)) import Control.DeepSeq (NFData) import Data.Binary import Data.RDF.Namespace @@ -70,15 +70,15 @@ instance Rdf TList where query = query' showGraph = showGraph' -showGraph' :: RDF TList -> [Char] -showGraph' gr = concatMap (\t -> show t ++ "\n") (expandTriples gr) +showGraph' :: RDF TList -> String +showGraph' gr = concatMap (\t -> show t <> "\n") (expandTriples gr) prefixMappings' :: RDF TList -> PrefixMappings prefixMappings' (TListC(_, _, pms)) = pms addPrefixMappings' :: RDF TList -> PrefixMappings -> Bool -> RDF TList addPrefixMappings' (TListC(ts, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in TListC(ts, baseURL, merge pms pms') baseUrl' :: RDF TList -> Maybe BaseUrl diff --git a/src/Data/RDF/Graph/TPatriciaTree.hs b/src/Data/RDF/Graph/TPatriciaTree.hs index 09e3276..eae2b4a 100644 --- a/src/Data/RDF/Graph/TPatriciaTree.hs +++ b/src/Data/RDF/Graph/TPatriciaTree.hs @@ -2,7 +2,7 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving, FlexibleInstances, BangPatterns #-} +{-# LANGUAGE FlexibleInstances, BangPatterns #-} {-# LANGUAGE EmptyDataDecls #-} module Data.RDF.Graph.TPatriciaTree (TPatriciaTree) where @@ -49,7 +49,7 @@ prefixMappings' (TPatriciaTree (_,_,_,pms')) = pms' addPrefixMappings' :: RDF TPatriciaTree -> PrefixMappings -> Bool -> RDF TPatriciaTree addPrefixMappings' (TPatriciaTree (g, idxLookup, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in TPatriciaTree (g, idxLookup, baseURL, merge pms pms') baseUrl' :: RDF TPatriciaTree -> Maybe BaseUrl @@ -101,28 +101,28 @@ mkRdf' ts base' pms' = triplesOf' :: RDF TPatriciaTree -> Triples triplesOf' (TPatriciaTree (g,idxLookup,_,_)) = - map (\(sIdx,oIdx,p) -> - let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + fmap (\(sIdx,oIdx,p) -> + let [s,o] = fmap (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] in Triple s p o) (G.labEdges g) uniqTriplesOf' :: RDF TPatriciaTree -> Triples uniqTriplesOf' ptG@(TPatriciaTree (g,idxLookup,_,_)) = - nub $ map (\(sIdx,oIdx,p) -> - let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + nub $ fmap (\(sIdx,oIdx,p) -> + let [s,o] = fmap (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] in expandTriple (prefixMappings ptG) (Triple s p o)) (G.labEdges g) mkTriples :: IntMap.IntMap Node -> Node -> [(Node, IntMap.Key)] -> [(Node, IntMap.Key)] -> [Triple] mkTriples idxLookup thisNode adjsIn adjsOut = - let ts1 = map (\(predNode,subjIdx) -> + let ts1 = fmap (\(predNode,subjIdx) -> let s = fromJust (IntMap.lookup subjIdx idxLookup) in Triple s predNode thisNode ) adjsIn - ts2 = map (\(predNode,objIdx) -> + ts2 = fmap (\(predNode,objIdx) -> let o = fromJust (IntMap.lookup objIdx idxLookup) in Triple thisNode predNode o ) adjsOut - in ts1 ++ ts2 + in ts1 <> ts2 select' :: RDF TPatriciaTree -> NodeSelector -> NodeSelector -> NodeSelector -> Triples select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel = @@ -137,7 +137,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = let adjsIn' = filter (\(p,_idxSubj) -> fromJust maybePredSel p ) adjsIn adjsOut' = filter (\(p,_idxObj) -> fromJust maybePredSel p ) adjsOut @@ -147,7 +147,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if not (null adjsOut') then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = let adjsOut' = filter (\(_p,idxObj) -> fromJust maybeObjSel (fromJust (IntMap.lookup idxObj idxLookup)) ) adjsOut @@ -155,7 +155,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeObjSel thisNode then mkTriples idxLookup thisNode adjsIn [] else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = let adjsIn' = filter (\(p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) @@ -165,7 +165,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(_p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) ) adjsIn @@ -176,7 +176,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(p,_idxSubj) -> fromJust maybePredSel p ) adjsIn @@ -186,7 +186,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel then mkTriples idxLookup thisNode adjsIn' [] else [] ts2 = mkTriples idxLookup thisNode [] adjsOut' - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) @@ -199,7 +199,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 cfun ( _ , _ , _ , _) = undefined -- not sure why this pattern is needed to exhaust cfun arg patterns @@ -218,7 +218,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isJust maybePred && isNothing maybeObj = let adjsIn' = filter (\(p,_idxSubj) -> p == fromJust maybePred ) adjsIn @@ -229,7 +229,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if not (null adjsOut') then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isNothing maybePred && isJust maybeObj = let adjsOut' = filter (\(_p,idxObj) -> fromJust (IntMap.lookup idxObj idxLookup) == fromJust maybeObj ) adjsOut @@ -237,7 +237,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeObj then mkTriples idxLookup thisNode adjsIn [] else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isJust maybePred && isNothing maybeObj = let adjsIn' = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj @@ -247,7 +247,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isNothing maybePred && isJust maybeObj = let adjsIn' = filter (\(_p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj ) adjsIn @@ -258,7 +258,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isJust maybePred && isJust maybeObj = let adjsIn' = filter (\(p,_idxSubj) -> p == fromJust maybePred ) adjsIn @@ -268,7 +268,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = then mkTriples idxLookup thisNode adjsIn' [] else [] ts2 = mkTriples idxLookup thisNode [] adjsOut' - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isJust maybePred && isJust maybeObj = let adjsIn' = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj @@ -281,7 +281,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 cfun ( _ , _ , _ , _ ) = undefined -- not sure why this pattern is needed to exhaust cfun arg patterns diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index 28fdcc5..0cdd7ce 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -16,15 +16,16 @@ module Data.RDF.IRI , serializeIRI , parseIRI, parseRelIRI , validateIRI, resolveIRI + , removeIRIFragment ) where import Data.Semigroup (Semigroup(..)) -import Data.Maybe (fromMaybe, isJust) +import Data.Maybe (maybe, isJust) import Data.Functor import Data.List (intersperse) import Control.Applicative import Control.Monad (guard) -import Control.Arrow ((***), (&&&), (>>>)) +import Control.Arrow (first, (&&&), (>>>)) import Data.Char (isAlpha, isDigit, isAlphaNum, toUpper, toLower) import Data.Text (Text) import qualified Data.Text as T @@ -92,21 +93,24 @@ data SchemaError | MissingColon -- ^ Schemas must be followed by a colon deriving (Show, Eq) +removeIRIFragment :: IRIRef -> IRIRef +removeIRIFragment (IRIRef s a p q _) = IRIRef s a p q Nothing + -- [TODO] use Builder serializeIRI :: IRIRef -> Text serializeIRI (IRIRef s a p q f) = mconcat - [ fromMaybe mempty (scheme <$> s) - , fromMaybe mempty (authority <$> a) + [ maybe mempty scheme s + , maybe mempty authority a , path p - , fromMaybe mempty (query <$> q) - , fromMaybe mempty (fragment <$> f)] + , maybe mempty query q + , maybe mempty fragment f ] where scheme (Scheme s') = s' <> ":" authority (Authority u (Host h) p') = mconcat [ "//" - , fromMaybe mempty (userInfo <$> u) + , maybe mempty userInfo u , h - , fromMaybe mempty (port <$> p') ] + , maybe mempty port p' ] userInfo (UserInfo u) = u <> "@" port (Port p') = (":" <>) . T.pack . show $ p' path (Path p') = p' @@ -123,12 +127,15 @@ parseRelIRI :: Text -> Either String IRIRef parseRelIRI = P.parseOnly $ irelativeRefParser <* (P.endOfInput "Unexpected characters at the end") validateIRI :: Text -> Either String Text -validateIRI t = const t <$> parseIRI t +validateIRI t = t <$ parseIRI t -- | IRI parsing and resolution according to algorithm 5.2 from RFC3986 -- See: http://www.ietf.org/rfc/rfc3986.txt --- [FIXME] Currently, this is a correct but naive implemenation. -resolveIRI :: Text -> Text -> Either String Text +-- [FIXME] Currently, this is a correct but naive implementation. +resolveIRI + :: Text -- ^ Base URI + -> Text -- ^ URI to resolve + -> Either String Text resolveIRI baseIri iri = serializeIRI <$> resolvedIRI where resolvedIRI = either (const resolvedRelativeIRI) resolveAbsoluteIRI (parseIRI iri) @@ -169,8 +176,8 @@ iriParser = do scheme <- Just <$> schemeParser _ <- P.string ":" "Missing colon after scheme" (authority, path) <- ihierPartParser - query <- P.option Nothing (Just <$> iqueryParser) - fragment <- P.option Nothing (Just <$> ifragmentParser) + query <- optional iqueryParser + fragment <- optional ifragmentParser return (IRIRef scheme authority path query fragment) -- ihier-part = "//" iauthority ipath-abempty @@ -194,8 +201,8 @@ ihierPartParser = irelativeRefParser :: Parser IRIRef irelativeRefParser = do (authority, path) <- irelativePartParser - query <- P.option Nothing (Just <$> iqueryParser) - fragment <- P.option Nothing (Just <$> ifragmentParser) + query <- optional iqueryParser + fragment <- optional ifragmentParser return (IRIRef Nothing authority path query fragment) -- irelative-part = "//" iauthority ipath-abempty @@ -212,9 +219,9 @@ irelativePartParser = -- iauthority = [ iuserinfo "@" ] ihost [ ":" port ] iauthorityParser :: Parser Authority iauthorityParser = - Authority <$> P.option Nothing (Just <$> (iuserInfoParser <* P.string "@")) + Authority <$> optional (iuserInfoParser <* P.string "@") <*> ihostParser - <*> P.option Nothing (Just <$> (P.string ":" *> portParser)) + <*> optional (P.string ":" *> portParser) "Authority" -- iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" ) @@ -270,7 +277,7 @@ ipathRootlessParser' = mconcat <$> sequence [isegmentNzParser, ipathAbEmptyParse -- ipath-empty = 0 ipathEmptyParser :: Parser (Maybe Authority, Path) -ipathEmptyParser = const (Nothing, mempty) <$> ipathEmptyParser' +ipathEmptyParser = (Nothing, mempty) <$ ipathEmptyParser' ipathEmptyParser' :: Parser Text ipathEmptyParser' = P.string mempty "Empty path" @@ -406,7 +413,7 @@ ipV6AddressParser = do h16 = parseBetween 1 4 (P.takeWhile isHexaDigit) ipNotElided (leading, lengthL) = guard (lengthL == 7 && isDecOctet (last leading)) *> partialIpV4 <|> - guard (lengthL == 8) *> pure mempty + (guard (lengthL == 8) $> mempty) ipElided (_, lengthL) = do guard $ lengthL <= 8 elision <- P.string "::" @@ -476,10 +483,10 @@ isSubDelims c = c `elem` ("!$&'()*+,;=" :: String) iauthWithPathParser :: Parser (Maybe Authority, Path) iauthWithPathParser = do void (P.string "//") - curry (Just *** id) <$> iauthorityParser <*> ipathAbEmptyParser + curry (first Just) <$> iauthorityParser <*> ipathAbEmptyParser isHexaDigit :: Char -> Bool -isHexaDigit c = (c >= '0' && c <= '9') || +isHexaDigit c = (isDigit c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') diff --git a/src/Data/RDF/Namespace.hs b/src/Data/RDF/Namespace.hs index afc3eb2..8c48674 100644 --- a/src/Data/RDF/Namespace.hs +++ b/src/Data/RDF/Namespace.hs @@ -6,75 +6,73 @@ module Data.RDF.Namespace( -- * Namespace types and functions Namespace(..), mkPlainNS, mkPrefixedNS, mkPrefixedNS', PrefixMapping(PrefixMapping), PrefixMappings(PrefixMappings), toPMList, - mergePrefixMappings, mkUri, prefixOf, uriOf, -- * Predefined namespace values - rdf, rdfs, dc, dct, owl, xsd, skos, foaf, ex, ex2, + rdf, rdfs, dc, dct, owl, xml, xsd, skos, foaf, ex, ex2, standard_ns_mappings, ns_mappings ) where import qualified Data.Text as T import Data.RDF.Types import qualified Data.Map as Map +import Data.Semigroup ((<>)) standard_namespaces :: [Namespace] standard_namespaces = [rdf, rdfs, dc, dct, owl, xsd, skos, foaf, ex, ex2] -- |The set of common predefined namespaces as a 'PrefixMappings' value. -standard_ns_mappings :: PrefixMappings +standard_ns_mappings :: PrefixMappings standard_ns_mappings = ns_mappings standard_namespaces -- |Takes a list of 'Namespace's and returns 'PrefixMappings'. ns_mappings :: [Namespace] -> PrefixMappings -ns_mappings ns = PrefixMappings $ Map.fromList $ - map (\(PrefixedNS pre uri) -> (pre, uri)) ns +ns_mappings ns = PrefixMappings $ Map.fromList $ + fmap (\(PrefixedNS pre uri) -> (pre, uri)) ns -- |The RDF namespace. -rdf :: Namespace -rdf = mkPrefixedNS' "rdf" "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +rdf :: Namespace +rdf = mkPrefixedNS' "rdf" "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -- |The RDF Schema namespace. rdfs :: Namespace -rdfs = mkPrefixedNS' "rdfs" "http://www.w3.org/2000/01/rdf-schema#" +rdfs = mkPrefixedNS' "rdfs" "http://www.w3.org/2000/01/rdf-schema#" -- |The Dublin Core namespace. -dc :: Namespace -dc = mkPrefixedNS' "dc" "http://purl.org/dc/elements/1.1/" +dc :: Namespace +dc = mkPrefixedNS' "dc" "http://purl.org/dc/elements/1.1/" -- |The Dublin Core terms namespace. -dct :: Namespace -dct = mkPrefixedNS' "dct" "http://purl.org/dc/terms/" +dct :: Namespace +dct = mkPrefixedNS' "dct" "http://purl.org/dc/terms/" -- |The OWL namespace. -owl :: Namespace -owl = mkPrefixedNS' "owl" "http://www.w3.org/2002/07/owl#" +owl :: Namespace +owl = mkPrefixedNS' "owl" "http://www.w3.org/2002/07/owl#" -- |The XML Schema namespace. -xsd :: Namespace -xsd = mkPrefixedNS' "xsd" "http://www.w3.org/2001/XMLSchema#" +xml :: Namespace +xml = mkPrefixedNS' "xml" "http://www.w3.org/XML/1998/namespace" + +-- |The XML Schema namespace. +xsd :: Namespace +xsd = mkPrefixedNS' "xsd" "http://www.w3.org/2001/XMLSchema#" -- |The SKOS namespace. skos :: Namespace -skos = mkPrefixedNS' "skos" "http://www.w3.org/2004/02/skos/core#" +skos = mkPrefixedNS' "skos" "http://www.w3.org/2004/02/skos/core#" -- |The friend of a friend namespace. foaf :: Namespace -foaf = mkPrefixedNS' "foaf" "http://xmlns.com/foaf/0.1/" +foaf = mkPrefixedNS' "foaf" "http://xmlns.com/foaf/0.1/" -- |Example namespace #1. -ex :: Namespace -ex = mkPrefixedNS' "ex" "http://www.example.org/" +ex :: Namespace +ex = mkPrefixedNS' "ex" "http://www.example.org/" -- |Example namespace #2. -ex2 :: Namespace -ex2 = mkPrefixedNS' "ex2" "http://www2.example.org/" - - --- |Perform a left-biased merge of the two sets of prefix mappings. -mergePrefixMappings :: PrefixMappings -> PrefixMappings -> PrefixMappings -mergePrefixMappings (PrefixMappings p1s) (PrefixMappings p2s) = - PrefixMappings $ Map.union p1s p2s +ex2 :: Namespace +ex2 = mkPrefixedNS' "ex2" "http://www2.example.org/" -- |View the prefix mappings as a list of key-value pairs. The PM in -- in the name is to reduce name clashes if used without qualifying. @@ -87,13 +85,13 @@ mkUri ns local = uriOf ns `T.append` local -- |Make a namespace for the given URI reference. -mkPlainNS :: T.Text -> Namespace -mkPlainNS = PlainNS +mkPlainNS :: T.Text -> Namespace +mkPlainNS = PlainNS -- |Make a namespace having the given prefix for the given URI reference, -- respectively. -mkPrefixedNS :: T.Text -> T.Text -> Namespace -mkPrefixedNS = PrefixedNS +mkPrefixedNS :: T.Text -> T.Text -> Namespace +mkPrefixedNS = PrefixedNS -- |Make a namespace having the given prefix for the given URI reference, -- respectively, using strings which will be converted to bytestrings @@ -102,11 +100,11 @@ mkPrefixedNS' :: String -> String -> Namespace mkPrefixedNS' s1 s2 = mkPrefixedNS (T.pack s1) (T.pack s2) -- |Determine the URI of the given namespace. -uriOf :: Namespace -> T.Text -uriOf (PlainNS uri) = uri -uriOf (PrefixedNS _ uri) = uri +uriOf :: Namespace -> T.Text +uriOf (PlainNS uri) = uri +uriOf (PrefixedNS _ uri) = uri -- |Determine the prefix of the given namespace, if it has one. -prefixOf :: Namespace -> Maybe T.Text -prefixOf (PlainNS _) = Nothing -prefixOf (PrefixedNS p _) = Just p +prefixOf :: Namespace -> Maybe T.Text +prefixOf (PlainNS _) = Nothing +prefixOf (PrefixedNS p _) = Just p diff --git a/src/Data/RDF/Query.hs b/src/Data/RDF/Query.hs index 2cbe131..8a34757 100644 --- a/src/Data/RDF/Query.hs +++ b/src/Data/RDF/Query.hs @@ -20,6 +20,8 @@ module Data.RDF.Query ( import Prelude hiding (pred) import Data.List +import Data.Maybe (fromMaybe) +import Data.Semigroup ((<>)) import Data.RDF.Types import qualified Data.RDF.Namespace as NS import Data.Text (Text) @@ -132,7 +134,7 @@ isGraphIsomorphic g1 g2 = Automorphism.isIsomorphic g1' g2' where triples = expandTriples g triplesHashMap :: HashMap (Subject,Predicate) [Object] - triplesHashMap = HashMap.fromListWith (++) [((s,p), [o]) | Triple s p o <- triples] + triplesHashMap = HashMap.fromListWith (<>) [((s,p), [o]) | Triple s p o <- triples] triplesGrouped :: [((Subject,Predicate),[Object])] triplesGrouped = HashMap.toList triplesHashMap (dataGraph,_,_) = (graphFromEdges . fmap (\((s,p),os) -> (s,p,os))) triplesGrouped @@ -156,7 +158,7 @@ expandNode _ n = n -- Also expands "a" to "http://www.w3.org/1999/02/22-rdf-syntax-ns#type". expandURI :: PrefixMappings -> Text -> Text expandURI _ "a" = NS.mkUri NS.rdf "type" -expandURI pms iri = maybe iri id $ foldl' f Nothing (NS.toPMList pms) +expandURI pms iri = fromMaybe iri $ foldl' f Nothing (NS.toPMList pms) where f :: Maybe Text -> (Text, Text) -> Maybe Text f x (p, u) = x <|> (T.append u <$> T.stripPrefix (T.append p ":") iri) diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 47b4cde..066ce15 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -40,7 +40,7 @@ module Data.RDF.Types ( PrefixMappings(PrefixMappings),PrefixMapping(PrefixMapping), -- * Supporting types - BaseUrl(BaseUrl), NodeSelector, ParseFailure(ParseFailure) + BaseUrl(..), NodeSelector, ParseFailure(ParseFailure) ) where @@ -188,10 +188,10 @@ uriValidate = either (const Nothing) Just . isRdfURI -- |Same as 'uriValidate', but on 'String' rather than 'Text' uriValidateString :: String -> Maybe String -uriValidateString = liftA T.unpack . uriValidate . fromString +uriValidateString = fmap T.unpack . uriValidate . fromString isRdfURI :: Text -> Either ParseError Text -isRdfURI t = parse (iriFragment <* eof) ("Invalid URI: " ++ T.unpack t) t +isRdfURI t = parse (iriFragment <* eof) ("Invalid URI: " <> T.unpack t) t -- IRIREF from NTriples spec (without <> enclosing) -- [8] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' @@ -274,9 +274,9 @@ type Triples = [Triple] -- /subj/ must be a 'UNode' or 'BNode', and /pred/ must be a 'UNode'. triple :: Subject -> Predicate -> Object -> Triple triple s p o - | isLNode s = error $ "subject must be UNode or BNode: " ++ show s - | isLNode p = error $ "predicate must be UNode, not LNode: " ++ show p - | isBNode p = error $ "predicate must be UNode, not BNode: " ++ show p + | isLNode s = error $ "subject must be UNode or BNode: " <> show s + | isLNode p = error $ "predicate must be UNode, not LNode: " <> show p + | isBNode p = error $ "predicate must be UNode, not BNode: " <> show p | otherwise = Triple s p o -- |Answer if given node is a URI Ref node. @@ -462,7 +462,7 @@ class RdfSerializer s where -- |The base URL of an RDF. -newtype BaseUrl = BaseUrl Text +newtype BaseUrl = BaseUrl { unBaseUrl :: Text } deriving (Eq, Ord, Show, NFData, Semigroup, Generic) instance Binary BaseUrl @@ -569,8 +569,8 @@ instance Show Namespace where show (PrefixedNS prefix uri) = printf "(PrefixNS %s %s)" (T.unpack prefix) (T.unpack uri) -- |An alias for a map from prefix to namespace URI. -newtype PrefixMappings = PrefixMappings (Map Text Text) - deriving (Eq, Ord,NFData, Generic) +newtype PrefixMappings = PrefixMappings (Map Text Text) + deriving (Eq, Ord, NFData, Semigroup, Monoid, Generic) instance Binary PrefixMappings @@ -579,7 +579,7 @@ instance Show PrefixMappings where -- worth optimizing yet. show (PrefixMappings pmap) = printf "PrefixMappings [%s]" mappingsStr where showPM = show . PrefixMapping - mappingsStr = List.intercalate ", " (map showPM (Map.toList pmap)) + mappingsStr = List.intercalate ", " (fmap showPM (Map.toList pmap)) -- |A mapping of a prefix to the URI for that prefix. newtype PrefixMapping = PrefixMapping (Text, Text) @@ -592,7 +592,7 @@ instance Show PrefixMapping where -- | Resolve a prefix using the given prefix mappings. resolveQName :: Text -> PrefixMappings -> Maybe Text -resolveQName prefix (PrefixMappings pms') = Map.lookup prefix pms' +resolveQName prefix (PrefixMappings pms) = Map.lookup prefix pms {-# INLINE mkAbsoluteUrl #-} {-# DEPRECATED mkAbsoluteUrl "Use resolveIRI instead, because mkAbsoluteUrl is a partial function" #-} @@ -624,12 +624,10 @@ canonicalizerTable = doubleUri = "http://www.w3.org/2001/XMLSchema#double" _integerStr, _decimalStr, _doubleStr :: Text -> Text -_integerStr t = - if T.length t == 1 - then t - else if T.head t == '0' - then _integerStr (T.tail t) - else t +_integerStr t + | T.length t == 1 = t + | T.head t == '0' = _integerStr (T.tail t) + | otherwise = t -- exponent: [eE] ('-' | '+')? [0-9]+ -- ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent ) @@ -649,12 +647,12 @@ fileSchemeToFilePath (UNode fileScheme) | otherwise = Nothing where textToFilePath = pure . fromString <=< stringToFilePath . T.unpack - stringToFilePath = fixPrefix <=< pure . Network.uriPath <=< Network.parseURI + stringToFilePath = fixPrefix <=< pure . unEscapeString . Network.uriPath <=< Network.parseURI fixPrefix "" = Nothing fixPrefix p@(p':p'') | p' == FP.pathSeparator = Just (FP.normalise p) -- Posix path | p' == '/' = Just (FP.normalise p'') -- Windows classic Path - | otherwise = Just ("\\\\" ++ FP.normalise p) -- Windows UNC Path + | otherwise = Just ("\\\\" <> FP.normalise p) -- Windows UNC Path fileSchemeToFilePath _ = Nothing -- | Converts a file path to a URI with "file:" scheme @@ -663,7 +661,7 @@ filePathToUri p | FP.isRelative p = Nothing | otherwise = Just . fromString . as_uri . FP.normalise $ p where - as_uri = ("file://" ++) . escapeURIString isAllowedInURI . as_posix . fix_prefix + as_uri = ("file://" <>) . escapeURIString isAllowedInURI . as_posix . fix_prefix fix_prefix p' = case (FP.takeDrive p') of "/" -> p' '\\':'\\':_ -> drop 2 p' diff --git a/src/Rdf4hParseMain.hs b/src/Rdf4hParseMain.hs index efd159a..26992a8 100644 --- a/src/Rdf4hParseMain.hs +++ b/src/Rdf4hParseMain.hs @@ -7,6 +7,7 @@ module Main where import Data.RDF +import Data.Semigroup ((<>)) import qualified Data.Text as T import qualified Data.Text.IO as TIO @@ -32,7 +33,7 @@ main = when (null args) (ioError (userError - ("\n\n" ++ "INPUT-URI required\n\n" ++ usageInfo header options))) + ("\n\n" <> "INPUT-URI required\n\n" <> usageInfo header options))) let debug = Debug `elem` opts inputUri = head args inputFormat = getWithDefault (InputFormat "turtle") opts @@ -41,8 +42,8 @@ main = outputBaseUri = getWithDefault (OutputBaseUri inputBaseUri) opts unless (outputFormat == "ntriples" || outputFormat == "turtle") (hPrintf stderr - ("'" ++ - outputFormat ++ + ("'" <> + outputFormat <> "' is not a valid output format. Supported output formats are: ntriples, turtle\n") >> exitWith (ExitFailure 1)) when debug @@ -88,7 +89,7 @@ main = >>= \ (res :: Either ParseFailure (RDF TList)) -> write outputFormat docUri emptyPms res - (str, _) -> putStrLn ("Invalid format: " ++ str) >> exitFailure + (str, _) -> putStrLn ("Invalid format: " <> str) >> exitFailure write :: (Rdf a) => String -> Maybe T.Text -> PrefixMappings -> Either ParseFailure (RDF a) -> IO () write format docUri pms res = case res of @@ -98,7 +99,7 @@ write format docUri pms res = case res of doWriteRdf rdf = case format of "turtle" -> writeRdf (TurtleSerializer docUri pms) rdf "ntriples" -> writeRdf NTriplesSerializer rdf - unknown -> error $ "Unknown output format: " ++ unknown + unknown -> error $ "Unknown output format: " <> unknown -- Get the input base URI from the argument list or flags, using the -- first string arg as the default if not found in string args (as @@ -135,7 +136,7 @@ strValue (InputFormat s) = s strValue (InputBaseUri s) = s strValue (OutputFormat s) = s strValue (OutputBaseUri s) = s -strValue flag = error $ "No string value for flag: " ++ show flag +strValue flag = error $ "No string value for flag: " <> show flag -- The commandline arguments we accept. None are required. data Flag @@ -160,28 +161,28 @@ instance Eq Flag where -- The top part of the usage output. header :: String header = - "\nrdf4h_parse: an RDF parser and serializer\n\n" ++ - "\nUsage: rdf4h_parse [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" ++ - " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" ++ - " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" ++ - " Default is INPUT-URI\n" ++ + "\nrdf4h_parse: an RDF parser and serializer\n\n" <> + "\nUsage: rdf4h_parse [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" <> + " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" <> + " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" <> + " Default is INPUT-URI\n" <> " Equivalent to -I INPUT-BASE-URI, --input-base-uri INPUT-BASE-URI\n\n" options :: [OptDescr Flag] options = [ Option "h" ["help"] (NoArg Help) "Display this help, then exit" , Option "d" ["debug"] (NoArg Debug) "Print debug info (like INPUT-BASE-URI used, etc.)" - , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" ++ - " turtle Turtle (default)\n" ++ - " ntriples N-Triples\n" ++ + , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" <> + " turtle Turtle (default)\n" <> + " ntriples N-Triples\n" <> " xml RDF/XML" - , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" ++ + , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" <> " Default is INPUT-BASE-URI argument value.\n\n" - , Option "o" ["output"] (ReqArg OutputFormat "FORMAT") $ "Set output format/serializer to one of:\n" ++ - " ntriples N-Triples (default)\n" ++ + , Option "o" ["output"] (ReqArg OutputFormat "FORMAT") $ "Set output format/serializer to one of:\n" <> + " ntriples N-Triples (default)\n" <> " turtle Turtle" - , Option "O" ["output-base-uri"] (ReqArg OutputBaseUri "URI") $ "Set the output format/serializer base URI. '-' for none.\n" ++ + , Option "O" ["output-base-uri"] (ReqArg OutputBaseUri "URI") $ "Set the output format/serializer base URI. '-' for none.\n" <> " Default is input/parser base URI." ] @@ -189,4 +190,4 @@ compilerOpts :: [String] -> IO ([Flag], [String]) compilerOpts argv = case getOpt Permute options argv of (o,n,[] ) -> return (o,n) - (_,_,errs) -> ioError (userError ("\n\n" ++ concat errs ++ usageInfo header options)) + (_,_,errs) -> ioError (userError ("\n\n" <> concat errs <> usageInfo header options)) diff --git a/src/Rdf4hQueryMain.hs b/src/Rdf4hQueryMain.hs index ed07bfd..7ebe05d 100644 --- a/src/Rdf4hQueryMain.hs +++ b/src/Rdf4hQueryMain.hs @@ -12,21 +12,21 @@ data Flag header :: String header = - "\nrdf4h_query: utility for querying RDF data\n\n" ++ - "\nUsage: rdf4h_query [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" ++ - " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" ++ - " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" ++ - " Default is INPUT-URI\n" ++ + "\nrdf4h_query: utility for querying RDF data\n\n" <> + "\nUsage: rdf4h_query [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" <> + " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" <> + " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" <> + " Default is INPUT-URI\n" <> " Equivalent to -I INPUT-BASE-URI, --input-base-uri INPUT-BASE-URI\n\n" options :: [OptDescr Flag] options = [ Option "h" ["help"] (NoArg Help) "Display this help, then exit" , Option "v" ["verbose"] (NoArg Verbose) "Display extra information messages to stderr" - , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" ++ - " turtle Turtle (default)\n" ++ + , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" <> + " turtle Turtle (default)\n" <> " ntriples N-Triples" - , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" ++ + , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" <> " Default is INPUT-BASE-URI argument value.\n\n" ] diff --git a/src/Text/RDF/RDF4H/NTriplesParser.hs b/src/Text/RDF/RDF4H/NTriplesParser.hs index f1d22ee..2295ca9 100644 --- a/src/Text/RDF/RDF4H/NTriplesParser.hs +++ b/src/Text/RDF/RDF4H/NTriplesParser.hs @@ -13,7 +13,7 @@ module Text.RDF.RDF4H.NTriplesParser import Prelude hiding (readFile) import Data.Semigroup ((<>)) -import Data.Char (isDigit, isLetter, isAlphaNum) +import Data.Char (isDigit, isLetter, isAlphaNum, isAsciiUpper, isAsciiLower) import Control.Applicative import Control.Monad (void) @@ -39,7 +39,7 @@ import System.IO (IOMode(..), withFile, hSetNewlineMode, noNewlineTranslation, h -- class. data NTriplesParser = NTriplesParser -data NTriplesParserCustom = NTriplesParserCustom Parser +newtype NTriplesParserCustom = NTriplesParserCustom Parser -- |'NTriplesParser' is an instance of 'RdfParser' using parsec based parsers. instance RdfParser NTriplesParser where @@ -109,13 +109,13 @@ nt_langtag :: (CharParsing m, Monad m) => m T.Text nt_langtag = do ss <- char '@' *> some (satisfy isLetter) rest <- concat <$> many (char '-' *> some (satisfy isAlphaNum) >>= \lang_str -> pure ('-':lang_str)) - pure (T.pack (ss ++ rest)) + pure (T.pack (ss <> rest)) -- [8] IRIREF nt_iriref :: (CharParsing m, Monad m) => m T.Text nt_iriref = between (char '<') (char '>') $ do raw_iri <- iriFragment - either (const empty) pure (validateIRI raw_iri) "Only absolute IRIs allowed in NTriples format, which this isn't: " ++ show raw_iri + either (const empty) pure (validateIRI raw_iri) "Only absolute IRIs allowed in NTriples format, which this isn't: " <> show raw_iri -- [153s] ECHAR nt_echar :: (CharParsing m, Monad m) => m Char @@ -165,8 +165,8 @@ nt_blank_node_label = do -- [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] nt_pn_chars_base :: CharParsing m => m Char nt_pn_chars_base = try $ satisfy isBaseChar - where isBaseChar c = (c >= 'A' && c <= 'Z') - || (c >= 'a' && c <= 'z') + where isBaseChar c = (isAsciiUpper c) + || (isAsciiLower c) || (c >= '\x00C0' && c <= '\x00D6') || (c >= '\x00D8' && c <= '\x00F6') || (c >= '\x00F8' && c <= '\x02FF') @@ -251,11 +251,11 @@ handleAttoparsec :: (Rdf a) => T.Text -> Either ParseFailure (RDF a) handleAttoparsec bs = handleResult $ parse nt_ntripleDoc (T.encodeUtf8 bs) where handleResult res = case res of - Fail _i _contexts err -> Left $ ParseFailure $ "Parse failure: \n" ++ show err + Fail _i _contexts err -> Left $ ParseFailure $ "Parse failure: \n" <> show err -- error $ - -- "\nnot consumed: " ++ show i - -- ++ "\ncontexts: " ++ show contexts - -- ++ "\nerror: " ++ show err + -- "\nnot consumed: " <> show i + -- <> "\ncontexts: " <> show contexts + -- <> "\nerror: " <> show err Partial f -> handleResult (f (T.encodeUtf8 mempty)) Done _ ts -> Right $ mkRdf ts Nothing (PrefixMappings mempty) diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index f4d6dcd..dda1d22 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -1,16 +1,31 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -module Text.RDF.RDF4H.ParserUtils( - parseFromURL, - Parser(..) -) where +module Text.RDF.RDF4H.ParserUtils + ( Parser(..) + , parseFromURL + -- RDF + , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode + , rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode + , rdfTag, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype + , rdfType, rdfLi, rdfListIndex + , rdfDescription, rdfXmlLiteral + , rdfAboutEach, rdfAboutEachPrefix, rdfBagID + -- XML + , xmlLang + -- XSD + , xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri + ) where import Data.RDF.Types +import Data.RDF.Namespace import Control.Exception.Lifted import Network.HTTP.Conduit import Data.Text.Encoding (decodeUtf8) +import Data.Semigroup ((<>)) import qualified Data.ByteString.Lazy as BS +import Data.Text (Text) import qualified Data.Text as T data Parser = Parsec | Attoparsec @@ -24,15 +39,61 @@ parseFromURL :: (Rdf rdfImpl) => (T.Text -> Either ParseFailure (RDF rdfImpl)) - parseFromURL parseFunc url = do result <- Control.Exception.Lifted.try $ simpleHttp url case result of - Left (ex::HttpException) -> - case ex of + Left (err :: HttpException) -> + case err of (HttpExceptionRequest _req content) -> case content of - ConnectionTimeout -> do + ConnectionTimeout -> return $ errResult "Connection timed out" - _ -> return $ errResult ("HttpExceptionRequest content: " ++ show content) + _ -> return $ errResult ("HttpExceptionRequest content: " <> show content) (InvalidUrlException{}) -> return $ errResult "Invalid URL exception" Right bs -> do let s = decodeUtf8 $ BS.toStrict bs return (parseFunc s) + +rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node +rdfTypeNode = UNode $ mkUri rdf "type" +rdfNilNode = UNode $ mkUri rdf "nil" +rdfFirstNode = UNode $ mkUri rdf "first" +rdfRestNode = UNode $ mkUri rdf "rest" + +rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode :: Node +rdfSubjectNode = UNode $ mkUri rdf "subject" +rdfPredicateNode = UNode $ mkUri rdf "predicate" +rdfObjectNode = UNode $ mkUri rdf "object" +rdfStatementNode = UNode $ mkUri rdf "Statement" + +-- Core terms +rdfTag, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype :: Text +rdfTag = mkUri rdf "RDF" +rdfID = mkUri rdf "ID" +rdfAbout = mkUri rdf "about" +rdfParseType = mkUri rdf "parseType" +rdfResource = mkUri rdf "resource" +rdfNodeID = mkUri rdf "nodeID" +rdfDatatype = mkUri rdf "datatype" + +rdfType, rdfLi, rdfListIndex :: Text +rdfType = mkUri rdf "type" +rdfLi = mkUri rdf "li" +rdfListIndex = mkUri rdf "_" + +rdfXmlLiteral, rdfDescription :: Text +rdfXmlLiteral = mkUri rdf "XMLLiteral" +rdfDescription = mkUri rdf "Description" + +-- Old terms +rdfAboutEach, rdfAboutEachPrefix, rdfBagID :: Text +rdfAboutEach = mkUri rdf "aboutEach" +rdfAboutEachPrefix = mkUri rdf "aboutEachPrefix" +rdfBagID = mkUri rdf "bagID" + +xmlLang :: Text +xmlLang = mkUri xml "lang" + +xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: Text +xsdIntUri = mkUri xsd "integer" +xsdDoubleUri = mkUri xsd "double" +xsdDecimalUri = mkUri xsd "decimal" +xsdBooleanUri = mkUri xsd "boolean" diff --git a/src/Text/RDF/RDF4H/TurtleParser.hs b/src/Text/RDF/RDF4H/TurtleParser.hs index 675c2e8..ec4e8d6 100644 --- a/src/Text/RDF/RDF4H/TurtleParser.hs +++ b/src/Text/RDF/RDF4H/TurtleParser.hs @@ -8,6 +8,7 @@ module Text.RDF.RDF4H.TurtleParser ( TurtleParser(TurtleParser) , TurtleParserCustom(TurtleParserCustom) + , parseTurtleDebug ) where import Prelude hiding (readFile) @@ -16,20 +17,23 @@ import Data.Char (toLower, toUpper, isDigit, isHexDigit) import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe +import Data.Either +import Data.Semigroup ((<>)) import Data.RDF.Types import Data.RDF.IRI -import Data.RDF.Namespace +import Data.RDF.Graph.TList import Text.RDF.RDF4H.ParserUtils import Text.RDF.RDF4H.NTriplesParser import Text.Parsec (runParser, ParseError) import qualified Data.Text as T import Data.Sequence (Seq, (|>)) +import Data.Functor (($>)) import qualified Data.Foldable as F import Control.Monad import Text.Parser.Char import Text.Parser.Combinators import Text.Parser.LookAhead -import Control.Applicative +import Control.Applicative hiding (empty) import Control.Monad.State.Class import Control.Monad.State.Strict @@ -73,6 +77,9 @@ type ParseState = , Seq Triple -- the triples encountered while parsing; always added to on the right side , Map String Integer ) -- map blank node names to generated id. +parseTurtleDebug :: String -> IO (RDF TList) +parseTurtleDebug f = fromRight empty <$> parseFile (TurtleParserCustom (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/") Attoparsec) f + -- grammar rule: [1] turtleDoc t_turtleDoc :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m (Seq Triple, PrefixMappings) t_turtleDoc = @@ -176,7 +183,7 @@ t_sparql_base = do updateBaseUrl (Just $ Just newBaseIri) t_verb :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () -t_verb = try t_predicate <|> (char 'a' *> pure rdfTypeNode) >>= setPredicate +t_verb = try t_predicate <|> (char 'a' $> rdfTypeNode) >>= setPredicate -- grammar rule: [11] predicate ::= iri t_predicate :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node @@ -189,7 +196,7 @@ t_pname_ns = do (_, _, _, pms, _, _, _, _) <- get case resolveQName pre pms of Just n -> pure n - Nothing -> unexpected ("Cannot resolve QName prefix: " ++ T.unpack pre) + Nothing -> unexpected ("Cannot resolve QName prefix: " <> T.unpack pre) -- grammar rules: [168s] PN_LOCAL -- [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? @@ -199,9 +206,9 @@ t_pn_local = do xs <- option "" $ try $ do let recsve = (t_pn_chars_str <|> string ":" <|> t_plx) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." <* lookAhead (try recsve))) <|> - (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws *> pure ".")) + (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws $> ".")) concat <$> many recsve - pure (T.pack (x ++ xs)) + pure (T.pack (x <> xs)) where satisfy_str = pure <$> satisfy isDigit t_pn_chars_str = pure <$> t_pn_chars @@ -235,7 +242,7 @@ t_subject = iri <|> t_blankNode <|> t_collection >>= setSubject -- [137s] BlankNode ::= BLANK_NODE_LABEL | ANON t_blankNode :: (CharParsing m, MonadState ParseState m) => m Node t_blankNode = do - genID <- try t_blank_node_label <|> (t_anon *> pure mempty) + genID <- try t_blank_node_label <|> (t_anon $> mempty) mp <- currGenIdLookup maybe (newBN genID) getExistingBN (Map.lookup genID mp) where @@ -297,7 +304,7 @@ t_collection = withConstantSubjectPredicate $ void (many t_ws) return root where - empty_list = lookAhead (char ')') *> return rdfNilNode + empty_list = lookAhead (char ')') $> rdfNilNode non_empty_list = do ns <- sepEndBy1 element (some t_ws) addTripleForObject rdfNilNode @@ -313,18 +320,6 @@ t_collection = withConstantSubjectPredicate $ return bn getSubject = get >>= \(_, _, _, _, s, _, _, _) -> pure s -rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node -rdfTypeNode = UNode $ mkUri rdf "type" -rdfNilNode = UNode $ mkUri rdf "nil" -rdfFirstNode = UNode $ mkUri rdf "first" -rdfRestNode = UNode $ mkUri rdf "rest" - -xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: T.Text -xsdIntUri = mkUri xsd "integer" -xsdDoubleUri = mkUri xsd "double" -xsdDecimalUri = mkUri xsd "decimal" -xsdBooleanUri = mkUri xsd "boolean" - t_literal :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_literal = LNode <$> try t_rdf_literal <|> @@ -507,7 +502,7 @@ updateBaseUrl val = _modifyState val no no no no no -- combines get_current and increment into a single function nextIdCounter :: MonadState ParseState m => m Integer nextIdCounter = get >>= \(bUrl, dUrl, i, pms, s, p, ts, genMap) -> - put (bUrl, dUrl, i+1, pms, s, p, ts, genMap) *> pure i + put (bUrl, dUrl, i+1, pms, s, p, ts, genMap) $> i nextBlankNode :: MonadState ParseState m => m Node nextBlankNode = BNodeGen . fromIntegral <$> nextIdCounter @@ -570,8 +565,8 @@ addTripleForObject obj = do t <- getTriple s p put (bUrl, dUrl, i, pms, s, p, ts |> t, genMap) where - getTriple Nothing _ = unexpected $ "No Subject with which to create triple for: " ++ show obj - getTriple _ Nothing = unexpected $ "No Predicate with which to create triple for: " ++ show obj + getTriple Nothing _ = unexpected $ "No Subject with which to create triple for: " <> show obj + getTriple _ Nothing = unexpected $ "No Predicate with which to create triple for: " <> show obj getTriple (Just s') (Just p') = pure $ Triple s' p' obj @@ -631,7 +626,7 @@ parseStringAttoparsec bUrl docUrl t = handleResult' $ parse (evalStateT t_turtle where handleResult' res = case res of Fail _ _ err -> -- error err - Left $ ParseFailure $ "Parse failure: \n" ++ show err + Left $ ParseFailure $ "Parse failure: \n" <> show err Partial f -> handleResult' (f mempty) Done _ (ts,pms) -> Right $! mkRdf (F.toList ts) bUrl pms @@ -650,12 +645,12 @@ parseURLAttoparsec bUrl docUrl = parseFromURL (parseStringAttoparsec bUrl docUrl --------------------------------- initialState :: Maybe BaseUrl -> Maybe T.Text -> ParseState -initialState bUrl docUrl = (bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Nothing, mempty, mempty) +initialState bUrl docUrl = (BaseUrl <$> docUrl <|> bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Nothing, mempty, mempty) handleResult :: Rdf a => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure (RDF a) handleResult bUrl result = case result of - (Left err) -> Left (ParseFailure $ "Parse failure: \n" ++ show err) + (Left err) -> Left (ParseFailure $ "Parse failure: \n" <> show err) (Right (ts, pms)) -> Right $! mkRdf (F.toList ts) bUrl pms @@ -668,7 +663,7 @@ caseInsensitiveChar c = char (toLower c) <|> char (toUpper c) -- Match the string 's', accepting either lowercase or uppercase form of each character caseInsensitiveString :: (CharParsing m, Monad m) => String -> m String -caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" ++ s ++ "\"" +caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" <> s <> "\"" tryIriResolution :: (CharParsing m, Monad m) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> m T.Text tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl @@ -676,4 +671,4 @@ tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl tryIriResolution' (Just (BaseUrl bIri)) _ = either err pure (resolveIRI bIri iriFrag) tryIriResolution' _ (Just dIri) = either err pure (resolveIRI dIri iriFrag) tryIriResolution' _ _ = either err pure (resolveIRI mempty iriFrag) - err m = unexpected $ "Cannot resolve IRI: " ++ m ++ " " ++ show (mbUrl, mdUrl, iriFrag) + err m = unexpected $ mconcat ["Cannot resolve IRI: ", m, " ", show (mbUrl, mdUrl, iriFrag)] diff --git a/src/Text/RDF/RDF4H/TurtleSerializer.hs b/src/Text/RDF/RDF4H/TurtleSerializer.hs index ace29b1..46ffcd7 100644 --- a/src/Text/RDF/RDF4H/TurtleSerializer.hs +++ b/src/Text/RDF/RDF4H/TurtleSerializer.hs @@ -1,4 +1,4 @@ --- |An RDF serializer for Turtle +-- |An RDF serializer for Turtle -- . module Text.RDF.RDF4H.TurtleSerializer( @@ -23,7 +23,7 @@ data TurtleSerializer = TurtleSerializer (Maybe T.Text) PrefixMappings instance RdfSerializer TurtleSerializer where hWriteRdf (TurtleSerializer docUrl pms) h rdf = _writeRdf h docUrl (addPrefixMappings rdf pms False) writeRdf s = hWriteRdf s stdout - hWriteH (TurtleSerializer _ pms) h rdf = writeHeader h (baseUrl rdf) (mergePrefixMappings (prefixMappings rdf) pms) + hWriteH (TurtleSerializer _ pms) h rdf = writeHeader h (baseUrl rdf) (prefixMappings rdf <> pms) writeH s = hWriteRdf s stdout -- TODO: should use mdUrl to render <> where appropriate hWriteTs (TurtleSerializer docUrl pms) h = writeTriples h docUrl pms @@ -31,7 +31,7 @@ instance RdfSerializer TurtleSerializer where hWriteT (TurtleSerializer docUrl pms) h = writeTriple h docUrl pms writeT s = hWriteT s stdout hWriteN (TurtleSerializer docUrl (PrefixMappings pms)) h n = writeNode h docUrl n pms - writeN s = hWriteN s stdout + writeN s = hWriteN s stdout -- TODO: writeRdf currently merges standard namespace prefix mappings with -- the ones that the RDF already contains, so that if the RDF has none @@ -72,10 +72,10 @@ writeTriples :: Handle -> Maybe T.Text -> PrefixMappings -> Triples -> IO () writeTriples h mdUrl (PrefixMappings pms) ts = mapM_ (writeSubjGroup h mdUrl revPms) (groupBy equalSubjects ts) where - revPms = Map.fromList $ map (\(k,v) -> (v,k)) $ Map.toList pms + revPms = Map.fromList $ (\(k,v) -> (v,k)) <$> Map.toList pms writeTriple :: Handle -> Maybe T.Text -> PrefixMappings -> Triple -> IO () -writeTriple h mdUrl (PrefixMappings pms) t = +writeTriple h mdUrl (PrefixMappings pms) t = w subjectOf >> space >> w predicateOf >> space >> w objectOf where w :: (Triple -> Node) -> IO () @@ -100,9 +100,9 @@ writeSubjGroup h dUrl pms ts@(t:_) = writePredGroup :: Handle -> Maybe T.Text -> Map T.Text T.Text -> Triples -> IO () writePredGroup _ _ _ [] = return () writePredGroup h docUrl pms (t:ts) = - -- The doesn't rule out <> in either the predicate or object (as well as subject), + -- The doesn't rule out <> in either the predicate or object (as well as subject), -- so we pass the docUrl through to writeNode in all cases. - writeNode h docUrl (predicateOf t) pms >> hPutChar h ' ' >> + writeNode h docUrl (predicateOf t) pms >> hPutChar h ' ' >> writeNode h docUrl (objectOf t) pms >> mapM_ (\t' -> hPutStr h ", " >> writeNode h docUrl (objectOf t') pms) ts @@ -131,7 +131,7 @@ _debugPMs pms = mapM_ (\(k, v) -> T.putStr k >> putStr "__" >> T.putStrLn v) (M -- Expects a map from uri to prefix, and returns the (prefix, uri_expansion) -- from the mappings such that uri_expansion is a prefix of uri, or Nothing if --- there is no such mapping. This function does a linear-time search over the +-- there is no such mapping. This function does a linear-time search over the -- map, but the prefix mappings should always be very small, so it's okay for now. findMapping :: Map T.Text T.Text -> T.Text -> Maybe (T.Text, T.Text) findMapping pms uri = diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index ebd143d..9958fed 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -1,532 +1,654 @@ {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE DoAndIfThenElse #-} -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DoAndIfThenElse #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} -- |An parser for the RDF/XML format -- . module Text.RDF.RDF4H.XmlParser - -- ( - -- XmlParser'(XmlParser') - -- , xmlEg - -- ) -where - -import Text.RDF.RDF4H.ParserUtils (parseFromURL) - -import Debug.Trace -import qualified Control.Applicative as Applicative -import Control.Exception -import Control.Monad -import qualified Data.HashMap.Strict as HashMap + ( XmlParser(..) + , parseXmlDebug + ) where + +import Data.RDF.Types hiding (empty, resolveQName) +import qualified Data.RDF.Types as RDF +import Data.RDF.IRI +import Data.RDF.Graph.TList +import Text.RDF.RDF4H.ParserUtils hiding (Parser) +import Text.RDF.RDF4H.XmlParser.Identifiers + +import Control.Applicative +import Control.Monad +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Semigroup ((<>)) +import Data.Set (Set) +import qualified Data.Set as S import qualified Data.Map as Map -import Data.Maybe -import Data.RDF.IRI -import Data.RDF.Types -import Data.RDF.Graph.TList +import Data.Maybe +import Data.Either +import Data.Bifunctor +import Data.HashSet (HashSet) +import qualified Data.HashSet as HS +import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM -import Data.Text (Text) -import Data.Text.Encoding +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T -import Xmlbf hiding (Node) -import qualified Xmlbf (Node) +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Builder as BB +import Xmlbf hiding (Node, State) import qualified Xmlbf.Xeno as Xeno - -data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) + instance RdfParser XmlParser where - parseString (XmlParser bUrl dUrl) = parseXmlRDF bUrl dUrl - parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl - parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl + parseString (XmlParser bUrl dUrl) = parseXmlRDF bUrl dUrl + parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl + parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl + +-- |Configuration for the XML parser +data XmlParser = XmlParser + (Maybe BaseUrl) + -- ^ The /default/ base URI to parse the document. + (Maybe Text) + -- ^ The /retrieval URI/ of the XML document. + +parseFile' :: (Rdf a) + => Maybe BaseUrl + -> Maybe Text + -> FilePath + -> IO (Either ParseFailure (RDF a)) +parseFile' bUrl dUrl fpath = parseXmlRDF bUrl dUrl <$> TIO.readFile fpath -parseFile' :: - (Rdf a) +parseURL' :: (Rdf a) => Maybe BaseUrl + -- ^ The optional base URI of the document. -> Maybe Text + -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. -> String + -- ^ The location URI from which to retrieve the XML document. -> IO (Either ParseFailure (RDF a)) -parseFile' bUrl dUrl fpath = - TIO.readFile fpath >>= return . parseXmlRDF bUrl dUrl - -parseURL' :: - (Rdf a) - => Maybe BaseUrl -- ^ The optional base URI of the document. - -> Maybe Text -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. - -> String -- ^ The location URI from which to retrieve the XML document. - -> IO (Either ParseFailure (RDF a)) -- ^ The parse result, which is either a @ParseFailure@ or the RDF - -- corresponding to the XML document. + -- ^ The parse result, which is either a @ParseFailure@ or the RDF + -- corresponding to the XML document. parseURL' bUrl docUrl = parseFromURL (parseXmlRDF bUrl docUrl) --- -- |Global state for the parser --- data GParseState = GParseState --- { stateGenId :: Int --- } deriving (Show) +-- |The parser monad. +type Parser = ParserT (ExceptT String (State ParseState)) -- |Local state for the parser (dependant on the parent xml elements) -data ParseState = ParseState { stateBaseUrl :: Maybe BaseUrl - , stateLang :: Maybe Text - , stateSubject :: Subject - , stateGenId :: Int - } - deriving(Show) - -data ParserException = ParserException String - deriving (Show) -instance Exception ParserException - -testXeno :: Text -> Either String [Xmlbf.Node] -testXeno = Xeno.nodes . T.encodeUtf8 +data ParseState = ParseState + { stateBaseUri :: Maybe BaseUrl + -- ^ The local base URI. + , stateIdSet :: Set Text + -- ^ The set of @rdf:ID@ found in the scope of the current base URI. + , statePrefixMapping :: PrefixMappings + -- ^ The namespace mapping. + , stateLang :: Maybe Text + -- ^ The local @xml:lang@ + , stateNodeAttrs :: HashMap Text Text + -- ^ Current node RDF attributes. + , stateSubject :: Maybe Subject + -- ^ Current subject for triple construction. + , stateCollectionIndex :: Int + -- ^ Current collection index. + , stateGenId :: Int + } deriving(Show) -- |Parse a xml Text to an RDF representation parseXmlRDF :: (Rdf a) - => Maybe BaseUrl -- ^ The base URL for the RDF if required - -> Maybe Text -- ^ DocUrl: The request URL for the RDF if available - -> Text -- ^ The contents to parse - -> Either ParseFailure (RDF a) -- ^ The RDF representation of the triples or ParseFailure -parseXmlRDF bUrl dUrl xmlStr = - case Xeno.nodes (T.encodeUtf8 xmlStr) of - Left xmlParseError -> Left (ParseFailure xmlParseError) - Right nodes -> -- error (show nodes) - case runParser (rdfParser bUrl dUrl) nodes of - Left rdfParseError -> Left (ParseFailure rdfParseError) - Right rdf -> Right rdf --- TODO: use bUrl and dUrl - -rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) -rdfParser bUrl dUrl = do - let initState = ParseState bUrl Nothing undefined 0 - rdf <- rdfDescription initState - newlines - -- tree <- showTree - -- error (show tree) - void pEndOfInput - return rdf - --- Text "\n" --- TODO: check that all that follows from \n is zero or more ' ' characters. -newline :: Parser () -newline = do - t <- pText - if not $ anyUsefulChars (TL.toStrict t) - then pure () - else pFail "not a newline text node" - where - anyUsefulChars t = - if T.length t == 0 - then False - else - let c = T.head t - in if (c /= '\n' && c /= '\r' && c /= ' ') - then True - else anyUsefulChars (T.tail t) - -newlines :: Parser () -newlines = void (many newline) - -pNodeNot :: Text -> Parser () -pNodeNot t = do - n <- pName - if (n /= t) - then pure () - else pFail ("forbidden element name: " ++ show t) - -{- -[ ("xmlns:si","https://www.w3schools.com/rdf/") -, ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") -] --} -prefixes :: Parser [(Text,Text)] -prefixes = do - xs <- HashMap.toList <$> pAttrs - pure (map (\(k,v) -> (fromJust (T.stripPrefix "xmlns:" k),v)) xs) - -oneAttr :: Parser (Text,Text) -oneAttr = do - xs <- pAttrs - case length (HashMap.toList xs) of - 1 -> pure $ head (HashMap.toList xs) - _ -> pFail "not one attr" - -rdfTriplesP :: ParseState -> Parser (Triples,ParseState) -rdfTriplesP st = do - newlines - pElement "rdf:Description" $ do - newlines - ((subj, reifiedTriples), st') <- subjP st - (ts) <- concat <$> many (predObjP st') - newlines - -- tree <- showTree - -- error (show tree) - void pEndOfInput - pure (ts ++ reifiedTriples,st') - -- pure $ ((map (\(p, o) -> triple subj p o) predObjs ++ reifiedTriples), st') - -{- NOTE: - remember to use `showTree` in the fork of xmlbf when pEndOfInput needs - debugging. --} - -subjP :: ParseState -> Parser ((Node,Triples),ParseState) -subjP st = do - -- void (pNodeNot "rdf:RDF") -- rdfms-rdf-names-use-error-001 - (do - s <- unode <$> pAttr "rdf:about" - pure ((s,[]),st { stateSubject = s } ) - <|> do - -- theId <- pAttr "rdf:ID" - let theBnode = BNodeGen (stateGenId st) - st' = st { stateGenId = stateGenId st + 1} - pure ((theBnode,[]),st')) - --- predObjP :: Parser ((Node,Node)) -predObjP :: ParseState -> Parser Triples -predObjP st = do - void newlines - (do pAnyElement $ do - void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 - p <- unode <$> pName - (ts) <- - (do - -- typed literal - theType <- pAttr "rdf:datatype" - theText <- pText - pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))]) - <|> - -- blank node - (do (p1,o1) <- oneAttr - -- TODO: increment stateGenId - let bnode = BNodeGen (stateGenId st) - t1 = triple (stateSubject st) p bnode - a = case stateBaseUrl st of - Nothing -> T.pack "" - Just (BaseUrl uri) -> uri - Right txt = resolveIRI a p1 - p2 = unode txt - -- TODO: typed and lang literals - t2 = triple bnode p2 (lnode (plainL o1)) - pure [t1,t2]) - <|> - (do - -- plain literal - theText <- pText - newlines - pure [triple - (stateSubject st) - p - (lnode (plainL (TL.toStrict theText)))]) - newlines - pure ts - <|> - pFail "unable to parse predicate/object pair" - ) - -- TODO: reify triple - --- TODO: unodes, and all different kinds of plain text nodes --- objP :: Parser (Node) --- objP {- st -} = do --- -- unode --- -- xs <- head <$> prefixes --- -- error (show xs) --- -- TODO for: --- -- Element "eg:Creator" --- -- [("eg:named","D\252rst")] --- -- [] --- pure (unode "http://www.example.com") --- <|> do --- -- typed literal --- theType <- pAttr "rdf:datatype" --- theText <- pText --- pure ((lnode (typedL (TL.toStrict theText) theType))) --- <|> do --- -- plain literal --- theText <- pText --- pure (lnode (plainL (TL.toStrict theText))) - - - -rdfDescription' :: ParseState -> Parser (PrefixMappings,Maybe BaseUrl,Triples) -rdfDescription' st = do - newlines - pfixes <- prefixes - (_,(triples,st')) <- pElement' (rdfTriplesP st) - newlines - pure (PrefixMappings (Map.fromList pfixes), Nothing, triples) - -rdfDescription :: Rdf a => ParseState -> Parser (RDF a) -rdfDescription st = do - (pfixes,bUrl,triples) <- pElement "rdf:RDF" (rdfDescription' st) - pure $ mkRdf triples bUrl pfixes - -{- -[ Text "\n" -, Element - "rdf:RDF" - [ ("xmlns:si","https://www.w3schools.com/rdf/") - , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n" - , Element - "rdf:Description" - [ ("rdf:about","https://www.w3schools.com") ] - [ Text "\n" - , Element - "si:title" - [] - [ Text "W3Schools" ] - , Text "\n" - , Element - "si:author" - [] - [ Text "Jan Egil Refsnes" ] - , Text "\n" - ] - ,Text "\n" - ] -,Text "\n" -] --} -xmlEg = T.pack $ unlines - [ "" - , "" - , "" - , "W3Schools" - , "Jan Egil Refsnes" - , "" - , "" - ] - -test1 :: Bool -test1 = triplesOf got == expected + => Maybe BaseUrl + -- ^ The base URI for the RDF if required + -> Maybe Text + -- ^ The request URI for the document to if available + -> Text + -- ^ The contents to parse + -> Either ParseFailure (RDF a) + -- ^ The RDF representation of the triples or ParseFailure +parseXmlRDF bUrl dUrl = parseRdf . parseXml where - Right (got::RDF TList) = parseXmlRDF Nothing Nothing xmlEg - expected = - [ Triple - (UNode "https://www.w3schools.com") - (UNode "si:title") - (LNode (PlainL "W3Schools")) - , Triple - (UNode "https://www.w3schools.com") - (UNode "si:author") - (LNode (PlainL "Jan Egil Refsnes")) - ] - --- missing in Xmlbf - --- | @'pElement'' p@ runs a 'Parser' @p@ inside a element node and --- returns a pair with the name of the parsed element and result of --- @p@. This fails if such element does not exist at the current --- position. + bUrl' = BaseUrl <$> dUrl <|> bUrl + parseXml = Xeno.fromRawXml . T.encodeUtf8 + parseRdf = first ParseFailure . join . second parseRdf' + parseRdf' ns = join $ evalState (runExceptT (parseM rdfParser ns)) initState + initState = ParseState bUrl' mempty mempty empty mempty empty 0 0 + +-- |A parser for debugging purposes. +parseXmlDebug + :: FilePath + -- ^ Path of the file to parse. + -> IO (RDF TList) +parseXmlDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/")) f + +-- |Document parser +rdfParser :: Rdf a => Parser (RDF a) +rdfParser = do + bUri <- currentBaseUri + triples <- (pRdf <* pWs) <|> pNodeElementList + pEndOfInput + mkRdf triples bUri <$> currentPrefixMappings + +-- |Parser for @rdf:RDF@, if present. +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#RDF +pRdf :: Parser Triples +pRdf = pAnyElement $ do + attrs <- pRDFAttrs + uri <- pName >>= pQName + guard (uri == rdfTag) + unless (null attrs) $ throwError "rdf:RDF: The set of attributes should be empty." + pNodeElementList + +-- |Parser for XML QName: resolve the namespace with the mapping in context. -- --- Leading whitespace is ignored. If you need to preserve that whitespace for --- some reason, capture it using 'pText' before using 'pElement''. +-- Throws an error if the namespace is not defined. +pQName :: Text -> Parser Text +pQName qn = do + pm <- currentPrefixMappings + let qn' = resolveQName pm qn >>= validateIRI + either throwError pure qn' + +-- |Process the attributes of an XML element. +-- +-- To be called __once__ per XML element. +pRDFAttrs :: Parser (HashMap Text Text) +pRDFAttrs = do + -- Language (xml:lang) + liftA2 (<|>) pLang currentLang >>= setLang + -- Base URI (xml:base) + liftA2 (<|>) pBase currentBaseUri >>= setBaseUri + bUri <- currentBaseUri + -- Process the rest of the attributes + attrs <- pAttrs + -- Get the namespace definitions (xmlns:) + pm <- updatePrefixMappings (PrefixMappings $ HM.foldlWithKey' mkNameSpace mempty attrs) + -- Filter and resolve RDF attributes + let as = HM.foldlWithKey' (mkRdfAttribute pm bUri) mempty attrs + setNodeAttrs as + pure as + where + -- |Check if an XML attribute is a namespace definition + -- and if so add it to the mapping. + mkNameSpace + :: Map.Map Text Text + -- ^ Current namespace mapping + -> Text + -- ^ XML attribute to process + -> Text + -- ^ Value of the attribute + -> Map.Map Text Text + mkNameSpace ns qn iri = + let qn' = parseQName qn + ns' = f <$> qn' <*> validateIRI iri + f (Nothing , "xmlns") iri' = Map.insert mempty iri' ns + f (Just "xmlns", prefix ) iri' = Map.insert prefix iri' ns + f _ _ = ns + in either (const ns) id ns' + -- |Check if an XML attribute is an RDF attribute + -- and if so resolve its URI and keep it. + mkRdfAttribute + :: PrefixMappings + -- ^ Namespace mapping + -> Maybe BaseUrl + -- ^ Base URI + -> HM.HashMap Text Text + -- ^ Current set of RDF attributes + -> Text + -- ^ XML attribute to process + -> Text + -- ^ Value of the attribute + -> HM.HashMap Text Text + mkRdfAttribute pm bUri as qn v = + let as' = parseQName qn >>= f + -- [NOTE] Ignore XML reserved names + f (Nothing, n) + | T.isPrefixOf "xml" n = Right as + | otherwise = case bUri of + Nothing -> Right as -- [FIXME] manage missing base URI + Just (BaseUrl bUri') -> (\a -> HM.insert a v as) <$> resolveIRI bUri' n + f qn'@(Just prefix, _) + | T.isPrefixOf "xml" prefix = Right as + | otherwise = (\a -> HM.insert a v as) <$> resolveQName' pm qn' + in either (const as) id as' + +-- |Return the value of the requested RDF attribute using its URI. -- --- Consumes the element from the parser state. -pElement' :: Parser a -> Parser (T.Text,a) -pElement' p = do - res <- p - name <- pName - return (name,res) - -pText' :: TL.Text -> Parser TL.Text -pText' t = do - let pTextFail = pFail ("Missing text node " ++ show t) - (do t' <- pText - if t == t' then pure t - else pTextFail - <|> pTextFail) - - --- parser combinators missing in Xmlbf -between :: Parser a -> Parser b -> Parser c -> Parser c -between open close thing = open *> thing <* close - -manyTill :: Parser a -> Parser end -> Parser [a] -manyTill thing z = many thing <* z - -(<|>) :: Parser a -> Parser a -> Parser a -(<|>) a b = a Applicative.<|> b - -some :: Parser a -> Parser [a] -some = Applicative.some - -many :: Parser a -> Parser [a] -many = Applicative.many - --- pElem :: Text -> Parser Text --- oneOf :: Parser [a] -> Parser a --- noneOf :: Parser [a] -> Parser a - - ---------------------------- --- Example trees from xeno - --- data/xml/example07.rdf -{- -[ Text "\n" -, Element "rdf:RDF" - [ ("xmlns:dc","http://purl.org/dc/elements/1.1/") - , ("xmlns:ex","http://example.org/stuff/1.0/") - , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n " - , Element "rdf:Description" - [ ("rdf:about","http://www.w3.org/TR/rdf-syntax-grammar") - , ("dc:title","RDF/XML Syntax Specification (Revised)") - ] - [ Text "\n " - , Element "ex:editor" - [] - [ Text "\n " - , Element "rdf:Description" - [ ("ex:fullName","Dave Beckett") - ] - [ Text "\n " - , Element "ex:homePage" - [ ("rdf:resource","http://purl.org/net/dajobe/") - ] - [] - , Text "\n " - ] - , Text "\n " - ] - , Text "\n " - ] - , Text "\n" - ] -, Text "\n" -] --} - - -{- rdf-tests/rdf-xml/amp-in-url/test001.rdf - -[ Text "\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n\n " - , Element "rdf:Description" - [("rdf:about","http://example/q?abc=1&def=2") - ] - [ Text "\n " - , Element "rdf:value" - [] - [Text "xxx"] - , Text "\n " - ] - , Text "\n\n" - ] -, Text "\n" -] --} - -{- "rdf-tests/rdf-xml/rdfms-rdf-names-use/error-001.rdf" - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n " - , Element "rdf:RDF" - [] - [] - , Text "\n" - ] -, Text "\n" -] - --} - - - -{- "rdf-tests/rdf-xml/rdfms-rdf-names-use/error-011.rdf" -Description is forbidden as a property element name. - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/node1")] - [ Text "\n " - , Element "rdf:Description" - [("rdf:resource","http://example.org/node2")] - [] - , Text "\n " - ] - , Text "\n" - ] -, Text "\n" -] - --} - -{- "rdf-tests/rdf-xml/rdf-charmod-literals/test001.rdf" - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n \n\n " - , Element "rdf:Description" - [("rdf:about","http://www.w3.org/TR/2002/WD-charmod-20020220")] - [ Text "\n\n \n " - , Element "eg:Creator" - [("eg:named","D\252rst")] - [] - , Text "\n \n\n " - ] - , Text "\n" - ] -, Text "\n" -] - --} - -{- rdf-tests/rdf-xml/rdf-charmod-uris/test001.rdf - -[ Text "\r\n\r\n\r\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/#"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\r\n\r\n \r\n\r\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/#Andr\233")] - [ Text "\r\n " - , Element "eg:owes" - [] - [Text "2000"] - , Text "\r\n " - ] - , Text "\r\n" - ] -, Text "\r\n" -] - --} - -{- "rdf-tests/rdf-xml/rdf-charmod-uris/test002.rdf" - -[ Text "\r\n\r\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/#"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\r\n \r\n \r\n\r\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/#Andr%C3%A9")] - [ Text "\r\n " - , Element "eg:owes" - [] - [Text "2000"] - , Text "\r\n " - ] - , Text "\r\n" - ] -, Text " \r\n" -] - --} +-- Fails if the attribute is not defined. +pRDFAttr :: Text -> Parser Text +pRDFAttr a = do + as <- currentNodeAttrs + maybe + (fail . mconcat $ ["Attribute \"", T.unpack a, "\" not found."]) + pure + (HM.lookup a as) + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList +pNodeElementList :: Parser Triples +pNodeElementList = pWs *> (mconcat <$> some (keepState pNodeElement <* pWs)) + +-- |White spaces parser +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#ws +pWs :: Parser () +pWs = maybe True (T.all ws . TL.toStrict) <$> optional pText >>= guard + where + -- See: https://www.w3.org/TR/2000/REC-xml-20001006#NT-S + ws c = c == '\x20' || c == '\x09' || c == '\x0d' || c == '\x0a' + +-- https://www.w3.org/TR/rdf-syntax-grammar/#nodeElement +pNodeElement :: Parser Triples +pNodeElement = pAnyElement $ do + -- Process attributes + void pRDFAttrs + -- Process URI, subject and @rdf:type@. + (s, mt) <- pSubject + ts1 <- pPropertyAttrs s + -- Process propertyEltList + ts2 <- keepState pPropertyEltList + setSubject (Just s) + let ts = ts1 <> ts2 + pure $ maybe ts (:ts) mt + +-- |Process the following parts of a @nodeElement@: URI, subject and @rdf:type@. +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#nodeElement +pSubject :: Parser (Node, Maybe Triple) +pSubject = do + -- Create the subject + -- [TODO] check the attributes that only one of the following may work + s <- pUnodeId <|> pBnode <|> pUnode <|> pBnodeGen + setSubject (Just s) + -- Resolve URI + uri <- pName >>= pQName + -- Check that the URI is allowed + unless (checkNodeUri uri) (throwError $ "URI not allowed: " <> T.unpack uri) + -- Optional rdf:type triple + mtype <- optional (pType1 s uri) + pure (s, mtype) + where + checkNodeUri uri = isNotCoreSyntaxTerm uri && uri /= rdfLi && isNotOldTerm uri + pUnodeId = (pIdAttr >>= mkUNodeID) <* removeNodeAttr rdfID + pBnode = (BNode <$> pNodeIdAttr) <* removeNodeAttr rdfNodeID + pUnode = (unode <$> pAboutAttr) <* removeNodeAttr rdfAbout + -- Default subject: a new blank node + pBnodeGen = newBNode + pType1 n uri = + if uri /= rdfDescription + then pure $ Triple n rdfTypeNode (unode uri) + else empty + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttr +pPropertyAttrs :: Node -> Parser Triples +pPropertyAttrs s = do + attrs <- currentNodeAttrs + HM.elems <$> HM.traverseWithKey f attrs + where + f attr value + | not (isPropertyAttrURI attr) = throwError $ "URI not allowed for attribute: " <> T.unpack attr + | attr == rdfType = pure $ Triple s rdfTypeNode (unode value) + | otherwise = do + lang <- currentLang + pure $ let mkLiteral = maybe plainL (flip plainLL) lang + in Triple s (unode attr) (lnode (mkLiteral value)) + +pLang :: Parser (Maybe Text) +pLang = optional (pAttr "xml:lang") + +-- [TODO] resolve base uri in context +pBase :: Parser (Maybe BaseUrl) +pBase = optional $ do + uri <- pAttr "xml:base" + -- Parse and remove fragment + BaseUrl <$> either + throwError + (pure . serializeIRI . removeIRIFragment) + (parseIRI uri) + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyEltList +pPropertyEltList :: Parser Triples +pPropertyEltList = pWs + *> resetCollectionIndex + *> fmap mconcat (many (pPropertyElt <* pWs)) + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyElt +pPropertyElt :: Parser Triples +pPropertyElt = pAnyElement $ do + -- Process attributes + void pRDFAttrs + -- Process the predicate from the URI + uri <- pName >>= pQName >>= listExpansion + unless (isPropertyAttrURI uri) (throwError $ "URI not allowed for propertyElt: " <> T.unpack uri) + let p = unode uri + -- Process 'propertyElt' + pParseTypeLiteralPropertyElt p + <|> pParseTypeResourcePropertyElt p + <|> pParseTypeCollectionPropertyElt p + <|> pParseTypeOtherPropertyElt p + <|> pResourcePropertyElt p + <|> pLiteralPropertyElt p + <|> pEmptyPropertyElt p + where + listExpansion u + | u == rdfLi = nextCollectionIndex + | otherwise = pure u + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#resourcePropertyElt +pResourcePropertyElt :: Node -> Parser Triples +pResourcePropertyElt p = do + pWs + -- [NOTE] We need to restore part of the state after exploring the element' children. + (ts1, o) <- keepState $ liftA2 (,) pNodeElement currentSubject + pWs + mi <- optional pIdAttr <* removeNodeAttr rdfID + -- No other attribute is allowed. + checkAllowedAttributes [] + -- Generated triple + s <- currentSubject + let mt = flip Triple p <$> s <*> o + -- Reify the triple + ts2 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe (ts1 <> ts2) (:(ts1 <> ts2)) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt +pLiteralPropertyElt :: Node -> Parser Triples +pLiteralPropertyElt p = do + l <- pText + -- No children + pChildren >>= guard . null + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfDatatype] + dt <- optional pDatatypeAttr + s <- currentSubject + lang <- currentLang + -- Generated triple + let l' = TL.toStrict l + o = lnode . fromMaybe (plainL l') $ (typedL l' <$> dt) <|> (plainLL l' <$> lang) + mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple + ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe ts (:ts) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt +pParseTypeLiteralPropertyElt :: Node -> Parser Triples +pParseTypeLiteralPropertyElt p = do + pt <- pRDFAttr rdfParseType + guard (pt == "Literal") + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfParseType] + l <- pXMLLiteral + -- Generated triple + s <- currentSubject + let o = lnode (typedL l rdfXmlLiteral) + mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple + ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe ts (:ts) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt +pParseTypeResourcePropertyElt :: Node -> Parser Triples +pParseTypeResourcePropertyElt p = do + pt <- pRDFAttr rdfParseType + guard (pt == "Resource") + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfParseType] + -- Generated triple + s <- currentSubject + o <- newBNode + let mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple + ts1 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + setSubject (Just o) + -- Explore children + ts2 <- keepCollectionIndex pPropertyEltList + --setSubject s + pure $ maybe (ts1 <> ts2) ((<> ts2) . (:ts1)) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt +pParseTypeCollectionPropertyElt :: Node -> Parser Triples +pParseTypeCollectionPropertyElt p = do + pt <- pRDFAttr rdfParseType + guard (pt == "Collection") + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfParseType] + s <- currentSubject + case s of + Nothing -> pure mempty + Just s' -> do + r <- optional pNodeElement + case r of + Nothing -> + -- Empty collection + let t = Triple s' p rdfNilNode + in ([t] <>) <$> maybe (pure mempty) (`reifyTriple` t) mi + Just ts1 -> do + -- Non empty collection + s'' <- currentSubject + n <- newBNode + -- Triples corresping to the first item + let t = Triple s' p n + ts2 = maybe mempty (\s''' -> [t, Triple n rdfFirstNode s''']) s'' + -- Process next item + ts3 <- go n + -- Reify triple + ts4 <- maybe (pure mempty) (`reifyTriple` t) mi + pure $ mconcat [ts1, ts2, ts3, ts4] + where + go s = do + -- Generate the triples of the current item. + r <- optional pNodeElement + case r of + -- End of the collection + Nothing -> pure [Triple s rdfRestNode rdfNilNode] + -- Add the item to the collection and process the next item + Just ts1 -> do + s' <- currentSubject + n <- newBNode + let ts2 = maybe mempty (\s'' -> [Triple s rdfRestNode n, Triple n rdfFirstNode s'']) s' + -- Next item + ts3 <- go n + pure $ mconcat [ts1, ts2, ts3] + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt +pParseTypeOtherPropertyElt :: Node -> Parser Triples +pParseTypeOtherPropertyElt _p = do + pt <- pRDFAttr rdfParseType + guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") + checkAllowedAttributes [rdfParseType] + _mi <- optional pIdAttr <* removeNodeAttr rdfID + -- [FIXME] Implement 'parseTypeOtherPropertyElt' + throwError "Not implemented: rdf:parseType = other" + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt +pEmptyPropertyElt :: Node -> Parser Triples +pEmptyPropertyElt p = do + s <- currentSubject + case s of + Nothing -> pure mempty + Just s' -> do + mi <- optional pIdAttr <* removeNodeAttr rdfID + o <- pResourceAttr' <|> pNodeIdAttr' <|> newBNode + let t = Triple s' p o + -- Reify triple + ts1 <- maybe (pure mempty) (`reifyTriple` t) mi + ts2 <- pPropertyAttrs o + pure (t:ts1 <> ts2) + where + pResourceAttr' = unode <$> pResourceAttr <* removeNodeAttr rdfResource + pNodeIdAttr' = BNode <$> pNodeIdAttr <* removeNodeAttr rdfNodeID + +checkAllowedAttributes :: HashSet Text -> Parser () +checkAllowedAttributes as = do + attrs <- currentNodeAttrs + let diff = HS.difference (HM.keysSet attrs) as + unless (null diff) (throwError $ "Attributes not allowed: " <> show diff) + +-- See: https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-xmlliteral, +-- https://www.w3.org/TR/rdf-syntax-grammar/#literal +pXMLLiteral :: Parser Text +pXMLLiteral = + T.decodeUtf8 . BL.toStrict . BB.toLazyByteString . encode <$> pChildren + +pIdAttr :: Parser Text +pIdAttr = do + i <- pRDFAttr rdfID + i' <- either throwError pure (checkRdfId i) + -- Check the uniqueness of the ID in the context of the current base URI. + checkIdIsUnique i' + pure i' + +checkIdIsUnique :: Text -> Parser () +checkIdIsUnique i = do + notUnique <- S.member i <$> currentIdSet + when notUnique (throwError $ "rdf:ID already used in this context: " <> T.unpack i) + updateIdSet i + +pNodeIdAttr :: Parser Text +pNodeIdAttr = do + i <- pRDFAttr rdfNodeID + either throwError pure (checkRdfId i) + +pAboutAttr :: Parser Text +pAboutAttr = pRDFAttr rdfAbout >>= checkIRI "rdf:about" + +pResourceAttr :: Parser Text +pResourceAttr = pRDFAttr rdfResource >>= checkIRI "rdf:resource" + +pDatatypeAttr :: Parser Text +pDatatypeAttr = pRDFAttr rdfDatatype >>= checkIRI "rdf:datatype" + +reifyTriple :: Text -> Triple -> Parser Triples +reifyTriple i (Triple s p' o) = do + n <- mkUNodeID i + pure [ Triple n rdfTypeNode rdfStatementNode + , Triple n rdfSubjectNode s + , Triple n rdfPredicateNode p' + , Triple n rdfObjectNode o ] + +-------------------------------------------------------------------------------- +-- URI checks + +checkIRI :: String -> Text -> Parser Text +checkIRI msg iri = do + bUri <- maybe mempty unBaseUrl <$> currentBaseUri + case uriValidate iri of + Nothing -> throwError $ mconcat ["Malformed IRI for \"", msg, "\": ", T.unpack iri] + Just iri' -> either throwError pure (resolveIRI bUri iri') + +-- https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttributeURIs +isPropertyAttrURI :: Text -> Bool +isPropertyAttrURI uri + = isNotCoreSyntaxTerm uri + && uri /= rdfDescription + && uri /= rdfLi + && isNotOldTerm uri + +-- https://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms +isNotCoreSyntaxTerm :: Text -> Bool +isNotCoreSyntaxTerm uri + = uri /= rdfTag && uri /= rdfID && uri /= rdfAbout + && uri /= rdfParseType && uri /= rdfResource + && uri /= rdfNodeID && uri /= rdfDatatype + +-- https://www.w3.org/TR/rdf-syntax-grammar/#oldTerms +isNotOldTerm :: Text -> Bool +isNotOldTerm uri = uri /= rdfAboutEach + && uri /= rdfAboutEachPrefix + && uri /= rdfBagID + +-------------------------------------------------------------------------------- +-- Parser's state utils + +-- |Create a new unique blank node +newBNode :: Parser Node +newBNode = do + modify $ \st -> st { stateGenId = stateGenId st + 1 } + BNodeGen . stateGenId <$> get + +-- |Process a parser, restoring the state except for stateGenId and stateIdSet +keepState :: Parser a -> Parser a +keepState p = do + st <- get + let bUri = stateBaseUri st + is = stateIdSet st + p <* do + st' <- get + let i = stateGenId st' + bUri' = stateBaseUri st' + is' = stateIdSet st' + -- Update the set of ID if necessary + if bUri /= bUri' + then put (st { stateGenId = i }) + else put (st { stateGenId = i, stateIdSet = is <> is' }) + +currentIdSet :: Parser (Set Text) +currentIdSet = stateIdSet <$> get + +updateIdSet :: Text -> Parser () +updateIdSet i = do + is <- currentIdSet + modify (\st -> st { stateIdSet = S.insert i is }) + +currentNodeAttrs :: Parser (HashMap Text Text) +currentNodeAttrs = stateNodeAttrs <$> get + +setNodeAttrs :: HashMap Text Text -> Parser () +setNodeAttrs as = modify (\st -> st { stateNodeAttrs = as }) + +removeNodeAttr :: Text -> Parser () +removeNodeAttr a = HM.delete a <$> currentNodeAttrs >>= setNodeAttrs + +currentPrefixMappings :: Parser PrefixMappings +currentPrefixMappings = statePrefixMapping <$> get + +updatePrefixMappings :: PrefixMappings -> Parser PrefixMappings +updatePrefixMappings pm = do + pm' <- (<> pm) <$> currentPrefixMappings + modify (\st -> st { statePrefixMapping = pm' }) + pure pm' + +currentCollectionIndex :: Parser Int +currentCollectionIndex = stateCollectionIndex <$> get + +setCollectionIndex :: Int -> Parser () +setCollectionIndex i = modify (\st -> st { stateCollectionIndex = i }) + +keepCollectionIndex :: Parser a -> Parser a +keepCollectionIndex p = do + i <- currentCollectionIndex + p <* setCollectionIndex i + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#section-List-Expand +nextCollectionIndex :: Parser Text +nextCollectionIndex = do + modify $ \st -> st { stateCollectionIndex = stateCollectionIndex st + 1 } + (rdfListIndex <>) . T.pack . show . stateCollectionIndex <$> get + +resetCollectionIndex :: Parser () +resetCollectionIndex = modify $ \st -> st { stateCollectionIndex = 0 } + +currentBaseUri :: Parser (Maybe BaseUrl) +currentBaseUri = stateBaseUri <$> get + +setBaseUri :: (Maybe BaseUrl) -> Parser () +setBaseUri u = modify (\st -> st { stateBaseUri = u }) + +mkUNodeID :: Text -> Parser Node +mkUNodeID t = mkUnode <$> currentBaseUri + where + mkUnode = unode . \case + Nothing -> t + Just (BaseUrl u) -> mconcat [u, "#", t] + +currentSubject :: Parser (Maybe Subject) +currentSubject = stateSubject <$> get + +setSubject :: (Maybe Subject) -> Parser () +setSubject s = modify (\st -> st { stateSubject = s }) + +currentLang :: Parser (Maybe Text) +currentLang = stateLang <$> get + +setLang :: (Maybe Text) -> Parser () +setLang lang = modify (\st -> st { stateLang = lang }) diff --git a/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs new file mode 100644 index 0000000..6592525 --- /dev/null +++ b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs @@ -0,0 +1,107 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} + +module Text.RDF.RDF4H.XmlParser.Identifiers + ( -- rdf:ID validation + checkRdfId + -- Qualified names + , resolveQName, resolveQName' + , parseQName + ) where + + +import Data.Functor ((<$)) +import Control.Applicative (liftA2, Alternative(..)) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Map as Map +import Data.Attoparsec.Text (Parser, ()) +import qualified Data.Attoparsec.Text as P + +import Data.RDF.Namespace + +-------------------------------------------------------------------------------- +-- rdf:ID + +-- |Validate the value of @rdf:ID@. +-- +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#rdf-id +checkRdfId + :: Text + -- ^ Value of a @rdf:ID@ attribute to validate. + -> Either String Text +checkRdfId t = t <$ parseId t + +parseId :: Text -> Either String Text +parseId = P.parseOnly $ pNCName <* (P.endOfInput "Unexpected characters at the end") + +-------------------------------------------------------------------------------- +-- Qualified names + +-- |Parse and resolve a qualified name. +-- +-- See: https://www.w3.org/TR/xml-names/#ns-qualnames +resolveQName + :: PrefixMappings + -- ^ Namespace mapping to resolve q qualified name. + -> Text + -- ^ Raw qualified name to process. + -> Either String Text +resolveQName pm qn = parseQName qn >>= resolveQName' pm + +-- |Resolve a qualified name. +resolveQName' + :: PrefixMappings + -- ^ Namespace mapping to resolve q qualified name. + -> (Maybe Text, Text) + -- ^ (namespace, local name) + -> Either String Text +resolveQName' (PrefixMappings pm) (Nothing, name) = + case Map.lookup mempty pm of + Nothing -> Left $ mconcat ["Cannot resolve QName \"", T.unpack name, "\": no default namespace defined."] + Just iri -> Right $ iri <> name +resolveQName' (PrefixMappings pm) (Just prefix, name) = + case Map.lookup prefix pm of + Nothing -> Left $ mconcat ["Cannot resolve QName: prefix \"", T.unpack prefix, "\" not defined"] + Just iri -> Right $ iri <> name + +-- |Parse a qualified name. +-- +-- See: https://www.w3.org/TR/xml-names/#ns-qualnames +parseQName :: Text -> Either String (Maybe Text, Text) +parseQName = P.parseOnly $ pQName <* (P.endOfInput "Unexpected characters at the end of a QName") + +-- https://www.w3.org/TR/xml-names/#ns-qualnames +-- https://www.w3.org/TR/xml-names/#NT-QName +pQName :: Parser (Maybe Text, Text) +pQName = pPrefixedName <|> pUnprefixedNamed + where pUnprefixedNamed = (empty,) <$> pLocalPart + +-- https://www.w3.org/TR/xml-names/#NT-PrefixedName +pPrefixedName :: Parser (Maybe Text, Text) +pPrefixedName = do + prefix <- pLocalPart <* P.char ':' + localPart <- pLocalPart + pure (Just prefix, localPart) + +-- https://www.w3.org/TR/xml-names/#NT-LocalPart +pLocalPart :: Parser Text +pLocalPart = pNCName + +-- http://www.w3.org/TR/REC-xml-names/#NT-NCName +pNCName :: Parser Text +pNCName = liftA2 T.cons pNameStartChar pNameRest + where + pNameStartChar = P.satisfy isValidFirstCharId + pNameRest = P.takeWhile isValidRestCharId + isValidFirstCharId c + = ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z') + || ('\xC0' <= c && c <= '\xD6') || ('\xD8' <= c && c <= '\xF6') + || ('\xF8' <= c && c <= '\x2FF') || ('\x370' <= c && c <= '\x37D') + || ('\x37F' <= c && c <= '\x1FFF') || ('\x200C' <= c && c <= '\x200D') + || ('\x2070' <= c && c <= '\x218F') || ('\x2C00' <= c && c <= '\x2FEF') + || ('\x3001' <= c && c <= '\xD7FF') || ('\xF900' <= c && c <= '\xFDCF') + || ('\xFDF0' <= c && c <= '\xFFFD') || ('\x10000' <= c && c <= '\xEFFFF') + isValidRestCharId c = isValidFirstCharId c + || c == '-' || c == '.' || ('0' <= c && c <= '9') + || ('\x0300' <= c && c <= '\x036F') || ('\x203F' <= c && c <= '\x2040') diff --git a/src/Text/RDF/RDF4H/XmlParserHXT.hs b/src/Text/RDF/RDF4H/XmlParserHXT.hs index 843ff78..8bec792 100644 --- a/src/Text/RDF/RDF4H/XmlParserHXT.hs +++ b/src/Text/RDF/RDF4H/XmlParserHXT.hs @@ -21,7 +21,7 @@ import Data.List (isPrefixOf) import qualified Data.Map as Map (fromList) import Data.Maybe import Data.Typeable -import Text.RDF.RDF4H.ParserUtils +import Text.RDF.RDF4H.ParserUtils hiding (rdfType) import Data.RDF.IRI import Data.RDF.Types (Rdf,RDF,RdfParser(..),Node(BNodeGen),BaseUrl(..),Triple(..),Triples,Subject,Predicate,Object,PrefixMappings(..),ParseFailure(ParseFailure),mkRdf,lnode,plainL,plainLL,typedL,unode,bnode,unodeValidate) import Data.Text (Text) @@ -220,7 +220,7 @@ isMetaAttr = isA (== "rdf:about") -- -- And that specifically: -- --- +-- -- foo -- -- @@ -278,7 +278,7 @@ parsePredicatesFromChildren = updateState , second hasPredicateAttr :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr)) , this :-> defaultA ] - + -- See: Issue http://www.w3.org/2000/03/rdf-tracking/#rdfms-rdf-names-use -- section: Illegal or unusual use of names from the RDF namespace -- @@ -317,7 +317,7 @@ validPropElementName = proc (state,predXml) -> do parseObjectsFromChildren :: forall a. (ArrowIf a, ArrowXml a, ArrowState GParseState a) => LParseState -> Predicate -> a XmlTree Triple parseObjectsFromChildren s p = - choiceA + choiceA [ isText :-> (neg( isWhiteSpace) >>> getText >>> arr (Triple (stateSubject s) p . mkLiteralNode s)) , isElem :-> (parseObjectDescription) ] @@ -405,10 +405,8 @@ validNodeElementName = neg (hasName "rdf:RDF") >>> neg (hasName "rdf:aboutEach") >>> neg (hasName "rdf:aboutEachPrefix") -rdfXmlLiteral :: Text rdfFirst,rdfRest,rdfNil,rdfType,rdfStatement,rdfSubject,rdfPredicate,rdfObject :: Node -rdfXmlLiteral = T.pack "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" rdfFirst = (unode . T.pack) "rdf:first" rdfRest = (unode . T.pack) "rdf:rest" rdfNil = (unode . T.pack) "rdf:nil" @@ -466,7 +464,7 @@ xmlName str = go [] str else Nothing isValid c = isAlphaNum c || '_' == c - -- || '-' == c + -- '-' == c || '.' == c || ':' == c diff --git a/stack.yaml b/stack.yaml index 666abb9..d755ed7 100644 --- a/stack.yaml +++ b/stack.yaml @@ -2,12 +2,10 @@ resolver: lts-12.1 compiler-check: newer-minor packages: - '.' -# - /home/rob/code/haskell/xmlbf/xmlbf -# - /home/rob/code/haskell/xmlbf/xmlbf-xeno extra-deps: - hgal-2.0.0.2 -- git: git@gitlab.com:k0001/xmlbf.git - commit: f46b96a401ac2ef6f30f8939c9f7bf92f38df383 - subdirs: - - xmlbf - - xmlbf-xeno \ No newline at end of file + - git: git@gitlab.com:k0001/xmlbf.git + commit: ce65be6366c0aaafcc282268d767c52380e459e6 + subdirs: + - xmlbf + - xmlbf-xeno diff --git a/testsuite/tests/Data/RDF/IRITests.hs b/testsuite/tests/Data/RDF/IRITests.hs index 09d0664..3b5197a 100644 --- a/testsuite/tests/Data/RDF/IRITests.hs +++ b/testsuite/tests/Data/RDF/IRITests.hs @@ -1,6 +1,3 @@ ---{-# LANGUAGE DeriveGeneric #-} ---{-# LANGUAGE TypeFamilies #-} ---{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Data.RDF.IRITests diff --git a/testsuite/tests/Data/RDF/PropertyTests.hs b/testsuite/tests/Data/RDF/PropertyTests.hs index f4417d9..9205d59 100644 --- a/testsuite/tests/Data/RDF/PropertyTests.hs +++ b/testsuite/tests/Data/RDF/PropertyTests.hs @@ -11,6 +11,7 @@ import Data.RDF.Namespace hiding (rdf) import qualified Data.Text as T import Test.QuickCheck import Data.List +import Data.Semigroup ((<>)) import qualified Data.Set as Set import qualified Data.Map as Map import Control.Monad @@ -105,7 +106,7 @@ instance Arbitrary PrefixMappings where arbitraryBaseUrl :: Gen BaseUrl arbitraryBaseUrl = oneof $ - map + fmap (return . BaseUrl . T.pack) ["http://example.org/", "http://example.com/a", "http://asdf.org/b", "http://asdf.org/c"] @@ -131,7 +132,7 @@ arbitraryPrefixMappings = p_empty :: Rdf rdf => RDF rdf -> Bool -p_empty empty = triplesOf empty == [] +p_empty empty = null (triplesOf empty) -- triplesOf any RDF should return unique triples used to create it p_mkRdf_triplesOf @@ -445,7 +446,7 @@ p_remove_triple_from_singleton_graph_query_s :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_s _unused singletonGraph = - query newGr (Just s) Nothing Nothing == [] + null (query newGr (Just s) Nothing Nothing) where tripleInGraph@(Triple s _p _o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -457,7 +458,7 @@ p_remove_triple_from_singleton_graph_query_p :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_p _unused singletonGraph = - query newGr Nothing (Just p) Nothing == [] + null (query newGr Nothing (Just p) Nothing) where tripleInGraph@(Triple _s p _o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -469,7 +470,7 @@ p_remove_triple_from_singleton_graph_query_o :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_o _unused singletonGraph = - query newGr Nothing Nothing (Just o) == [] + null (query newGr Nothing Nothing (Just o)) where tripleInGraph@(Triple _s _p o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -482,10 +483,10 @@ p_add_then_remove_triples p_add_then_remove_triples _empty genTriples = let emptyGraph = _empty populatedGraph = - foldr (\t gr -> addTriple gr t) emptyGraph genTriples + foldr (flip addTriple) emptyGraph genTriples emptiedGraph = - foldr (\t gr -> removeTriple gr t) populatedGraph genTriples - in triplesOf emptiedGraph == [] + foldr (flip removeTriple) populatedGraph genTriples + in null (triplesOf emptiedGraph) equivNode :: (Node -> Node -> Bool) -> (Triple -> Node) @@ -546,7 +547,7 @@ tripleFromGen tripleFromGen _triplesOf rdf = if null ts then return Nothing - else oneof $ map (return . Just) ts + else oneof $ fmap (return . Just) ts where ts = _triplesOf rdf @@ -560,12 +561,13 @@ languages :: [T.Text] languages = [T.pack "fr", T.pack "en"] datatypes :: [T.Text] -datatypes = map (mkUri xsd . T.pack) ["string", "int", "token"] +datatypes = fmap (mkUri xsd . T.pack) ["string", "int", "token"] uris :: [T.Text] -uris = - map (mkUri ex) [T.pack n `T.append` T.pack (show (i::Int)) | n <- ["foo", "bar", "quz", "zak"], i <- [0..2]] - ++ [T.pack "ex:" `T.append` T.pack n `T.append` T.pack (show (i::Int)) | n <- ["s", "p", "o"], i <- [1..3]] +uris = [mkUri ex (n <> T.pack (show (i :: Int))) + | n <- ["foo", "bar", "quz", "zak"], i <- [0 .. 2]] + <> ["ex:" <> n <> T.pack (show (i::Int)) + | n <- ["s", "p", "o"], i <- [1..3]] plainliterals :: [LValue] plainliterals = [plainLL lit lang | lit <- litvalues, lang <- languages] @@ -574,16 +576,16 @@ typedliterals :: [LValue] typedliterals = [typedL lit dtype | lit <- litvalues, dtype <- datatypes] litvalues :: [T.Text] -litvalues = map T.pack ["hello", "world", "peace", "earth", "", "haskell"] +litvalues = fmap T.pack ["hello", "world", "peace", "earth", "", "haskell"] unodes :: [Node] -unodes = map UNode uris +unodes = fmap UNode uris bnodes :: [ Node] -bnodes = map (BNode . \i -> T.pack ":_genid" `T.append` T.pack (show (i::Int))) [1..5] +bnodes = fmap (BNode . \i -> T.pack ":_genid" <> T.pack (show (i::Int))) [1..5] lnodes :: [Node] -lnodes = [LNode lit | lit <- plainliterals ++ typedliterals] +lnodes = [LNode lit | lit <- plainliterals <> typedliterals] -- maximum number of triples maxN :: Int @@ -601,11 +603,10 @@ instance Arbitrary Triple where arbitrary = do s <- arbitraryS p <- arbitraryP - o <- arbitraryO - return (triple s p o) + triple s p <$> arbitraryO instance Arbitrary Node where - arbitrary = oneof $ map return unodes + arbitrary = oneof $ fmap return unodes arbitraryTs :: Gen Triples arbitraryTs = do @@ -613,9 +614,9 @@ arbitraryTs = do sequence [arbitrary | _ <- [1 .. n]] arbitraryS, arbitraryP, arbitraryO :: Gen Node -arbitraryS = oneof $ map return $ unodes ++ bnodes -arbitraryP = oneof $ map return unodes -arbitraryO = oneof $ map return $ unodes ++ bnodes ++ lnodes +arbitraryS = oneof $ fmap return $ unodes <> bnodes +arbitraryP = oneof $ fmap return unodes +arbitraryO = oneof $ fmap return $ unodes <> bnodes <> lnodes ---------------------------------------------------- -- Unit test cases -- diff --git a/testsuite/tests/Test.hs b/testsuite/tests/Test.hs index 9fb0053..96c4c52 100644 --- a/testsuite/tests/Test.hs +++ b/testsuite/tests/Test.hs @@ -25,13 +25,13 @@ suiteFilesDirXml = "rdf-tests/rdf-xml/" suiteFilesDirNTriples = "rdf-tests/ntriples/" mfPathTurtle,mfPathXml,mfPathNTriples :: T.Text -mfPathTurtle = T.concat [suiteFilesDirTurtle, "manifest.ttl"] -mfPathXml = T.concat [suiteFilesDirXml, "manifest.ttl"] -mfPathNTriples = T.concat [suiteFilesDirNTriples, "manifest.ttl"] +mfPathTurtle = mconcat [suiteFilesDirTurtle, "manifest.ttl"] +mfPathXml = mconcat [suiteFilesDirXml, "manifest.ttl"] +mfPathNTriples = mconcat [suiteFilesDirNTriples, "manifest.ttl"] mfBaseURITurtle,mfBaseURIXml,mfBaseURINTriples :: BaseUrl -mfBaseURITurtle = BaseUrl "http://www.w3.org/2013/TurtleTests/" -mfBaseURIXml = BaseUrl "http://www.w3.org/2013/RDFXMLTests/" +mfBaseURITurtle = W3CTurtleTest.mfBaseURITurtle +mfBaseURIXml = W3CRdfXmlTest.mfBaseURIXml mfBaseURINTriples = BaseUrl "http://www.w3.org/2013/N-TriplesTests/" main :: IO () @@ -41,9 +41,8 @@ main = do dir <- getCurrentDirectory let fileSchemeUri suitesDir = fromJust . filePathToUri $ (dir T.unpack suitesDir) - turtleManifest <- - loadManifest mfPathTurtle (fileSchemeUri suiteFilesDirTurtle) - xmlManifest <- loadManifest mfPathXml (fileSchemeUri suiteFilesDirXml) + turtleManifest <- loadManifest mfPathTurtle (unBaseUrl mfBaseURITurtle) + xmlManifest <- loadManifest mfPathXml (unBaseUrl mfBaseURIXml) nTriplesManifest <- loadManifest mfPathNTriples (fileSchemeUri suiteFilesDirNTriples) -- run tests @@ -61,7 +60,12 @@ main = do (graphTests "AdjHashMap" (empty :: RDF AdjHashMap) - (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AdjHashMap))] + (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AdjHashMap)) + , + (graphTests + "AlgebraicGraph" + (empty :: RDF AlgebraicGraph) + (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AlgebraicGraph))] , testGroup "graph-impl-unit-tests" @@ -95,15 +99,15 @@ main = do "parser-w3c-tests-turtle" [ testGroup "parser-w3c-tests-turtle-parsec" - [W3CTurtleTest.testsParsec turtleManifest] + [W3CTurtleTest.testsParsec (dir T.unpack suiteFilesDirTurtle) turtleManifest] , testGroup "parser-w3c-tests-turtle-attoparsec" - [W3CTurtleTest.testsAttoparsec turtleManifest] + [W3CTurtleTest.testsAttoparsec (dir T.unpack suiteFilesDirTurtle) turtleManifest] ] , testGroup "parser-w3c-tests-xml" - [ W3CRdfXmlTest.tests xmlManifest + [ W3CRdfXmlTest.tests (dir T.unpack suiteFilesDirXml) xmlManifest ] ] ) diff --git a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs index 62cdc02..382c039 100644 --- a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs +++ b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs @@ -4,6 +4,7 @@ module Text.RDF.RDF4H.TurtleParser_ConformanceTest ( tests ) where +import Data.Semigroup ((<>)) -- Testing imports import Test.Tasty import Test.Tasty.HUnit as TU @@ -48,7 +49,7 @@ fpath :: String -> Int -> String -> String fpath name i ext = printf "data/ttl/conformance/%s-%02d.%s" name i ext :: String tests :: [TestTree] -tests = ts1 ++ ts2 ++ ts3 +tests = ts1 <> ts2 <> ts3 where ts1 = fmap checkGoodConformanceTest [0..29] ts2 = fmap checkBadConformanceTest [0..15] ts3 = fmap (uncurry checkGoodOtherTest) otherTestFiles @@ -72,7 +73,7 @@ doGoodConformanceTest expGr inGr testname = let t1 = assertLoadSuccess (printf "expected (%s): " testname) expGr t2 = assertLoadSuccess (printf " input (%s): " testname) inGr t3 = assertEquivalent testname expGr inGr - in testGroup (printf "conformance-%s" testname) $ map (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] + in testGroup (printf "conformance-%s" testname) $ fmap (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] checkBadConformanceTest :: Int -> TestTree checkBadConformanceTest i = @@ -82,21 +83,21 @@ checkBadConformanceTest i = -- Determines if graphs are equivalent, returning Nothing if so or else a diagnostic message. -- First graph is expected graph, second graph is actual. equivalent :: Rdf a => Either ParseFailure (RDF a) -> Either ParseFailure (RDF a) -> Maybe String -equivalent (Left e) _ = Just $ "Parse failure of the expected graph: " ++ show e -equivalent _ (Left e) = Just $ "Parse failure of the input graph: " ++ show e +equivalent (Left e) _ = Just $ "Parse failure of the expected graph: " <> show e +equivalent _ (Left e) = Just $ "Parse failure of the input graph: " <> show e equivalent (Right gr1) (Right gr2) = checkSize <|> (test $! zip gr1ts gr2ts) where gr1ts = uordered $ triplesOf gr1 gr2ts = uordered $ triplesOf gr2 length1 = length gr1ts length2 = length gr2ts - checkSize = if (length1 == length2) then Nothing else (Just $ "Size different. Expected: " ++ (show length1) ++ ", got: " ++ (show length2)) + checkSize = if (length1 == length2) then Nothing else (Just $ "Size different. Expected: " <> (show length1) <> ", got: " <> (show length2)) test [] = Nothing test ((t1,t2):ts) = maybe (test ts) pure (compareTriple t1 t2) compareTriple t1@(Triple s1 p1 o1) t2@(Triple s2 p2 o2) = if equalNodes s1 s2 && equalNodes p1 p2 && equalNodes o1 o2 then Nothing - else Just ("Expected:\n " ++ show t1 ++ "\nFound:\n " ++ show t2 ++ "\n") + else Just ("Expected:\n " <> show t1 <> "\nFound:\n " <> show t2 <> "\n") -- I'm not sure it's right to compare blank nodes with generated -- blank nodes. This is because parsing an already generated blank @@ -140,14 +141,14 @@ assertLoadSuccess, assertLoadFailure :: String -> IO (Either ParseFailure (RDF T assertLoadSuccess idStr exprGr = do g <- exprGr case g of - Left (ParseFailure err) -> TU.assertFailure $ idStr ++ err + Left (ParseFailure err) -> TU.assertFailure $ idStr <> err Right _ -> return () assertLoadFailure idStr exprGr = do g <- exprGr case g of Left _ -> return () - Right _ -> TU.assertFailure $ "Bad test " ++ idStr ++ " loaded successfully." + Right _ -> TU.assertFailure $ "Bad test " <> idStr <> " loaded successfully." assertEquivalent :: Rdf a => String -> IO (Either ParseFailure (RDF a)) -> IO (Either ParseFailure (RDF a)) -> TU.Assertion assertEquivalent testname r1 r2 = do @@ -155,7 +156,7 @@ assertEquivalent testname r1 r2 = do gr2 <- r2 case equivalent gr1 gr2 of Nothing -> return () - (Just msg) -> fail $ "Graph " ++ testname ++ " not equivalent to expected:\n" ++ msg + (Just msg) -> fail $ "Graph " <> testname <> " not equivalent to expected:\n" <> msg mkDocUrl :: Text -> String -> Int -> Maybe Text mkDocUrl baseDocUrl fname testNum = Just . fromString $ printf "%s%s-%02d.ttl" baseDocUrl fname testNum diff --git a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs index 8d8f979..530dfdc 100644 --- a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs +++ b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs @@ -7,9 +7,9 @@ module Text.RDF.RDF4H.XmlParser_Test -- todo: QuickCheck tests +import Data.Semigroup ((<>)) -- Testing imports import Test.Tasty -import Test.Tasty.HUnit import Test.Tasty.HUnit as TU -- Import common libraries to facilitate tests @@ -34,8 +34,8 @@ tests = , testCase "NML2" test_parseXmlRDF_NML2 , testCase "NML3" test_parseXmlRDF_NML3 ] - ++ - map (uncurry checkGoodOtherTest) otherTestFiles + <> + fmap (uncurry checkGoodOtherTest) otherTestFiles otherTestFiles :: [(String, String)] otherTestFiles = [ ("data/xml", "example07") @@ -70,8 +70,8 @@ loadExpectedGraph1 fname = do loadInputGraph1 :: String -> String -> IO (Either ParseFailure (RDF TList)) loadInputGraph1 dir fname = - TIO.readFile (printf "%s/%s.rdf" dir fname :: String) >>= - return . parseString (XmlParser Nothing (mkDocUrl1 testBaseUri fname)) >>= return . handleLoad + (parseString (XmlParser Nothing (mkDocUrl1 testBaseUri dir fname)) <$> + TIO.readFile (printf "%s/%s.rdf" dir fname :: String)) doGoodConformanceTest :: IO (Either ParseFailure (RDF TList)) -> IO (Either ParseFailure (RDF TList)) -> @@ -80,7 +80,7 @@ doGoodConformanceTest expGr inGr testname = let t1 = assertLoadSuccess (printf "expected (%s): " testname) expGr t2 = assertLoadSuccess (printf " input (%s): " testname) inGr t3 = assertEquivalent testname expGr inGr - in testGroup (printf "conformance-%s" testname) $ map (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] + in testGroup (printf "conformance-%s" testname) $ fmap (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] mkTextNode :: T.Text -> Node mkTextNode = lnode . plainL @@ -90,7 +90,7 @@ testParse exRDF ex = case parsed of Right result -> assertBool - ("expected: " ++ show ex ++ "but got: " ++ show result) + ("expected: " <> show ex <> "but got: " <> show result) (isIsomorphic (result :: RDF TList) (ex :: RDF TList)) Left (ParseFailure err) -> assertFailure err @@ -174,7 +174,7 @@ test_parseXmlRDF_vCardPersonal :: Assertion test_parseXmlRDF_vCardPersonal = testParse "\ - \\ + \\ \Corky Crystal\ \Corks\ \\ @@ -353,7 +353,7 @@ assertEquivalent testname r1 r2 = do gr2 <- r2 case equivalent gr1 gr2 of Nothing -> return () - (Just msg) -> fail $ "Graph " ++ testname ++ " not equivalent to expected:\n" ++ msg + (Just msg) -> fail $ "Graph " <> testname <> " not equivalent to expected:\n" <> msg -- Determines if graphs are equivalent, returning Nothing if so or else a diagnostic message. -- First graph is expected graph, second graph is actual. @@ -372,13 +372,13 @@ equivalent (Right gr1) (Right gr2) = test $! zip gr1ts gr2ts compareTriple t1 t2 = if equalNodes s1 s2 && equalNodes p1 p2 && equalNodes o1 o2 then Nothing - else Just ("Expected:\n " ++ show t1 ++ "\nFound:\n " ++ show t2 ++ "\n") + else Just ("Expected:\n " <> show t1 <> "\nFound:\n " <> show t2 <> "\n") where (s1, p1, o1) = f t1 (s2, p2, o2) = f t2 f t = (subjectOf t, predicateOf t, objectOf t) - -- equalNodes (BNode fs1) (BNodeGen i) = T.reverse fs1 == T.pack ("_:genid" ++ show i) - -- equalNodes (BNode fs1) (BNodeGen i) = fs1 == T.pack ("_:genid" ++ show i) + -- equalNodes (BNode fs1) (BNodeGen i) = T.reverse fs1 == T.pack ("_:genid" <> show i) + -- equalNodes (BNode fs1) (BNodeGen i) = fs1 == T.pack ("_:genid" <> show i) -- I'm not sure it's right to compare blank nodes with generated -- blank nodes. This is because parsing an already generated blank @@ -403,20 +403,20 @@ assertLoadSuccess :: String -> IO (Either ParseFailure (RDF TList)) -> TU.Assert assertLoadSuccess idStr exprGr = do g <- exprGr case g of - Left (ParseFailure err) -> TU.assertFailure $ idStr ++ err + Left (ParseFailure err) -> TU.assertFailure $ idStr <> err Right _ -> return () -- assertLoadFailure idStr exprGr = do -- g <- exprGr -- case g of -- Left _ -> return () --- Right _ -> TU.assertFailure $ "Bad test " ++ idStr ++ " loaded successfully." +-- Right _ -> TU.assertFailure $ "Bad test " <> idStr <> " loaded successfully." handleLoad :: Either ParseFailure (RDF TList) -> Either ParseFailure (RDF TList) handleLoad res = case res of l@(Left _) -> l - (Right gr) -> Right $ mkRdf (map normalize (triplesOf gr)) (baseUrl gr) (prefixMappings gr) + (Right gr) -> Right $ mkRdf (fmap normalize (triplesOf gr)) (baseUrl gr) (prefixMappings gr) normalize :: Triple -> Triple normalize t = let s' = normalizeN $ subjectOf t @@ -424,12 +424,12 @@ normalize t = let s' = normalizeN $ subjectOf t o' = normalizeN $ objectOf t in triple s' p' o' normalizeN :: Node -> Node -normalizeN (BNodeGen i) = BNode (T.pack $ "_:genid" ++ show i) +normalizeN (BNodeGen i) = BNode (T.pack $ "_:genid" <> show i) normalizeN n = n -- The Base URI to be used for all conformance tests: testBaseUri :: String testBaseUri = "http://www.w3.org/2001/sw/DataAccess/df1/tests/" -mkDocUrl1 :: String -> String -> Maybe T.Text -mkDocUrl1 baseDocUrl fname = Just $ T.pack $ printf "%s%s.rdf" baseDocUrl fname +mkDocUrl1 :: String -> String -> String -> Maybe T.Text +mkDocUrl1 baseDocUrl dir fname = Just . T.pack $ printf "%s/%s/%s.rdf" baseDocUrl dir fname diff --git a/testsuite/tests/W3C/Manifest.hs b/testsuite/tests/W3C/Manifest.hs index 736db43..cd46877 100644 --- a/testsuite/tests/W3C/Manifest.hs +++ b/testsuite/tests/W3C/Manifest.hs @@ -7,6 +7,7 @@ module W3C.Manifest ( TestEntry(..) ) where +import Data.Semigroup ((<>)) import Data.RDF.Graph.TList import Data.RDF.Query import Data.RDF.Types @@ -125,18 +126,18 @@ mfUnrecognizedDatatypes = unode "http://www.w3.org/2001/sw/DataAccess/tests/test -- | Load the manifest from the given file; -- apply the given namespace as the base IRI of the manifest. loadManifest :: T.Text -> T.Text -> IO Manifest -loadManifest manifestPath baseIRI = do - parseFile testParser (T.unpack manifestPath) >>= return . rdfToManifest . fromEither +loadManifest manifestPath baseIRI = + (rdfToManifest . fromEither) <$> parseFile testParser (T.unpack manifestPath) where testParser = TurtleParser (Just $ BaseUrl baseIRI) Nothing rdfToManifest :: RDF TList -> Manifest rdfToManifest rdf = Manifest desc tpls - where desc = lnodeText $ objectOf $ headDef (error ("query empty: subject mf:node & predicate mf:name in:\n\n" ++ show (triplesOf rdf))) descNode + where desc = lnodeText $ objectOf $ headDef (error ("query empty: subject mf:node & predicate mf:name in:\n\n" <> show (triplesOf rdf))) descNode -- FIXME: Inconsistent use of nodes for describing the manifest (W3C bug) descNode = query rdf (Just manifestNode) (Just rdfsLabel) Nothing - ++ query rdf (Just manifestNode) (Just mfName) Nothing + <> query rdf (Just manifestNode) (Just mfName) Nothing -- descNode = query rdf (Just manifestNode) (Just mfName) Nothing - tpls = map (rdfToTestEntry rdf) $ rdfCollectionToList rdf collectionHead + tpls = (rdfToTestEntry rdf) <$> rdfCollectionToList rdf collectionHead collectionHead = objectOf $ headDef (error "query: mf:node & mf:entries") $ query rdf (Just manifestNode) (Just mfEntries) Nothing manifestNode = headDef (error "manifestSubjectNodes yielding empty list") $ manifestSubjectNodes rdf @@ -156,7 +157,7 @@ triplesToTestEntry rdf ts = (UNode "http://www.w3.org/ns/rdftest#TestXMLNegativeSyntax") -> mkTestXMLNegativeSyntax ts (UNode "http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax") -> mkTestNTriplesPositiveSyntax ts (UNode "http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax") -> mkTestNTriplesNegativeSyntax ts - n -> error ("Unknown test case: " ++ show n) + n -> error ("Unknown test case: " <> show n) mkTestTurtleEval :: Triples -> TestEntry mkTestTurtleEval ts = TestTurtleEval { @@ -273,7 +274,7 @@ manifestSubjectNodes :: RDF TList -> [Subject] manifestSubjectNodes rdf = subjectNodes rdf [mfManifest] subjectNodes :: RDF TList -> [Object] -> [Subject] -subjectNodes rdf = (map subjectOf) . concatMap queryType +subjectNodes rdf = (fmap subjectOf) . concatMap queryType where queryType n = query rdf Nothing (Just rdfType) (Just n) -- | Text of the literal node. diff --git a/testsuite/tests/W3C/NTripleTest.hs b/testsuite/tests/W3C/NTripleTest.hs index e4ed915..3126545 100644 --- a/testsuite/tests/W3C/NTripleTest.hs +++ b/testsuite/tests/W3C/NTripleTest.hs @@ -3,6 +3,7 @@ module W3C.NTripleTest , testsAttoparsec ) where +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import Test.Tasty import qualified Test.Tasty.HUnit as TU @@ -36,11 +37,10 @@ mfEntryToTest testParser (TestNTriplesNegativeSyntax nm _ _ act') = let act = (UNode . fromJust . fileSchemeToFilePath) act' rdf = parseFile testParser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x testParserParsec :: NTriplesParserCustom testParserParsec = NTriplesParserCustom Parsec testParserAttoparsec :: NTriplesParserCustom testParserAttoparsec = NTriplesParserCustom Attoparsec - diff --git a/testsuite/tests/W3C/RdfXmlTest.hs b/testsuite/tests/W3C/RdfXmlTest.hs index 6da83dd..0bce804 100644 --- a/testsuite/tests/W3C/RdfXmlTest.hs +++ b/testsuite/tests/W3C/RdfXmlTest.hs @@ -2,8 +2,10 @@ module W3C.RdfXmlTest ( tests + , mfBaseURIXml ) where +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import Test.Tasty import qualified Test.Tasty.HUnit as TU @@ -18,29 +20,34 @@ import Text.RDF.RDF4H.XmlParser import Text.RDF.RDF4H.NTriplesParser import Data.RDF.Graph.TList -tests :: Manifest -> TestTree -tests = runManifestTests mfEntryToTest +tests :: String -> Manifest -> TestTree +tests = runManifestTests . mfEntryToTest -- Functions to map manifest test entries to unit tests. -- They are defined here to avoid cluttering W3C.Manifest -- with functions that may not be needed to those who -- just want to parse Manifest files. -- TODO: They should probably be moved to W3C.Manifest after all. -mfEntryToTest :: TestEntry -> TestTree -mfEntryToTest (TestXMLEval nm _ _ act' res') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = parseFile testParser (nodeURI act) >>= return . fromEither :: IO (RDF TList) - expectedRDF = parseFile NTriplesParser (nodeURI res) >>= return . fromEither :: IO (RDF TList) +mfEntryToTest :: String -> TestEntry -> TestTree +mfEntryToTest dir (TestXMLEval nm _ _ act res) = + let pathExpected = getFilePath dir res + pathAction = getFilePath dir act + parsedRDF = (fromEither <$> parseFile (testParser (nodeURI act)) pathAction) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser pathExpected) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF -mfEntryToTest (TestXMLNegativeSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile testParser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir (TestXMLNegativeSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (testParser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x + +getFilePath :: String -> Node -> String +getFilePath dir (UNode iri) = fixFilePath' iri + where fixFilePath' = (dir <>) . T.unpack . fromJust . T.stripPrefix (unBaseUrl mfBaseURIXml) +getFilePath _ _ = error "Unexpected node" mfBaseURIXml :: BaseUrl mfBaseURIXml = BaseUrl "http://www.w3.org/2013/RDFXMLTests/" -testParser :: XmlParser -testParser = XmlParser (Just mfBaseURIXml) Nothing +testParser :: String -> XmlParser +testParser dUri = XmlParser (Just mfBaseURIXml) (Just . T.pack $ dUri) diff --git a/testsuite/tests/W3C/TurtleTest.hs b/testsuite/tests/W3C/TurtleTest.hs index 8e30ede..68c19c2 100644 --- a/testsuite/tests/W3C/TurtleTest.hs +++ b/testsuite/tests/W3C/TurtleTest.hs @@ -3,11 +3,13 @@ module W3C.TurtleTest ( testsParsec , testsAttoparsec + , mfBaseURITurtle ) where import Test.Tasty import qualified Test.Tasty.HUnit as TU +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import qualified Data.Text as T @@ -21,41 +23,47 @@ import Text.RDF.RDF4H.NTriplesParser import Text.RDF.RDF4H.ParserUtils import Data.RDF.Graph.TList -testsParsec :: Manifest -> TestTree -testsParsec = runManifestTests (mfEntryToTest testParserParsec) +testsParsec :: String -> Manifest -> TestTree +testsParsec = runManifestTests . (`mfEntryToTest` testParserParsec) -testsAttoparsec :: Manifest -> TestTree -testsAttoparsec = runManifestTests (mfEntryToTest testParserAttoparsec) +testsAttoparsec :: String -> Manifest -> TestTree +testsAttoparsec = runManifestTests . (`mfEntryToTest` testParserAttoparsec) -mfEntryToTest :: TurtleParserCustom -> TestEntry -> TestTree -mfEntryToTest parser (TestTurtleEval nm _ _ act' res') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = parseFile parser (nodeURI act) >>= return . fromEither :: IO (RDF TList) - expectedRDF = parseFile NTriplesParser (nodeURI res) >>= return . fromEither :: IO (RDF TList) +mfEntryToTest :: String -> (String -> TurtleParserCustom) -> TestEntry -> TestTree +mfEntryToTest dir parser (TestTurtleEval nm _ _ act res) = + let pathExpected = getFilePath dir res + pathAction = getFilePath dir act + parsedRDF = (fromEither <$> parseFile (parser (nodeURI act)) pathAction) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser pathExpected) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF -mfEntryToTest parser (TestTurtleNegativeEval nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtleNegativeEval nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest parser (TestTurtlePositiveSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtlePositiveSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsParsed rdf -mfEntryToTest parser (TestTurtleNegativeSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtleNegativeSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x +-- [NOTE] Was previously: http://www.w3.org/2013/TurtleTests/ mfBaseURITurtle :: BaseUrl -mfBaseURITurtle = BaseUrl "http://www.w3.org/2013/TurtleTests/" +mfBaseURITurtle = BaseUrl "http://w3c.github.io/rdf-tests/turtle/" -- testParser :: TurtleParser -- testParser = TurtleParser (Just mfBaseURITurtle) Nothing -testParserParsec :: TurtleParserCustom -testParserParsec = TurtleParserCustom (Just mfBaseURITurtle) Nothing Parsec +testParserParsec :: String -> TurtleParserCustom +testParserParsec dUrl = TurtleParserCustom (Just mfBaseURITurtle) (Just . T.pack $ dUrl) Parsec -testParserAttoparsec :: TurtleParserCustom -testParserAttoparsec = TurtleParserCustom (Just mfBaseURITurtle) Nothing Attoparsec +testParserAttoparsec :: String -> TurtleParserCustom +testParserAttoparsec dUrl = TurtleParserCustom (Just mfBaseURITurtle) (Just . T.pack $ dUrl) Attoparsec + +getFilePath :: String -> Node -> String +getFilePath dir (UNode iri) = fixFilePath' iri + where fixFilePath' = (dir <>) . T.unpack . fromJust . T.stripPrefix (unBaseUrl mfBaseURITurtle) +getFilePath _ _ = error "Unexpected node" diff --git a/testsuite/tests/W3C/W3CAssertions.hs b/testsuite/tests/W3C/W3CAssertions.hs index 8cf6a88..2ee45c9 100644 --- a/testsuite/tests/W3C/W3CAssertions.hs +++ b/testsuite/tests/W3C/W3CAssertions.hs @@ -6,6 +6,7 @@ module W3C.W3CAssertions , nodeURI ) where +import Data.Semigroup ((<>)) import qualified Data.Text as T import Data.RDF import qualified Test.HUnit as TU @@ -14,13 +15,13 @@ import W3C.Manifest runManifestTests :: (TestEntry -> TestTree) -> Manifest -> TestTree runManifestTests mfEntryToTest manifest = - testGroup (T.unpack $ description manifest) $ map mfEntryToTest $ entries manifest + testGroup (T.unpack $ description manifest) $ mfEntryToTest <$> entries manifest assertIsIsomorphic :: IO (RDF TList) -> IO (RDF TList) -> IO () assertIsIsomorphic r1 r2 = do gr1 <- r1 gr2 <- r2 - TU.assertBool ("not isomorphic: " ++ show gr1 ++ " compared with " ++ show gr2) (isSame gr1 gr2) -- (isGraphIsomorphic gr1 gr2) + TU.assertBool ("not isomorphic: " <> show gr1 <> " compared with " <> show gr2) (isSame gr1 gr2) -- (isGraphIsomorphic gr1 gr2) where noBlankNodes g = (all noBlanks . expandTriples) g noBlanks (Triple s p o) = not (blankNode s) @@ -41,12 +42,12 @@ assertIsIsomorphic r1 r2 = do assertIsParsed :: IO (Either ParseFailure (RDF TList)) -> TU.Assertion assertIsParsed r1 = do gr1 <- r1 - TU.assertBool ("unable to parse, reason:\n" ++ show gr1) (isParsed gr1) + TU.assertBool ("unable to parse, reason:\n" <> show gr1) (isParsed gr1) assertIsNotParsed :: IO (Either ParseFailure (RDF TList)) -> TU.Assertion assertIsNotParsed r1 = do gr1 <- r1 - TU.assertBool ("parsed unexpectantly:\n" ++ show gr1) (not (isParsed gr1)) + TU.assertBool ("parsed unexpectantly:\n" <> show gr1) (not (isParsed gr1)) isParsed :: Either a b -> Bool isParsed (Left _) = False @@ -54,4 +55,4 @@ isParsed (Right _) = True nodeURI :: Node -> String nodeURI (UNode u) = T.unpack u -nodeURI node = error $ "W3CAssertions: unexpected node in `nodeURI`: " ++ show node +nodeURI node = error $ "W3CAssertions: unexpected node in `nodeURI`: " <> show node