diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f6c0b22 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +@byteverse/l3c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..967dfd3 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,11 @@ +name: build +on: + pull_request: + branches: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build-matrix.yaml@main + with: + cabal-file: syslog.cabal diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..9411962 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,10 @@ +name: release +on: + push: + tags: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/release.yaml@main + secrets: inherit diff --git a/.gitignore b/.gitignore index 18f70b9..e0614e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ dist dist-* cabal-dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 251f625..9898168 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ # Revision history for syslog -## 0.1.0.0 -- YYYY-mm-dd +## 0.1.0.0 -- 2024-02-14 -* First version. Released on an unsuspecting world. +* First version. diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af..0000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/examples/validate.hs b/examples/validate.hs index 5c30182..6cb5d3a 100644 --- a/examples/validate.hs +++ b/examples/validate.hs @@ -1,34 +1,35 @@ -{-# language BangPatterns #-} -{-# language LambdaCase #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE LambdaCase #-} -import Data.Primitive (ByteArray) -import Data.ByteString (ByteString) -import Data.Bool (bool) import Control.Exception -import System.IO.Error (isEOFError) +import Data.Bool (bool) +import Data.ByteString (ByteString) +import Data.Primitive (ByteArray) import Foreign.C.Types (CChar) +import System.IO.Error (isEOFError) -import qualified Data.Primitive as PM -import qualified Data.Primitive.Ptr as PM import qualified Data.ByteString as ByteString import qualified Data.Bytes as Bytes +import qualified Data.Primitive as PM +import qualified Data.Primitive.Ptr as PM import qualified Syslog.Bsd as Bsd main :: IO () main = do - let go !ix = catchJust (bool Nothing (Just ()) . isEOFError) (Just <$> ByteString.getLine) (\() -> pure Nothing) >>= \case - Nothing -> pure () - Just b0 -> do - b1 <- b2b b0 - case Bsd.decode (Bytes.fromByteArray b1) of - Nothing -> fail $ "Decode failure on line " ++ show ix - Just r -> do - print r - go (ix + 1) + let go !ix = + catchJust (bool Nothing (Just ()) . isEOFError) (Just <$> ByteString.getLine) (\() -> pure Nothing) >>= \case + Nothing -> pure () + Just b0 -> do + b1 <- b2b b0 + case Bsd.decode (Bytes.fromByteArray b1) of + Nothing -> fail $ "Decode failure on line " ++ show ix + Just r -> do + print r + go (ix + 1) go 0 b2b :: ByteString -> IO ByteArray -b2b !b = ByteString.useAsCStringLen b $ \(ptr,len) -> do +b2b !b = ByteString.useAsCStringLen b $ \(ptr, len) -> do arr <- PM.newByteArray len PM.copyPtrToMutablePrimArray (castArray arr) 0 ptr len PM.unsafeFreezeByteArray arr diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..40cd005 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,51 @@ +# Number of spaces per indentation step +indentation: 2 + +# Max line length for automatic line breaking +column-limit: 200 + +# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) +function-arrows: trailing + +# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) +comma-style: leading + +# Styling of import/export lists (choices: leading, trailing, or diff-friendly) +import-export-style: leading + +# Whether to full-indent or half-indent 'where' bindings past the preceding body +indent-wheres: false + +# Whether to leave a space before an opening record brace +record-brace-space: true + +# Number of spaces between top-level declarations +newlines-between-decls: 1 + +# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) +haddock-style: multi-line + +# How to print module docstring +haddock-style-module: null + +# Styling of let blocks (choices: auto, inline, newline, or mixed) +let-style: auto + +# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) +in-style: right-align + +# Whether to put parentheses around a single constraint (choices: auto, always, or never) +single-constraint-parens: always + +# Output Unicode syntax (choices: detect, always, or never) +unicode: never + +# Give the programmer more choice on where to insert blank lines +respectful: true + +# Fixity information for operators +fixities: [] + +# Module reexports Fourmolu should know about +reexports: [] + diff --git a/src/Syslog/Bsd.hs b/src/Syslog/Bsd.hs index 3090b4e..4edac75 100644 --- a/src/Syslog/Bsd.hs +++ b/src/Syslog/Bsd.hs @@ -1,42 +1,45 @@ -{-# language BangPatterns #-} -{-# language DuplicateRecordFields #-} -{-# language LambdaCase #-} -{-# language NamedFieldPuns #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} --- | Parse RFC 3164 messages. For example: --- --- > <133>Feb 25 14:09:07 webserver syslogd: restart --- > <0>Oct 22 10:52:01 scapegoat.dmz.example.org sched[0]: That's All Folks! --- --- This library assumes that the @TAG@ field described by section 5.3 of --- RFC 3164 is a process name. It also assumes that the optional bracketed --- number that follows it is a process id. This library also addresses three --- common extensions to RFC 3164: --- --- * Some vendors include a year after the timestamp. For example: --- @<14>Oct 15 11:14:59 2019 example.com ...@. When present, the year --- is parsed and provided to the user. --- * Some vendors include a priority that preceeds the process name. For --- example: @<133>Aug 10 09:05:14 my-host notice tmsh[4726]: ...@. The --- Linux man page for @syslog.conf@ lists these options for priority: --- @debug@, @info@, @notice@, @warning@, @warn@, @err@, @error@, @crit@, --- @alert@, @emerg@, @panic@. If a process name begins with any of these --- keywords (followed by a space), the keyword and the trailing space --- are removed from the process name, and the keyword is made available --- in the @priority@ field. --- * Cisco ASAs omit the hostname sometimes. This is totally bizarre and leads --- to messages that looks like: @<190>Jun 08 2022 14:46:28: message@. In --- this case, the hostname is set to the empty string. +{- | Parse RFC 3164 messages. For example: + +> <133>Feb 25 14:09:07 webserver syslogd: restart +> <0>Oct 22 10:52:01 scapegoat.dmz.example.org sched[0]: That's All Folks! + +This library assumes that the @TAG@ field described by section 5.3 of +RFC 3164 is a process name. It also assumes that the optional bracketed +number that follows it is a process id. This library also addresses three +common extensions to RFC 3164: + +* Some vendors include a year after the timestamp. For example: + @<14>Oct 15 11:14:59 2019 example.com ...@. When present, the year + is parsed and provided to the user. +* Some vendors include a priority that preceeds the process name. For + example: @<133>Aug 10 09:05:14 my-host notice tmsh[4726]: ...@. The + Linux man page for @syslog.conf@ lists these options for priority: + @debug@, @info@, @notice@, @warning@, @warn@, @err@, @error@, @crit@, + @alert@, @emerg@, @panic@. If a process name begins with any of these + keywords (followed by a space), the keyword and the trailing space + are removed from the process name, and the keyword is made available + in the @priority@ field. +* Cisco ASAs omit the hostname sometimes. This is totally bizarre and leads + to messages that looks like: @<190>Jun 08 2022 14:46:28: message@. In + this case, the hostname is set to the empty string. +-} module Syslog.Bsd ( -- * Types - Message(..) - , Process(..) - , Timestamp(..) + Message (..) + , Process (..) + , Timestamp (..) + -- * Full Decode , decode , parser + -- * Parsing Fragments , takePriority , takeTimestamp @@ -47,17 +50,17 @@ module Syslog.Bsd import Prelude hiding (id) import Control.Monad (when) -import Data.Bytes.Types (Bytes(Bytes)) import Data.Bytes.Parser (Parser) -import Data.Word (Word8,Word32) +import Data.Bytes.Types (Bytes (Bytes)) +import Data.Word (Word32, Word8) import qualified Chronos import qualified Data.Bytes as Bytes -import qualified Data.Maybe.Unpacked.Numeric.Word32 as Word32 import qualified Data.Bytes.Parser as Parser import qualified Data.Bytes.Parser.Latin as Latin import qualified Data.Bytes.Parser.Unsafe as Unsafe import qualified Data.Bytes.Text.Latin1 as Latin1 +import qualified Data.Maybe.Unpacked.Numeric.Word32 as Word32 data Message = Message { priority :: !Word32 @@ -65,7 +68,8 @@ data Message = Message , hostname :: {-# UNPACK #-} !Bytes , process :: !(Maybe Process) , message :: {-# UNPACK #-} !Bytes - } deriving (Show) + } + deriving (Show) data Timestamp = Timestamp { month :: !Chronos.Month @@ -74,37 +78,40 @@ data Timestamp = Timestamp , minute :: !Word8 , second :: !Word8 , year :: {-# UNPACK #-} !Word32.Maybe - -- ^ Section 5.1 of RFC 3164 notes that some software appends - -- a four-character year after the time of day. Since hostnames - -- cannot start with digits, we can parse this unambiguously. We - -- extend RFC 3164 to handle these nonstandard years. - } deriving (Show) + -- ^ Section 5.1 of RFC 3164 notes that some software appends + -- a four-character year after the time of day. Since hostnames + -- cannot start with digits, we can parse this unambiguously. We + -- extend RFC 3164 to handle these nonstandard years. + } + deriving (Show) data Process = Process { priority :: {-# UNPACK #-} !Bytes - -- ^ Priority is nonstandard. This field is the empty byte sequence - -- when the priority is not present. + -- ^ Priority is nonstandard. This field is the empty byte sequence + -- when the priority is not present. , name :: {-# UNPACK #-} !Bytes , id :: {-# UNPACK #-} !Word32.Maybe - } deriving (Show) + } + deriving (Show) -- | Run the RFC 3164 parser. See 'parser'. decode :: Bytes -> Maybe Message decode = Parser.parseBytesMaybe parser --- | Parse a RFC 3164 message. Note that this is just @takePriority@, --- @takeTimestamp@, @takeHostname, and @takeProcess@ called in sequence, --- followed by skipping whitespace and then treating the remaining input --- as the original message. +{- | Parse a RFC 3164 message. Note that this is just @takePriority@, +@takeTimestamp@, @takeHostname, and @takeProcess@ called in sequence, +followed by skipping whitespace and then treating the remaining input +as the original message. +-} parser :: Parser () s Message parser = do priority <- takePriority () timestamp <- takeTimestamp () - Latin.trySatisfy (==':') >>= \case + Latin.trySatisfy (== ':') >>= \case True -> do Latin.skipChar ' ' message <- Parser.remaining - pure Message{priority,timestamp,hostname=Bytes.empty,process=Nothing,message} + pure Message {priority, timestamp, hostname = Bytes.empty, process = Nothing, message} False -> do hostname <- takeHostname () -- Watchguard includes a serial number and an ISO8601-encoded datetime @@ -113,37 +120,43 @@ parser = do -- an open parenthesis to appear in this position. So, by doing -- this, we do not reject any good logs. Latin.peek' () >>= \case - c | c >= 'A' && c <= 'Z' -> Parser.orElse - ( do Latin.skipWhile (\x -> (x >= 'A' && x <= 'Z') || (x >= '0' && x <= '9')) - Latin.char () ' ' - Latin.char () '(' - Latin.skipDigits1 () - Latin.char () '-' - Latin.skipDigits1 () - Latin.char () '-' - Latin.skipDigits1 () - Latin.char () 'T' - Latin.skipDigits1 () - Latin.char () ':' - Latin.skipDigits1 () - Latin.char () ':' - Latin.skipDigits1 () - Latin.char () ')' - Latin.char () ' ' - ) (pure ()) + c + | c >= 'A' && c <= 'Z' -> + Parser.orElse + ( do + Latin.skipWhile (\x -> (x >= 'A' && x <= 'Z') || (x >= '0' && x <= '9')) + Latin.char () ' ' + Latin.char () '(' + Latin.skipDigits1 () + Latin.char () '-' + Latin.skipDigits1 () + Latin.char () '-' + Latin.skipDigits1 () + Latin.char () 'T' + Latin.skipDigits1 () + Latin.char () ':' + Latin.skipDigits1 () + Latin.char () ':' + Latin.skipDigits1 () + Latin.char () ')' + Latin.char () ' ' + ) + (pure ()) _ -> pure () - process <- Latin.trySatisfy (==':') >>= \case - True -> pure Nothing - False -> do - p <- takeProcess () - pure (Just p) + process <- + Latin.trySatisfy (== ':') >>= \case + True -> pure Nothing + False -> do + p <- takeProcess () + pure (Just p) Latin.skipChar ' ' message <- Parser.remaining - pure Message{priority,timestamp,hostname,process,message} + pure Message {priority, timestamp, hostname, process, message} --- | Consume the angle-bracketed priority. RFC 3164 does not allow --- a space to follow the priority, so this does not consume a --- trailing space. +{- | Consume the angle-bracketed priority. RFC 3164 does not allow +a space to follow the priority, so this does not consume a +trailing space. +-} takePriority :: e -> Parser e s Word32 takePriority e = do Latin.char e '<' @@ -151,17 +164,19 @@ takePriority e = do Latin.char e '>' pure priority --- | Consume the hostname and the space that follows it. Returns --- the hostname. +{- | Consume the hostname and the space that follows it. Returns +the hostname. +-} takeHostname :: e -> Parser e s Bytes takeHostname e = -- TODO: This should actually use a takeWhile1. Latin.takeTrailedBy e ' ' --- | Consume the timestamp and the trailing space character if a trailing --- space exists. Returns the parsed timestamp. This allows two extensions --- to the RFC 3164 datetime format. The year may be present either right --- after the day of the month or after the time of day. +{- | Consume the timestamp and the trailing space character if a trailing +space exists. Returns the parsed timestamp. This allows two extensions +to the RFC 3164 datetime format. The year may be present either right +after the day of the month or after the time of day. +-} takeTimestamp :: e -> Parser e s Timestamp takeTimestamp e = do monthBytes <- Parser.take e 3 @@ -172,9 +187,10 @@ takeTimestamp e = do -- padded with a space. Latin.skipChar1 e ' ' dayRaw <- Latin.decWord8 e - day <- if dayRaw < 32 - then pure (Chronos.DayOfMonth (fromIntegral dayRaw)) - else Parser.fail e + day <- + if dayRaw < 32 + then pure (Chronos.DayOfMonth (fromIntegral dayRaw)) + else Parser.fail e Latin.char e ' ' hourOrYear <- Latin.decWord32 e Latin.any e >>= \case @@ -191,7 +207,7 @@ takeTimestamp e = do second <- Latin.decWord8 e when (second > 59) (Parser.fail e) _ <- Latin.trySatisfy (== ' ') - pure Timestamp{month,day,hour,minute,second,year=Word32.just hourOrYear} + pure Timestamp {month, day, hour, minute, second, year = Word32.just hourOrYear} ':' -> do when (hourOrYear > 23) (Parser.fail e) let hour = fromIntegral @Word32 @Word8 hourOrYear @@ -201,21 +217,23 @@ takeTimestamp e = do second <- Latin.decWord8 e when (second > 59) (Parser.fail e) Latin.trySatisfy (== ' ') >>= \case - False -> pure Timestamp{month,day,hour,minute,second,year=Word32.nothing} + False -> pure Timestamp {month, day, hour, minute, second, year = Word32.nothing} True -> do -- The only good way to allow a year is with backtracking. We do not -- learn until we encounter the space following the decimal number -- whether it was a year or part of a hostname (likely an ip address). Parser.orElse - ( do y <- Latin.decWord32 e - Latin.char e ' ' - pure Timestamp{month,day,hour,minute,second,year=Word32.just y} + ( do + y <- Latin.decWord32 e + Latin.char e ' ' + pure Timestamp {month, day, hour, minute, second, year = Word32.just y} ) - (pure Timestamp{month,day,hour,minute,second,year=Word32.nothing}) + (pure Timestamp {month, day, hour, minute, second, year = Word32.nothing}) _ -> Parser.fail e --- | Take the process name and the process id and consume the colon --- that follows them. Does not consume any space after the colon. +{- | Take the process name and the process id and consume the colon +that follows them. Does not consume any space after the colon. +-} takeProcess :: e -> Parser e s Process takeProcess e = do processStart <- Unsafe.cursor @@ -224,7 +242,7 @@ takeProcess e = do arr <- Unsafe.expose let name0 = Bytes arr processStart ((processEndSucc - 1) - processStart) !(# name, priority #) = case Bytes.split1 0x20 name0 of - Just (pre,post) + Just (pre, post) | Latin1.equals3 'e' 'r' 'r' pre -> (# post, pre #) | Latin1.equals4 'c' 'r' 'i' 't' pre -> (# post, pre #) | Latin1.equals4 'i' 'n' 'f' 'o' pre -> (# post, pre #) @@ -238,11 +256,11 @@ takeProcess e = do | Latin1.equals7 'w' 'a' 'r' 'n' 'i' 'n' 'g' pre -> (# post, pre #) _ -> (# name0, Bytes arr 0 0 #) case hasPid of - False -> pure Process{priority,name,id=Word32.nothing} + False -> pure Process {priority, name, id = Word32.nothing} True -> do pid <- Latin.decWord32 e Latin.char2 e ']' ':' - pure Process{priority,name,id=Word32.just pid} + pure Process {priority, name, id = Word32.just pid} -- Precondition: length of bytes is 3 resolveMonth :: Bytes -> Chronos.Month diff --git a/src/Syslog/Ietf.hs b/src/Syslog/Ietf.hs index b47c079..7df3175 100644 --- a/src/Syslog/Ietf.hs +++ b/src/Syslog/Ietf.hs @@ -1,21 +1,23 @@ -{-# language BangPatterns #-} -{-# language DuplicateRecordFields #-} -{-# language LambdaCase #-} -{-# language NamedFieldPuns #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} --- | Parse RFC 5424 messages. For example (from the spec itself): --- --- > <165>1 2003-10-11T22:14:15.003Z mymachine.example.com --- > evntslog - ID47 [exampleSDID@32473 iut="3" eventSource="Application" --- > eventID="1011"] BOMAn application event log entry... +{- | Parse RFC 5424 messages. For example (from the spec itself): + +> <165>1 2003-10-11T22:14:15.003Z mymachine.example.com +> evntslog - ID47 [exampleSDID@32473 iut="3" eventSource="Application" +> eventID="1011"] BOMAn application event log entry... +-} module Syslog.Ietf ( -- * Types - Message(..) - , Element(..) - , Parameter(..) + Message (..) + , Element (..) + , Parameter (..) + -- * Full Decode , decode , parser @@ -25,20 +27,20 @@ import Prelude hiding (id) import Control.Monad (when) import Control.Monad.ST.Run (runIntByteArrayST) -import Data.Bytes.Types (Bytes(Bytes)) import Data.Bytes.Parser (Parser) -import Data.Word (Word8,Word32,Word64) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Int (Int64) import Data.Primitive (SmallArray) +import Data.Word (Word32, Word64, Word8) import qualified Chronos -import qualified Data.Primitive as PM -import qualified Data.Primitive.Contiguous as C -import qualified Data.Maybe.Unpacked.Numeric.Word32 as Word32 import qualified Data.Bytes.Parser as Parser import qualified Data.Bytes.Parser.Latin as Latin import qualified Data.Bytes.Parser.Unsafe as Unsafe import qualified Data.Bytes.Types +import qualified Data.Maybe.Unpacked.Numeric.Word32 as Word32 +import qualified Data.Primitive as PM +import qualified Data.Primitive.Contiguous as C data Message = Message { priority :: !Word32 @@ -48,21 +50,24 @@ data Message = Message , application :: {-# UNPACK #-} !Bytes , processId :: {-# UNPACK #-} !Word32.Maybe , messageType :: {-# UNPACK #-} !Bytes - -- ^ A missing message type, represented as a hyphen in IETF-flavor - -- syslog, is represented by the empty byte sequence. + -- ^ A missing message type, represented as a hyphen in IETF-flavor + -- syslog, is represented by the empty byte sequence. , structuredData :: {-# UNPACK #-} !(SmallArray Element) , message :: {-# UNPACK #-} !Bytes - } deriving (Show) + } + deriving (Show) data Element = Element { id :: {-# UNPACK #-} !Bytes , parameters :: {-# UNPACK #-} !(SmallArray Parameter) - } deriving (Show) + } + deriving (Show) data Parameter = Parameter { name :: {-# UNPACK #-} !Bytes , value :: {-# UNPACK #-} !Bytes - } deriving (Show) + } + deriving (Show) -- | Run the RFC 5424 parser. See 'parser'. decode :: Bytes -> Maybe Message @@ -78,67 +83,86 @@ parser = do Latin.char () ' ' hostname <- takeKeywordAndSpace () application <- takeKeywordAndSpace () - processId <- Latin.trySatisfy (=='-') >>= \case - True -> do - Latin.char () ' ' - pure Word32.nothing - False -> Parser.orElse - -- This is a hack to smooth over a mistake. The process id - -- can actually be things other than a decimal-encoded number. - -- Sometimes it is 128-bit or 256-bit hexadecimal number. In - -- these cases, we just ignore it. - ( do w <- Latin.decWord32 () - Latin.char () ' ' - pure (Word32.just w) - ) - ( do Latin.skipWhile - (\c -> (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9')) - Latin.char () ' ' - pure Word32.nothing - ) - messageType <- Latin.trySatisfy (=='-') >>= \case - True -> do - Latin.char () ' ' - array <- Unsafe.expose - pure Bytes{array,offset=0,length=0} - False -> takeKeywordAndSpace () - structuredData <- Latin.trySatisfy (=='-') >>= \case - True -> pure mempty - False -> takeStructuredData - message <- Parser.isEndOfInput >>= \case - True -> do - arr <- Unsafe.expose - pure Bytes{array=arr,offset=0,length=0} - False -> do - Latin.char () ' ' - Parser.remaining - pure Message - {priority,version,timestamp,hostname,application - ,processId,messageType,structuredData,message - } + processId <- + Latin.trySatisfy (== '-') >>= \case + True -> do + Latin.char () ' ' + pure Word32.nothing + False -> + Parser.orElse + -- This is a hack to smooth over a mistake. The process id + -- can actually be things other than a decimal-encoded number. + -- Sometimes it is 128-bit or 256-bit hexadecimal number. In + -- these cases, we just ignore it. + ( do + w <- Latin.decWord32 () + Latin.char () ' ' + pure (Word32.just w) + ) + ( do + Latin.skipWhile + (\c -> (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9')) + Latin.char () ' ' + pure Word32.nothing + ) + messageType <- + Latin.trySatisfy (== '-') >>= \case + True -> do + Latin.char () ' ' + array <- Unsafe.expose + pure Bytes {array, offset = 0, length = 0} + False -> takeKeywordAndSpace () + structuredData <- + Latin.trySatisfy (== '-') >>= \case + True -> pure mempty + False -> takeStructuredData + message <- + Parser.isEndOfInput >>= \case + True -> do + arr <- Unsafe.expose + pure Bytes {array = arr, offset = 0, length = 0} + False -> do + Latin.char () ' ' + Parser.remaining + pure + Message + { priority + , version + , timestamp + , hostname + , application + , processId + , messageType + , structuredData + , message + } takeStructuredData :: Parser () s (SmallArray Element) -takeStructuredData = go 0 [] where +takeStructuredData = go 0 [] + where go :: Int -> [Element] -> Parser () s (SmallArray Element) - go !n !acc = Latin.trySatisfy (=='[') >>= \case - True -> do - id <- takeKeyword - parameters <- takeParameters - let !e = Element{id,parameters} - go (n + 1) (e : acc) - False -> pure $! C.unsafeFromListReverseN n acc + go !n !acc = + Latin.trySatisfy (== '[') >>= \case + True -> do + id <- takeKeyword + parameters <- takeParameters + let !e = Element {id, parameters} + go (n + 1) (e : acc) + False -> pure $! C.unsafeFromListReverseN n acc takeParameters :: Parser () s (SmallArray Parameter) -takeParameters = go 0 [] where +takeParameters = go 0 [] + where go :: Int -> [Parameter] -> Parser () s (SmallArray Parameter) - go !n !acc = Latin.trySatisfy (==']') >>= \case - True -> pure $! C.unsafeFromListReverseN n acc - False -> do - Latin.char () ' ' - name <- takeKeywordAndEquals - value <- takeParameterValue - let !p = Parameter{name,value} - go (n + 1) (p : acc) + go !n !acc = + Latin.trySatisfy (== ']') >>= \case + True -> pure $! C.unsafeFromListReverseN n acc + False -> do + Latin.char () ' ' + name <- takeKeywordAndEquals + value <- takeParameterValue + let !p = Parameter {name, value} + go (n + 1) (p : acc) -- This handles escape sequences correctly. takeParameterValue :: Parser () s Bytes @@ -146,12 +170,14 @@ takeParameterValue = do Latin.char () '"' start <- Unsafe.cursor Parser.skipTrailedBy2 () 0x22 0x5C >>= \case - False -> do -- no backslashes, went all the way to a double quote + False -> do + -- no backslashes, went all the way to a double quote end <- Unsafe.cursor let !len = (end - start) - 1 arr <- Unsafe.expose - pure Bytes{array=arr,offset=start,length=len} - True -> do -- found a backslash, we will need to escape quotes + pure Bytes {array = arr, offset = start, length = len} + True -> do + -- found a backslash, we will need to escape quotes c <- Latin.any () if c == '"' || c == '\\' then pure () @@ -160,23 +186,25 @@ takeParameterValue = do end <- Unsafe.cursor let !len = (end - start) - 1 arr <- Unsafe.expose - let bs = Bytes{array=arr,offset=start,length=len} + let bs = Bytes {array = arr, offset = start, length = len} pure $! removeEscapeSequences bs consumeThroughUnescapedQuote :: Parser () s () -consumeThroughUnescapedQuote = Parser.skipTrailedBy2 () 0x22 0x5C >>= \case - False -> pure () - True -> do - c <- Latin.any () - if c == '"' || c == '\\' - then consumeThroughUnescapedQuote - else Parser.fail () +consumeThroughUnescapedQuote = + Parser.skipTrailedBy2 () 0x22 0x5C >>= \case + False -> pure () + True -> do + c <- Latin.any () + if c == '"' || c == '\\' + then consumeThroughUnescapedQuote + else Parser.fail () --- | Precondition: Every backslash is followed by a double quote or by --- a backslash. +{- | Precondition: Every backslash is followed by a double quote or by +a backslash. +-} removeEscapeSequences :: Bytes -> Bytes -removeEscapeSequences Bytes{array,offset=off0,length=len0} = - let (lengthX,arrayX) = runIntByteArrayST $ do +removeEscapeSequences Bytes {array, offset = off0, length = len0} = + let (lengthX, arrayX) = runIntByteArrayST $ do dst <- PM.newByteArray len0 let go !ixSrc !ixDst !len = case len of 0 -> pure ixDst @@ -197,12 +225,13 @@ removeEscapeSequences Bytes{array,offset=off0,length=len0} = lenDst <- go off0 0 len0 PM.shrinkMutableByteArray dst lenDst dst' <- PM.unsafeFreezeByteArray dst - pure (lenDst,dst') - in Bytes{array=arrayX,length=lengthX,offset=0} + pure (lenDst, dst') + in Bytes {array = arrayX, length = lengthX, offset = 0} --- | Consume the angle-bracketed priority. RFC 5424 does not allow --- a space to follow the priority, so this does not consume a --- trailing space. +{- | Consume the angle-bracketed priority. RFC 5424 does not allow +a space to follow the priority, so this does not consume a +trailing space. +-} takePriority :: e -> Parser e s Word32 takePriority e = do Latin.char e '<' @@ -210,8 +239,9 @@ takePriority e = do Latin.char e '>' pure priority --- | Consume the keyword and the space that follows it. Returns --- the hostname. +{- | Consume the keyword and the space that follows it. Returns +the hostname. +-} takeKeywordAndSpace :: e -> Parser e s Bytes takeKeywordAndSpace e = -- TODO: This should actually use a takeWhile1. @@ -223,8 +253,9 @@ takeKeyword = -- TODO: Should use takeWhile1 Parser.takeWhile (\c -> c /= 0x20) --- | Consume the keyword and the equals sign that follows it. Returns --- the keyword. +{- | Consume the keyword and the equals sign that follows it. Returns +the keyword. +-} takeKeywordAndEquals :: Parser () s Bytes takeKeywordAndEquals = -- TODO: This should actually use a takeWhile1. @@ -246,33 +277,39 @@ takeTimestamp = do minute <- Latin.decWord () Latin.char () ':' sec <- Latin.decWord () - let date = Chronos.Date - (Chronos.Year (fromIntegral year)) - (Chronos.Month (fromIntegral month)) - (Chronos.DayOfMonth (fromIntegral day)) - !nanos <- Latin.trySatisfy (=='.') >>= \case - True -> do - (n,w) <- Parser.measure (Latin.decWord64 ()) - when (n > 9) (Parser.fail ()) - let go !acc !b = case b of - 0 -> acc - _ -> go (acc * 10) (b - 1) - !ns = go w (9 - n) - pure ns - False -> pure 0 - off <- Latin.any () >>= \case - 'Z' -> pure 0 - '+' -> parserOffset - '-' -> do - !off <- parserOffset - pure (negate off) - _ -> Parser.fail () - pure $! Chronos.OffsetDatetime - ( Chronos.Datetime date $ Chronos.TimeOfDay - (fromIntegral hour) - (fromIntegral minute) - (fromIntegral @Word64 @Int64 (fromIntegral sec * 1000000000 + nanos)) - ) (Chronos.Offset off) + let date = + Chronos.Date + (Chronos.Year (fromIntegral year)) + (Chronos.Month (fromIntegral month)) + (Chronos.DayOfMonth (fromIntegral day)) + !nanos <- + Latin.trySatisfy (== '.') >>= \case + True -> do + (n, w) <- Parser.measure (Latin.decWord64 ()) + when (n > 9) (Parser.fail ()) + let go !acc !b = case b of + 0 -> acc + _ -> go (acc * 10) (b - 1) + !ns = go w (9 - n) + pure ns + False -> pure 0 + off <- + Latin.any () >>= \case + 'Z' -> pure 0 + '+' -> parserOffset + '-' -> do + !off <- parserOffset + pure (negate off) + _ -> Parser.fail () + pure $! + Chronos.OffsetDatetime + ( Chronos.Datetime date $ + Chronos.TimeOfDay + (fromIntegral hour) + (fromIntegral minute) + (fromIntegral @Word64 @Int64 (fromIntegral sec * 1000000000 + nanos)) + ) + (Chronos.Offset off) -- Should consume exactly five characters: HH:MM. However, the implementation -- is more generous. diff --git a/syslog.cabal b/syslog.cabal index 7430c09..be5477b 100644 --- a/syslog.cabal +++ b/syslog.cabal @@ -1,51 +1,57 @@ -cabal-version: 2.4 -name: syslog -version: 0.1.0.0 -synopsis: Decode RFC 3164 and RFC 5424 syslog message formats +cabal-version: 2.4 +name: syslog +version: 0.1.0.0 +synopsis: Decode RFC 3164 and RFC 5424 syslog message formats description: Decode syslog messages that were formatted using RFC 3164 (BSD-syslog) or RFC 5424 (IETF-syslog). -bug-reports: https://github.com/layer-3-communications/syslog -license: BSD-3-Clause -license-file: LICENSE -author: Andrew Martin -maintainer: andrew.thaddeus@gmail.com -copyright: 2020 Andrew Martin -category: Data -build-type: Simple -extra-source-files: CHANGELOG.md + +homepage: https://github.com/byteverse/syslog +bug-reports: https://github.com/byteverse/syslog/issues +license: BSD-3-Clause +license-file: LICENSE +author: Andrew Martin +maintainer: amartin@layer3com.com +copyright: 2020 Andrew Martin +category: Data +build-type: Simple +extra-doc-files: CHANGELOG.md +tested-with: GHC ==9.4.8 || ==9.6.3 || ==9.8.1 + +common build-settings + default-language: Haskell2010 + ghc-options: -Wall -Wunused-packages library + import: build-settings exposed-modules: Syslog.Bsd Syslog.Ietf + build-depends: - , base >=4.12 && <5 - , unpacked-maybe-numeric>=0.1.1 - , chronos >=1.1 - , byteslice >=0.2.6 - , bytesmith >=0.3.6 - , contiguous >=0.5.1 - , primitive >=0.7.1 - , run-st >=0.1.1 - hs-source-dirs: src - default-language: Haskell2010 - ghc-options: -O2 -Wall + , base >=4.12 && <5 + , byteslice >=0.2.6 + , bytesmith >=0.3.6 + , chronos >=1.1 + , contiguous >=0.5.1 + , primitive >=0.7.1 + , run-st >=0.1.1 + , unpacked-maybe-numeric >=0.1.1 + + hs-source-dirs: src + ghc-options: -O2 test-suite test - default-language: Haskell2010 - type: exitcode-stdio-1.0 + import: build-settings + type: exitcode-stdio-1.0 hs-source-dirs: test - main-is: Main.hs - ghc-options: -Wall -O2 + main-is: Main.hs build-depends: - , base >=4.12.0.0 && <5 - , byteslice >=0.2 - , bytestring - , bytebuild - , primitive >=0.7.1 + , base >=4.12.0.0 && <5 + , byteslice >=0.2 + , primitive >=0.7.1 , syslog - , tasty >=1.2.3 - , tasty-hunit >=0.10.0.2 - , text >=1.2 - , text-short + +source-repository head + type: git + location: git://github.com/byteverse/syslog.git diff --git a/test/Main.hs b/test/Main.hs index 4fae9d4..4c4cf87 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,11 +1,11 @@ -{-# language DuplicateRecordFields #-} -{-# language NamedFieldPuns #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NamedFieldPuns #-} import Prelude hiding (id) import Data.Bytes (Bytes) import Data.Maybe (isNothing) -import Syslog.Bsd (Message(Message),Process(Process)) +import Syslog.Bsd (Message (Message), Process (Process)) import qualified Data.Bytes as Bytes import qualified Data.Bytes.Text.Latin1 as Latin1 @@ -19,7 +19,7 @@ main = do putStrLn "Test A" case Bsd.decode msgA of Nothing -> fail "Could not decode message A" - Just Message{priority,hostname,process=Just Process{name},message} -> do + Just Message {priority, hostname, process = Just Process {name}, message} -> do assert "priority" (priority == 133) assert "hostname" (hostname == Latin1.fromString "webserver") assert "process_name" (name == Latin1.fromString "syslogd") @@ -28,7 +28,7 @@ main = do putStrLn "Test B" case Bsd.decode msgB of Nothing -> fail "Could not decode message B" - Just Message{priority,hostname,process=Just Process{name},message} -> do + Just Message {priority, hostname, process = Just Process {name}, message} -> do assert "priority" (priority == 0) assert "hostname" (hostname == Latin1.fromString "foo.example.org") assert "process_name" (name == Latin1.fromString "sched") @@ -37,7 +37,7 @@ main = do putStrLn "Test C" case Bsd.decode msgC of Nothing -> fail "Could not decode message C" - Just Message{priority,hostname,process=Just Process{name},message} -> do + Just Message {priority, hostname, process = Just Process {name}, message} -> do assert "priority" (priority == 133) assert "hostname" (hostname == Latin1.fromString "192.0.2.231") assert "process_name" (name == Latin1.fromString "stm") @@ -46,50 +46,50 @@ main = do putStrLn "Test D" case Bsd.decode msgD of Nothing -> fail "Could not decode message D" - Just Message{priority,process,message} -> do + Just Message {priority, process, message} -> do assert "priority" (priority == 26) assert "process_name" (isNothing process) assert "message" (message == Latin1.fromString "ASA log") putStrLn "Test E" case Bsd.decode msgE of Nothing -> fail "Could not decode message E" - Just Message{process} -> case process of - Just Process{priority,name} -> do + Just Message {process} -> case process of + Just Process {priority, name} -> do assert "process_priority" (priority == Latin1.fromString "notice") assert "process_name" (name == Latin1.fromString "tmsh") Nothing -> fail "Message E missing process information" putStrLn "Test F" case Bsd.decode msgF of Nothing -> fail "Could not decode message F" - Just Message{message} -> do + Just Message {message} -> do assert "message" (message == Latin1.fromString "foo") putStrLn "Test G" case Bsd.decode msgG of Nothing -> fail "Could not decode message G" - Just{} -> pure () + Just {} -> pure () putStrLn "Test IETF A" case Ietf.decode ietfA of Nothing -> fail "Could not decode IETF message A" - Just Ietf.Message{version,hostname,structuredData} -> do + Just Ietf.Message {version, hostname, structuredData} -> do assert "version" (version == 1) assert "hostname" (hostname == Latin1.fromString "mymachine.example.com") assert "structured_data_length" (length structuredData == 1) putStrLn "Test IETF B" case Ietf.decode ietfB of Nothing -> fail "Could not decode IETF message B" - Just Ietf.Message{version,hostname,application,messageType,structuredData} -> do + Just Ietf.Message {version, hostname, application, messageType, structuredData} -> do assert "version" (version == 1) assert "hostname" (hostname == Latin1.fromString "FOOBAR-SRX-FWL0") assert "application" (application == Latin1.fromString "RT_FLOW") assert "message_type" (messageType == Latin1.fromString "RT_FLOW_SESSION_CLOSE") assert "structured_data_length" (length structuredData == 1) - let Ietf.Element{id,parameters} = PM.indexSmallArray structuredData 0 + let Ietf.Element {id, parameters} = PM.indexSmallArray structuredData 0 assert "structured_data.id" (id == Latin1.fromString "junos@2636.1.1.1.2.133") assert "structured_data.parameters_length" (length parameters == 32) putStrLn "Test IETF C" case Ietf.decode ietfC of Nothing -> fail "Could not decode IETF message C" - Just Ietf.Message{version,hostname,application,messageType,structuredData,message} -> do + Just Ietf.Message {version, hostname, application, messageType, structuredData, message} -> do assert "version" (version == 1) assert "hostname" (hostname == Latin1.fromString "mymachine.example.com") assert "application" (application == Latin1.fromString "bigapp") @@ -99,11 +99,11 @@ main = do putStrLn "Test IETF D" case Ietf.decode ietfD of Nothing -> fail "Could not decode IETF message D" - Just Ietf.Message{message,structuredData} -> do + Just Ietf.Message {message, structuredData} -> do assert "structured_data_length" (length structuredData == 3) - let Ietf.Element{parameters} = PM.indexSmallArray structuredData 2 + let Ietf.Element {parameters} = PM.indexSmallArray structuredData 2 assert "parameters_length" (length parameters == 1) - let Ietf.Parameter{value} = PM.indexSmallArray parameters 0 + let Ietf.Parameter {value} = PM.indexSmallArray parameters 0 case value == Latin1.fromString "\\foo\\bar.txt" of True -> pure () False -> fail ("structured_data.2.0: " ++ show value) @@ -131,52 +131,62 @@ msgF = Latin1.fromString "<133>Aug 10 07:12:13: foo" msgG = Latin1.fromString "<134>Oct 30 12:31:49 10.22.16.11 logrhythm: " ietfA :: Bytes -ietfA = Latin1.fromString $ concat - [ "<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 " - , "[exampleSDID@32473 iut=\"3\" eventSource=\"Application\" eventID=\"1011\"] " - , "BOMAn application event log entry" - ] +ietfA = + Latin1.fromString $ + concat + [ "<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 " + , "[exampleSDID@32473 iut=\"3\" eventSource=\"Application\" eventID=\"1011\"] " + , "BOMAn application event log entry" + ] ietfB :: Bytes -ietfB = Latin1.fromString $ concat - [ "<14>1 2020-10-15T17:01:23.466Z FOOBAR-SRX-FWL0 RT_FLOW - RT_FLOW_SESSION_CLOSE " - , "[junos@2636.1.1.1.2.133 reason=\"application failure or action\" " - , "source-address=\"192.0.2.29\" source-port=\"55110\" " - , "destination-address=\"192.0.2.30\" destination-port=\"135\" connection-tag=\"0\" " - , "service-name=\"junos-ms-rpc-tcp\" nat-source-address=\"192.0.2.229\" " - , "nat-source-port=\"55110\" nat-destination-address=\"192.0.2.230\" " - , "nat-destination-port=\"135\" nat-connection-tag=\"0\" src-nat-rule-type=\"N/A\" " - , "src-nat-rule-name=\"N/A\" dst-nat-rule-type=\"N/A\" dst-nat-rule-name=\"N/A\" " - , "protocol-id=\"6\" policy-name=\"EXAMPLE-POLICY\" " - , "source-zone-name=\"MYSRCZONE\" destination-zone-name=\"MYDSTZONE\" " - , "session-id-32=\"14953\" packets-from-client=\"0\" bytes-from-client=\"0\" " - , "packets-from-server=\"0\" bytes-from-server=\"0\" elapsed-time=\"1\" " - , "application=\"UNKNOWN\" nested-application=\"UNKNOWN\" username=\"N/A\" " - , "roles=\"N/A\" packet-incoming-interface=\"ge-0/0/5.0\" encrypted=\"UNKNOWN\"] " - , "session closed application failure or action" - ] +ietfB = + Latin1.fromString $ + concat + [ "<14>1 2020-10-15T17:01:23.466Z FOOBAR-SRX-FWL0 RT_FLOW - RT_FLOW_SESSION_CLOSE " + , "[junos@2636.1.1.1.2.133 reason=\"application failure or action\" " + , "source-address=\"192.0.2.29\" source-port=\"55110\" " + , "destination-address=\"192.0.2.30\" destination-port=\"135\" connection-tag=\"0\" " + , "service-name=\"junos-ms-rpc-tcp\" nat-source-address=\"192.0.2.229\" " + , "nat-source-port=\"55110\" nat-destination-address=\"192.0.2.230\" " + , "nat-destination-port=\"135\" nat-connection-tag=\"0\" src-nat-rule-type=\"N/A\" " + , "src-nat-rule-name=\"N/A\" dst-nat-rule-type=\"N/A\" dst-nat-rule-name=\"N/A\" " + , "protocol-id=\"6\" policy-name=\"EXAMPLE-POLICY\" " + , "source-zone-name=\"MYSRCZONE\" destination-zone-name=\"MYDSTZONE\" " + , "session-id-32=\"14953\" packets-from-client=\"0\" bytes-from-client=\"0\" " + , "packets-from-server=\"0\" bytes-from-server=\"0\" elapsed-time=\"1\" " + , "application=\"UNKNOWN\" nested-application=\"UNKNOWN\" username=\"N/A\" " + , "roles=\"N/A\" packet-incoming-interface=\"ge-0/0/5.0\" encrypted=\"UNKNOWN\"] " + , "session closed application failure or action" + ] ietfC :: Bytes -ietfC = Latin1.fromString $ concat - [ "<165>1 2003-10-11T22:14:15.003Z mymachine.example.com bigapp - - - " - , "hey world" - ] +ietfC = + Latin1.fromString $ + concat + [ "<165>1 2003-10-11T22:14:15.003Z mymachine.example.com bigapp - - - " + , "hey world" + ] ietfD :: Bytes -ietfD = Latin1.fromString $ concat - [ "<38>1 2021-11-18T11:55:55.661764Z 192.0.2.20 SentinelOne " - , "ab1fc131b2f29bc49b09286bb05e0b94e5c36610 1291980691205274618 " - , "[fileName@53163 fileName=\"badcat.exe\"]" - , "[deviceAddress@53163 deviceAddress=\"192.0.2.21\"]" - , "[path@53163 path=\"\\\\foo\\\\bar.txt\"]" - , " bad news" - ] +ietfD = + Latin1.fromString $ + concat + [ "<38>1 2021-11-18T11:55:55.661764Z 192.0.2.20 SentinelOne " + , "ab1fc131b2f29bc49b09286bb05e0b94e5c36610 1291980691205274618 " + , "[fileName@53163 fileName=\"badcat.exe\"]" + , "[deviceAddress@53163 deviceAddress=\"192.0.2.21\"]" + , "[path@53163 path=\"\\\\foo\\\\bar.txt\"]" + , " bad news" + ] ietfE :: Bytes -ietfE = Latin1.fromString $ concat - [ "<14>1 2021-12-10T09:19:19.614-05:00 THE-HOST RT_FLOW - RT_FLOW_SESSION_DENY " - , "[junos@2636.1.1.1.2.40 source-address=\"192.0.2.13\"]" - ] +ietfE = + Latin1.fromString $ + concat + [ "<14>1 2021-12-10T09:19:19.614-05:00 THE-HOST RT_FLOW - RT_FLOW_SESSION_DENY " + , "[junos@2636.1.1.1.2.40 source-address=\"192.0.2.13\"]" + ] ietfF :: Bytes ietfF = Latin1.fromString "<14>1 2023-05-09T16:21:33-04:00 192.0.2.5 - - - - CEF:0"