From 9ca38100eb1b0c93e4a772e945f9a9acbb4a633b Mon Sep 17 00:00:00 2001 From: iphydf Date: Thu, 18 Jan 2024 14:02:55 +0000 Subject: [PATCH] test: Add a test using happy-arbitrary. This produces valid C code that should then be parseable again by the Cimple parser. --- BUILD.bazel | 3 + cimple.cabal | 3 + expand_yacc.pl | 1 + src/Language/Cimple/Parser.y | 26 ++--- src/Language/Cimple/Tokens.hs | 2 +- test/Language/Cimple/ParserSpec.hs | 177 ++++++++++++++++++++++++++++- 6 files changed, 192 insertions(+), 20 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 3962a9e..295415d 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -201,8 +201,11 @@ hspec_test( size = "small", deps = [ ":hs-cimple", + "//hs-happy-arbitrary", + "//third_party/haskell:QuickCheck", "//third_party/haskell:ansi-wl-pprint", "//third_party/haskell:base", + "//third_party/haskell:bytestring", "//third_party/haskell:data-fix", "//third_party/haskell:hspec", "//third_party/haskell:text", diff --git a/cimple.cabal b/cimple.cabal index f2f2012..5b38b36 100644 --- a/cimple.cabal +++ b/cimple.cabal @@ -119,9 +119,12 @@ test-suite testsuite build-tool-depends: hspec-discover:hspec-discover build-depends: ansi-wl-pprint + , QuickCheck , base <5 + , bytestring , cimple , data-fix + , happy-arbitrary , hspec , text , transformers-compat diff --git a/expand_yacc.pl b/expand_yacc.pl index c2e543a..2477be0 100755 --- a/expand_yacc.pl +++ b/expand_yacc.pl @@ -39,6 +39,7 @@ sub show_nonterm { my ($res, $name, $nonterm) = @_; push @$res, "$name :: { $nonterm->{type} }"; + push @$res, "$name"; my @prod_res; for my $prod (@{ $nonterm->{productions} }) { show_production \@prod_res, $prod; diff --git a/src/Language/Cimple/Parser.y b/src/Language/Cimple/Parser.y index 20c533e..16c7e01 100644 --- a/src/Language/Cimple/Parser.y +++ b/src/Language/Cimple/Parser.y @@ -315,8 +315,8 @@ PreprocUndef PreprocConstExpr :: { NonTerm } PreprocConstExpr -: PureExpr(PreprocConstExpr) { $1 } -| 'defined' '(' ID_CONST ')' { Fix $ PreprocDefined $3 } +: 'defined' '(' ID_CONST ')' { Fix $ PreprocDefined $3 } +| PureExpr(PreprocConstExpr) { $1 } MacroParamList :: { [NonTerm] } MacroParamList @@ -355,10 +355,7 @@ Stmts Stmt :: { NonTerm } Stmt -: PreprocIfdef(Stmts) { $1 } -| PreprocIf(Stmts) { $1 } -| PreprocDefine Stmts PreprocUndef { Fix $ PreprocScopedDefine $1 (reverse $2) $3 } -| DeclStmt { $1 } +: DeclStmt { $1 } | CompoundStmt { $1 } | IfStmt { $1 } | ForStmt { $1 } @@ -376,6 +373,9 @@ Stmt | return Expr ';' { Fix $ Return (Just $2) } | switch '(' Expr ')' '{' SwitchCases '}' { Fix $ SwitchStmt $3 (reverse $6) } | Comment { $1 } +| PreprocIfdef(Stmts) { $1 } +| PreprocIf(Stmts) { $1 } +| PreprocDefine Stmts PreprocUndef { Fix $ PreprocScopedDefine $1 (reverse $2) $3 } IfStmt :: { NonTerm } IfStmt @@ -474,10 +474,10 @@ CompoundStmt -- Expressions that are safe for use as macro body without () around it.. PreprocSafeExpr(x) : LiteralExpr { $1 } -| '(' x ')' { Fix $ ParenExpr $2 } -| '(' QualType ')' x %prec CAST { Fix $ CastExpr $2 $4 } -| sizeof '(' Expr ')' { Fix $ SizeofExpr $3 } | sizeof '(' QualType ')' { Fix $ SizeofType $3 } +| sizeof '(' Expr ')' { Fix $ SizeofExpr $3 } +| '(' QualType ')' x %prec CAST { Fix $ CastExpr $2 $4 } +| '(' x ')' { Fix $ ParenExpr $2 } ConstExpr :: { NonTerm } ConstExpr @@ -485,6 +485,10 @@ ConstExpr PureExpr(x) : PreprocSafeExpr(x) { $1 } +| '!' x { Fix $ UnaryExpr UopNot $2 } +| '~' x { Fix $ UnaryExpr UopNeg $2 } +| '-' x %prec NEG { Fix $ UnaryExpr UopMinus $2 } +| '&' x %prec ADDRESS { Fix $ UnaryExpr UopAddress $2 } | x '!=' x { Fix $ BinaryExpr $1 BopNe $3 } | x '==' x { Fix $ BinaryExpr $1 BopEq $3 } | x '||' x { Fix $ BinaryExpr $1 BopOr $3 } @@ -504,10 +508,6 @@ PureExpr(x) | x '>=' x { Fix $ BinaryExpr $1 BopGe $3 } | x '>>' x { Fix $ BinaryExpr $1 BopRsh $3 } | x '?' x ':' x { Fix $ TernaryExpr $1 $3 $5 } -| '!' x { Fix $ UnaryExpr UopNot $2 } -| '~' x { Fix $ UnaryExpr UopNeg $2 } -| '-' x %prec NEG { Fix $ UnaryExpr UopMinus $2 } -| '&' x %prec ADDRESS { Fix $ UnaryExpr UopAddress $2 } LiteralExpr :: { NonTerm } LiteralExpr diff --git a/src/Language/Cimple/Tokens.hs b/src/Language/Cimple/Tokens.hs index a5ba3ff..8848a2a 100644 --- a/src/Language/Cimple/Tokens.hs +++ b/src/Language/Cimple/Tokens.hs @@ -125,7 +125,7 @@ data LexemeClass | ErrorToken | Eof - deriving (Enum, Bounded, Ord, Eq, Show, Generic) + deriving (Enum, Bounded, Ord, Eq, Show, Read, Generic) instance FromJSON LexemeClass instance ToJSON LexemeClass diff --git a/test/Language/Cimple/ParserSpec.hs b/test/Language/Cimple/ParserSpec.hs index 984f830..a86eda2 100644 --- a/test/Language/Cimple/ParserSpec.hs +++ b/test/Language/Cimple/ParserSpec.hs @@ -1,13 +1,176 @@ +{-# OPTIONS_GHC -Wwarn #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE Strict #-} module Language.Cimple.ParserSpec where -import Data.Fix (Fix (..)) -import Test.Hspec (Spec, describe, it, shouldBe, - shouldSatisfy) +import qualified Data.ByteString.Lazy as LBS +import Data.Fix (Fix (..)) +import Data.Text (Text) +import qualified Data.Text as Text +import Language.Cimple (AlexPosn (..), Lexeme (..), + LexemeClass (..), NodeF (..), + Scope (..)) +import qualified Language.Cimple as Cimple +import Language.Cimple.IO (parseText) +import qualified Language.Happy as Happy +import Language.Happy.Arbitrary (Config, defConfig, genTokens) +import Test.Hspec (Spec, describe, it, shouldBe, + shouldNotBe, shouldSatisfy) +import Test.QuickCheck (Gen, forAll) -import Language.Cimple (AlexPosn (..), Lexeme (..), - LexemeClass (..), NodeF (..), Scope (..)) -import Language.Cimple.IO (parseText) + +sampleToken :: LexemeClass -> Text +sampleToken c = case c of + IdConst -> "ID_CONST" + IdFuncType -> "func_cb" + IdStdType -> "uint32_t" + IdSueType -> "Sue_Type" + IdVar -> "var" + KwBreak -> "break" + KwCase -> "case" + KwConst -> "const" + KwContinue -> "continue" + KwDefault -> "default" + KwDo -> "do" + KwElse -> "else" + KwEnum -> "enum" + KwExtern -> "extern" + KwFor -> "for" + KwGnuPrintf -> "gnu_printf" + KwGoto -> "goto" + KwIf -> "if" + KwNonNull -> "non_null" + KwNullable -> "nullable" + KwReturn -> "return" + KwSizeof -> "sizeof" + KwStatic -> "static" + KwStaticAssert -> "static_assert" + KwStruct -> "struct" + KwSwitch -> "switch" + KwTypedef -> "typedef" + KwUnion -> "union" + KwVla -> "VLA" + KwVoid -> "void" + KwWhile -> "while" + LitFalse -> "false" + LitTrue -> "true" + LitChar -> "'a'" + LitInteger -> "123" + LitString -> "\"str\"" + LitSysInclude -> "" + PctAmpersand -> "&" + PctAmpersandAmpersand -> "&&" + PctAmpersandEq -> "&=" + PctArrow -> "->" + PctAsterisk -> "*" + PctAsteriskEq -> "*=" + PctCaret -> "^" + PctCaretEq -> "^=" + PctColon -> ":" + PctComma -> "," + PctEllipsis -> "..." + PctEMark -> "!" + PctEMarkEq -> "!=" + PctEq -> "=" + PctEqEq -> "==" + PctGreater -> ">" + PctGreaterEq -> ">=" + PctGreaterGreater -> ">>" + PctGreaterGreaterEq -> ">>=" + PctLBrace -> "{\n" + PctLBrack -> "[" + PctLess -> "<" + PctLessEq -> "<=" + PctLessLess -> "<<" + PctLessLessEq -> "<<=" + PctLParen -> "(" + PctMinus -> "-" + PctMinusEq -> "-=" + PctMinusMinus -> "--" + PctPeriod -> "." + PctPercent -> "%" + PctPercentEq -> "%=" + PctPipe -> "|" + PctPipeEq -> "|=" + PctPipePipe -> "||" + PctPlus -> "+" + PctPlusEq -> "+=" + PctPlusPlus -> "++" + PctQMark -> "?" + PctRBrace -> "}" + PctRBrack -> "]" + PctRParen -> ")" + PctSemicolon -> ";\n" + PctSlash -> "/" + PctSlashEq -> "/=" + PctTilde -> "~" + PpDefine -> "\n#define" + PpDefined -> "\n#defined" + PpElif -> "\n#elif" + PpElse -> "\n#else" + PpEndif -> "\n#endif" + PpIf -> "\n#if" + PpIfdef -> "\n#ifdef" + PpIfndef -> "\n#ifndef" + PpInclude -> "\n#include" + PpNewline -> "\n" + PpUndef -> "\n#undef" + CmtBlock -> "/**" + CmtCommand -> "@param" + CmtAttr -> "[out]" + CmtEndDocSection -> "/** @} */" + CmtPrefix -> "//" + CmtIndent -> "*" + CmtStart -> "/*" + CmtStartCode -> "/*!" + CmtStartBlock -> "/***" + CmtStartDoc -> "/**" + CmtStartDocSection -> "/** @{" + CmtSpdxCopyright -> "Copyright ©" + CmtSpdxLicense -> "SPDX-License-Identifier:" + CmtCode -> "@code" + CmtWord -> "hello" + CmtRef -> "`ref`" + CmtEnd -> "*/\n" + IgnStart -> "\n//!TOKSTYLE-\n" + IgnBody -> "ignored stuff" + IgnEnd -> "\n//!TOKSTYLE+\n" + + ErrorToken -> "!!ERROR!!" + Eof -> "!!EOF!!" + + +config :: Config LexemeClass +config = defConfig parseToken + where + parseToken :: Text -> LexemeClass + parseToken = + read + . Text.unpack + . (!! 2) + . concatMap (filter (not . Text.null) . Text.splitOn "\t") + . Text.splitOn " " + +grammar :: Maybe Happy.Grammar +grammar = do + source <- Cimple.source + case Happy.runAlex (LBS.fromStrict source) Happy.parseGrammar of + Left err -> error err + Right ok -> return ok + +arbitraryCode :: Happy.Grammar -> Gen Text +arbitraryCode g = + Text.intercalate " " . map sampleToken <$> genTokens config "TranslationUnit" g + +arbitrarySpec :: Spec +arbitrarySpec = case grammar of + Nothing -> return () + Just g -> + it "can handle arbitrary code" $ + forAll (arbitraryCode g) $ \code -> do + case parseText code of + Right _ -> return () + Left err -> err `shouldNotBe` "" isRight1 :: Either a [b] -> Bool @@ -18,6 +181,8 @@ isRight1 _ = False spec :: Spec spec = do describe "C parsing" $ do + arbitrarySpec + it "should parse a simple function" $ do let ast = parseText "int a(void) { return 3; }" ast `shouldSatisfy` isRight1