From 9ca38100eb1b0c93e4a772e945f9a9acbb4a633b Mon Sep 17 00:00:00 2001
From: iphydf <iphydf@users.noreply.github.com>
Date: Thu, 18 Jan 2024 14:02:55 +0000
Subject: [PATCH] test: Add a test using happy-arbitrary.

This produces valid C code that should then be parseable again by the
Cimple parser.
---
 BUILD.bazel                        |   3 +
 cimple.cabal                       |   3 +
 expand_yacc.pl                     |   1 +
 src/Language/Cimple/Parser.y       |  26 ++---
 src/Language/Cimple/Tokens.hs      |   2 +-
 test/Language/Cimple/ParserSpec.hs | 177 ++++++++++++++++++++++++++++-
 6 files changed, 192 insertions(+), 20 deletions(-)

diff --git a/BUILD.bazel b/BUILD.bazel
index 3962a9e..295415d 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -201,8 +201,11 @@ hspec_test(
     size = "small",
     deps = [
         ":hs-cimple",
+        "//hs-happy-arbitrary",
+        "//third_party/haskell:QuickCheck",
         "//third_party/haskell:ansi-wl-pprint",
         "//third_party/haskell:base",
+        "//third_party/haskell:bytestring",
         "//third_party/haskell:data-fix",
         "//third_party/haskell:hspec",
         "//third_party/haskell:text",
diff --git a/cimple.cabal b/cimple.cabal
index f2f2012..5b38b36 100644
--- a/cimple.cabal
+++ b/cimple.cabal
@@ -119,9 +119,12 @@ test-suite testsuite
   build-tool-depends: hspec-discover:hspec-discover
   build-depends:
       ansi-wl-pprint
+    , QuickCheck
     , base                 <5
+    , bytestring
     , cimple
     , data-fix
+    , happy-arbitrary
     , hspec
     , text
     , transformers-compat
diff --git a/expand_yacc.pl b/expand_yacc.pl
index c2e543a..2477be0 100755
--- a/expand_yacc.pl
+++ b/expand_yacc.pl
@@ -39,6 +39,7 @@ sub show_nonterm {
    my ($res, $name, $nonterm) = @_;
 
    push @$res, "$name :: { $nonterm->{type} }";
+   push @$res, "$name";
    my @prod_res;
    for my $prod (@{ $nonterm->{productions} }) {
       show_production \@prod_res, $prod;
diff --git a/src/Language/Cimple/Parser.y b/src/Language/Cimple/Parser.y
index 20c533e..16c7e01 100644
--- a/src/Language/Cimple/Parser.y
+++ b/src/Language/Cimple/Parser.y
@@ -315,8 +315,8 @@ PreprocUndef
 
 PreprocConstExpr :: { NonTerm }
 PreprocConstExpr
-:	PureExpr(PreprocConstExpr)				{ $1 }
-|	'defined' '(' ID_CONST ')'				{ Fix $ PreprocDefined $3 }
+:	'defined' '(' ID_CONST ')'				{ Fix $ PreprocDefined $3 }
+|	PureExpr(PreprocConstExpr)				{ $1 }
 
 MacroParamList :: { [NonTerm] }
 MacroParamList
@@ -355,10 +355,7 @@ Stmts
 
 Stmt :: { NonTerm }
 Stmt
-:	PreprocIfdef(Stmts)					{ $1 }
-|	PreprocIf(Stmts)					{ $1 }
-|	PreprocDefine Stmts PreprocUndef			{ Fix $ PreprocScopedDefine $1 (reverse $2) $3 }
-|	DeclStmt						{ $1 }
+:	DeclStmt						{ $1 }
 |	CompoundStmt						{ $1 }
 |	IfStmt							{ $1 }
 |	ForStmt							{ $1 }
@@ -376,6 +373,9 @@ Stmt
 |	return Expr ';'						{ Fix $ Return (Just $2) }
 |	switch '(' Expr ')' '{' SwitchCases '}'			{ Fix $ SwitchStmt $3 (reverse $6) }
 |	Comment							{ $1 }
+|	PreprocIfdef(Stmts)					{ $1 }
+|	PreprocIf(Stmts)					{ $1 }
+|	PreprocDefine Stmts PreprocUndef			{ Fix $ PreprocScopedDefine $1 (reverse $2) $3 }
 
 IfStmt :: { NonTerm }
 IfStmt
@@ -474,10 +474,10 @@ CompoundStmt
 -- Expressions that are safe for use as macro body without () around it..
 PreprocSafeExpr(x)
 :	LiteralExpr						{ $1 }
-|	'(' x ')'						{ Fix $ ParenExpr $2 }
-|	'(' QualType ')' x %prec CAST				{ Fix $ CastExpr $2 $4 }
-|	sizeof '(' Expr ')'					{ Fix $ SizeofExpr $3 }
 |	sizeof '(' QualType ')'					{ Fix $ SizeofType $3 }
+|	sizeof '(' Expr ')'					{ Fix $ SizeofExpr $3 }
+|	'(' QualType ')' x %prec CAST				{ Fix $ CastExpr $2 $4 }
+|	'(' x ')'						{ Fix $ ParenExpr $2 }
 
 ConstExpr :: { NonTerm }
 ConstExpr
@@ -485,6 +485,10 @@ ConstExpr
 
 PureExpr(x)
 :	PreprocSafeExpr(x)					{ $1 }
+|	'!' x							{ Fix $ UnaryExpr UopNot $2 }
+|	'~' x							{ Fix $ UnaryExpr UopNeg $2 }
+|	'-' x %prec NEG						{ Fix $ UnaryExpr UopMinus $2 }
+|	'&' x %prec ADDRESS					{ Fix $ UnaryExpr UopAddress $2 }
 |	x '!=' x						{ Fix $ BinaryExpr $1 BopNe $3 }
 |	x '==' x						{ Fix $ BinaryExpr $1 BopEq $3 }
 |	x '||' x						{ Fix $ BinaryExpr $1 BopOr $3 }
@@ -504,10 +508,6 @@ PureExpr(x)
 |	x '>=' x						{ Fix $ BinaryExpr $1 BopGe $3 }
 |	x '>>' x						{ Fix $ BinaryExpr $1 BopRsh $3 }
 |	x '?' x ':' x						{ Fix $ TernaryExpr $1 $3 $5 }
-|	'!' x							{ Fix $ UnaryExpr UopNot $2 }
-|	'~' x							{ Fix $ UnaryExpr UopNeg $2 }
-|	'-' x %prec NEG						{ Fix $ UnaryExpr UopMinus $2 }
-|	'&' x %prec ADDRESS					{ Fix $ UnaryExpr UopAddress $2 }
 
 LiteralExpr :: { NonTerm }
 LiteralExpr
diff --git a/src/Language/Cimple/Tokens.hs b/src/Language/Cimple/Tokens.hs
index a5ba3ff..8848a2a 100644
--- a/src/Language/Cimple/Tokens.hs
+++ b/src/Language/Cimple/Tokens.hs
@@ -125,7 +125,7 @@ data LexemeClass
 
     | ErrorToken
     | Eof
-    deriving (Enum, Bounded, Ord, Eq, Show, Generic)
+    deriving (Enum, Bounded, Ord, Eq, Show, Read, Generic)
 
 instance FromJSON LexemeClass
 instance ToJSON LexemeClass
diff --git a/test/Language/Cimple/ParserSpec.hs b/test/Language/Cimple/ParserSpec.hs
index 984f830..a86eda2 100644
--- a/test/Language/Cimple/ParserSpec.hs
+++ b/test/Language/Cimple/ParserSpec.hs
@@ -1,13 +1,176 @@
+{-# OPTIONS_GHC -Wwarn #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE Strict            #-}
 module Language.Cimple.ParserSpec where
 
-import           Data.Fix           (Fix (..))
-import           Test.Hspec         (Spec, describe, it, shouldBe,
-                                     shouldSatisfy)
+import qualified Data.ByteString.Lazy     as LBS
+import           Data.Fix                 (Fix (..))
+import           Data.Text                (Text)
+import qualified Data.Text                as Text
+import           Language.Cimple          (AlexPosn (..), Lexeme (..),
+                                           LexemeClass (..), NodeF (..),
+                                           Scope (..))
+import qualified Language.Cimple          as Cimple
+import           Language.Cimple.IO       (parseText)
+import qualified Language.Happy           as Happy
+import           Language.Happy.Arbitrary (Config, defConfig, genTokens)
+import           Test.Hspec               (Spec, describe, it, shouldBe,
+                                           shouldNotBe, shouldSatisfy)
+import           Test.QuickCheck          (Gen, forAll)
 
-import           Language.Cimple    (AlexPosn (..), Lexeme (..),
-                                     LexemeClass (..), NodeF (..), Scope (..))
-import           Language.Cimple.IO (parseText)
+
+sampleToken :: LexemeClass -> Text
+sampleToken c = case c of
+    IdConst               -> "ID_CONST"
+    IdFuncType            -> "func_cb"
+    IdStdType             -> "uint32_t"
+    IdSueType             -> "Sue_Type"
+    IdVar                 -> "var"
+    KwBreak               -> "break"
+    KwCase                -> "case"
+    KwConst               -> "const"
+    KwContinue            -> "continue"
+    KwDefault             -> "default"
+    KwDo                  -> "do"
+    KwElse                -> "else"
+    KwEnum                -> "enum"
+    KwExtern              -> "extern"
+    KwFor                 -> "for"
+    KwGnuPrintf           -> "gnu_printf"
+    KwGoto                -> "goto"
+    KwIf                  -> "if"
+    KwNonNull             -> "non_null"
+    KwNullable            -> "nullable"
+    KwReturn              -> "return"
+    KwSizeof              -> "sizeof"
+    KwStatic              -> "static"
+    KwStaticAssert        -> "static_assert"
+    KwStruct              -> "struct"
+    KwSwitch              -> "switch"
+    KwTypedef             -> "typedef"
+    KwUnion               -> "union"
+    KwVla                 -> "VLA"
+    KwVoid                -> "void"
+    KwWhile               -> "while"
+    LitFalse              -> "false"
+    LitTrue               -> "true"
+    LitChar               -> "'a'"
+    LitInteger            -> "123"
+    LitString             -> "\"str\""
+    LitSysInclude         -> "<stdio.h>"
+    PctAmpersand          -> "&"
+    PctAmpersandAmpersand -> "&&"
+    PctAmpersandEq        -> "&="
+    PctArrow              -> "->"
+    PctAsterisk           -> "*"
+    PctAsteriskEq         -> "*="
+    PctCaret              -> "^"
+    PctCaretEq            -> "^="
+    PctColon              -> ":"
+    PctComma              -> ","
+    PctEllipsis           -> "..."
+    PctEMark              -> "!"
+    PctEMarkEq            -> "!="
+    PctEq                 -> "="
+    PctEqEq               -> "=="
+    PctGreater            -> ">"
+    PctGreaterEq          -> ">="
+    PctGreaterGreater     -> ">>"
+    PctGreaterGreaterEq   -> ">>="
+    PctLBrace             -> "{\n"
+    PctLBrack             -> "["
+    PctLess               -> "<"
+    PctLessEq             -> "<="
+    PctLessLess           -> "<<"
+    PctLessLessEq         -> "<<="
+    PctLParen             -> "("
+    PctMinus              -> "-"
+    PctMinusEq            -> "-="
+    PctMinusMinus         -> "--"
+    PctPeriod             -> "."
+    PctPercent            -> "%"
+    PctPercentEq          -> "%="
+    PctPipe               -> "|"
+    PctPipeEq             -> "|="
+    PctPipePipe           -> "||"
+    PctPlus               -> "+"
+    PctPlusEq             -> "+="
+    PctPlusPlus           -> "++"
+    PctQMark              -> "?"
+    PctRBrace             -> "}"
+    PctRBrack             -> "]"
+    PctRParen             -> ")"
+    PctSemicolon          -> ";\n"
+    PctSlash              -> "/"
+    PctSlashEq            -> "/="
+    PctTilde              -> "~"
+    PpDefine              -> "\n#define"
+    PpDefined             -> "\n#defined"
+    PpElif                -> "\n#elif"
+    PpElse                -> "\n#else"
+    PpEndif               -> "\n#endif"
+    PpIf                  -> "\n#if"
+    PpIfdef               -> "\n#ifdef"
+    PpIfndef              -> "\n#ifndef"
+    PpInclude             -> "\n#include"
+    PpNewline             -> "\n"
+    PpUndef               -> "\n#undef"
+    CmtBlock              -> "/**"
+    CmtCommand            -> "@param"
+    CmtAttr               -> "[out]"
+    CmtEndDocSection      -> "/** @} */"
+    CmtPrefix             -> "//"
+    CmtIndent             -> "*"
+    CmtStart              -> "/*"
+    CmtStartCode          -> "/*!"
+    CmtStartBlock         -> "/***"
+    CmtStartDoc           -> "/**"
+    CmtStartDocSection    -> "/** @{"
+    CmtSpdxCopyright      -> "Copyright ©"
+    CmtSpdxLicense        -> "SPDX-License-Identifier:"
+    CmtCode               -> "@code"
+    CmtWord               -> "hello"
+    CmtRef                -> "`ref`"
+    CmtEnd                -> "*/\n"
+    IgnStart              -> "\n//!TOKSTYLE-\n"
+    IgnBody               -> "ignored stuff"
+    IgnEnd                -> "\n//!TOKSTYLE+\n"
+
+    ErrorToken            -> "!!ERROR!!"
+    Eof                   -> "!!EOF!!"
+
+
+config :: Config LexemeClass
+config = defConfig parseToken
+  where
+    parseToken :: Text -> LexemeClass
+    parseToken =
+        read
+        . Text.unpack
+        . (!! 2)
+        . concatMap (filter (not . Text.null) . Text.splitOn "\t")
+        . Text.splitOn " "
+
+grammar :: Maybe Happy.Grammar
+grammar = do
+    source <- Cimple.source
+    case Happy.runAlex (LBS.fromStrict source) Happy.parseGrammar of
+        Left err -> error err
+        Right ok -> return ok
+
+arbitraryCode :: Happy.Grammar -> Gen Text
+arbitraryCode g =
+    Text.intercalate " " . map sampleToken <$> genTokens config "TranslationUnit" g
+
+arbitrarySpec :: Spec
+arbitrarySpec = case grammar of
+    Nothing -> return ()
+    Just g ->
+        it "can handle arbitrary code" $
+            forAll (arbitraryCode g) $ \code -> do
+                case parseText code of
+                    Right _  -> return ()
+                    Left err -> err `shouldNotBe` ""
 
 
 isRight1 :: Either a [b] -> Bool
@@ -18,6 +181,8 @@ isRight1 _           = False
 spec :: Spec
 spec = do
     describe "C parsing" $ do
+        arbitrarySpec
+
         it "should parse a simple function" $ do
             let ast = parseText "int a(void) { return 3; }"
             ast `shouldSatisfy` isRight1