Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add bracketing to lexer #68

Merged
merged 25 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
dd87240
refactor: Add bracketing; kill withFC
croyzor Jun 27, 2024
1ad6d60
kill withFC
croyzor Oct 1, 2024
a5b4df9
Revert pullAndJuxt changes
croyzor Oct 1, 2024
39d101b
Drive-by: Update kind printing and remove Row kind
croyzor Oct 1, 2024
81b49bc
test: Add extra let binding tests
croyzor Oct 1, 2024
a4c47dc
Update golden tests
croyzor Oct 1, 2024
93e43c6
Fix warnings
croyzor Oct 1, 2024
48612d4
Wee cleanup
croyzor Oct 1, 2024
e4842a6
drive-by: Fix karlheinz list application
croyzor Oct 2, 2024
3c2be6f
fix: Don't allow trailing tokens when parsing between brackets
croyzor Oct 2, 2024
16b1c85
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Nov 5, 2024
4e9ac06
Propogate errors from parsers when going under brackets
croyzor Nov 6, 2024
a4997dc
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Nov 6, 2024
06f671e
Fix vector FCs
croyzor Nov 6, 2024
063634f
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Dec 17, 2024
c7eab1f
apply lints
croyzor Dec 17, 2024
859a144
Review comments
croyzor Dec 19, 2024
937ef01
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Dec 19, 2024
4806481
[refactor] Combine `brackets` and `within` (#73)
acl-cqc Dec 23, 2024
48e231b
Merge branch 'main' into refactor/parser-wc
croyzor Dec 23, 2024
7df8f65
Merge branch 'main' into refactor/parser-wc
croyzor Dec 23, 2024
44e1c3e
Merge opener and closer into openClose
croyzor Dec 23, 2024
ebf139f
Remove comments in BToken show instance
croyzor Dec 23, 2024
625451d
Uncurry
croyzor Dec 23, 2024
6cef983
run hlint
croyzor Dec 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions brat/Brat/Error.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Brat.Error (ParseError(..)
,LengthConstraintF(..), LengthConstraint
,BracketErrMsg(..)
,ErrorMsg(..)
,Error(..), showError
,SrcErr(..)
Expand All @@ -9,6 +10,7 @@
) where

import Brat.FC
import Data.Bracket
import Brat.Syntax.Port (PortName)

import Data.List (intercalate)
Expand All @@ -26,6 +28,28 @@

type LengthConstraint = LengthConstraintF Int

data BracketErrMsg
= EOFInBracket BracketType -- FC in enclosing `Err` should point to the open bracket
-- FC here is opening; closing FC in the enclosing `Err`
| OpenCloseMismatch (FC, BracketType) BracketType
| UnexpectedClose BracketType

instance Show BracketErrMsg where
show (EOFInBracket b) = "File ended before this " ++ showOpen b ++ " was closed"
show (OpenCloseMismatch (openFC, bOpen) bClose) = unwords ["This"
,showClose bClose
,"doesn't match the"
,showOpen bOpen
,"at"
,show openFC
]
show (UnexpectedClose b) = unwords ["There is no"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty much an OpenCloseMismatch, without an opening (or where the opening is beginning of file) - consider combining the two, i.e.OpenCloseMismatch (Maybe (FC, BracketType)) BracketType. Or not, the conceptual similarity could just be confusing, up to you...

,showOpen b
,"for this"
,showClose b
,"to close"
]

data ErrorMsg
= TypeErr String
-- Term, Expected type, Actual type
Expand Down Expand Up @@ -83,6 +107,7 @@
-- The argument is the row of unused connectors
| ThunkLeftOvers String
| ThunkLeftUnders String
| BracketErr BracketErrMsg

instance Show ErrorMsg where
show (TypeErr x) = "Type error: " ++ x
Expand Down Expand Up @@ -166,6 +191,7 @@
show UnreachableBranch = "Branch cannot be reached"
show (ThunkLeftOvers overs) = "Expected function to address all inputs, but " ++ overs ++ " wasn't used"
show (ThunkLeftUnders unders) = "Expected function to return additional values of type: " ++ unders
show (BracketErr msg) = show msg


data Error = Err { fc :: Maybe FC
Expand Down Expand Up @@ -213,8 +239,8 @@
ls = lines contents
in case endLineN - startLineN of
0 -> [ls!!startLineN, highlightSection startCol endCol]
n | n > 0 -> let (first:rest) = drop (startLineN - 1) $ take (endLineN + 1) ls

Check warning on line 242 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 242 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
(last:rmid) = reverse rest

Check warning on line 243 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 243 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
in [first, highlightSection startCol (length first)]
++ (reverse rmid >>= (\l -> [l, highlightSection 0 (length l)]))
++ [last, highlightSection 0 endCol]
Expand Down
6 changes: 6 additions & 0 deletions brat/Brat/FC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,9 @@ fcOf (WC fc _) = fc
-- TODO: Remove this
dummyFC :: a -> WC a
dummyFC = WC (FC (Pos 0 0) (Pos 0 0))

spanFC :: FC -> FC -> FC
spanFC afc bfc = FC (start afc) (end bfc)

spanFCOf :: WC a -> WC b -> FC
spanFCOf (WC afc _) (WC bfc _) = spanFC afc bfc
107 changes: 107 additions & 0 deletions brat/Brat/Lexer/Bracketed.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
module Brat.Lexer.Bracketed (BToken(..), brackets) where

import Data.Bracket
import Brat.Error (BracketErrMsg(..), Error(Err), ErrorMsg(..))
import Brat.FC
import Brat.Lexer.Token
import Bwd

import Text.Megaparsec (PosState(..), SourcePos(..), TraversableStream(..), VisualStream(..))
import Text.Megaparsec.Pos (mkPos)

opener :: Tok -> Maybe BracketType
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest

enum OpenClose = Opening(BracketType) | Closing(BracketType) -- maybe there's a better name
openClose :: Tok -> Maybe OpenClose

Then you can do foo ... | Just (Opening b) <- openClose(blah) = ... and so on, and you get a much better case openClose ... of

Then zap both opener and closer, or if you really want, you can do

opener t | Just(Opening b) <- openClose t  = Just(b)
opener _ = Nothing

Could also make OpenClose incorporate the Maybe directly inside it, i.e. Open(BracketType) | Close(BracketType) | Neither

opener LParen = Just Paren
opener LSquare = Just Square
opener LBrace = Just Brace
opener _ = Nothing

closer :: Tok -> Maybe BracketType
closer RParen = Just Paren
closer RSquare = Just Square
closer RBrace = Just Brace
closer _ = Nothing

-- Well bracketed tokens
data BToken
= Bracketed FC BracketType [BToken]
| FlatTok Token
deriving (Eq, Ord)

btokLen :: BToken -> Int
btokLen (FlatTok tok) = length (show tok)
btokLen (Bracketed _ _ bs) = sum (btokLen <$> bs) + 2

instance Show BToken where
show (FlatTok t) = show t
show (Bracketed _ b ts) = showOpen b ++ show ts ++ showClose b
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

show ts here will insert commas between the elements of ts (a list), right? Shouldn't we also insert commas after the opening bracket and before the closing bracket? And presumably we do something about the comma token itself...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or use showTokens ts rather than show ts, and then there'll be no separators added between tokens?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, so I realize you can't showTokens ts here because that needs a Proxy argument but nonetheless, this inserts a comma between the tokens - is that what you want? I tried changing show ts to concatMap show ts which is similar but without the inserted commas and all tests still pass, suggesting we have no coverage of this so we may not even have been aware of the show-inserted commas???


instance VisualStream [BToken] where
showTokens _ = concatMap show
tokensLength _ = sum . fmap btokLen

instance TraversableStream [BToken] where
reachOffsetNoLine i pos = let fileName = sourceName (pstateSourcePos pos)
(Pos line col, rest) = skipChars (i - pstateOffset pos + 1) (pstateInput pos)
in pos
{ pstateInput = rest
, pstateOffset = max (pstateOffset pos) i
, pstateSourcePos = SourcePos fileName (mkPos line) (mkPos col)
}
where
skipChars :: Int -> [BToken] -> (Pos, [BToken])
skipChars 0 inp@(Bracketed fc _ _:_) = (start fc, inp)

Check warning on line 52 in brat/Brat/Lexer/Bracketed.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 52 in brat/Brat/Lexer/Bracketed.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
skipChars 0 inp@(FlatTok t:_) = (start (fc t), inp)
skipChars i ((Bracketed fc b bts):rest) =
let Pos closeLine closeCol = (end fc)

Check warning on line 55 in brat/Brat/Lexer/Bracketed.hs

View workflow job for this annotation

GitHub Actions / hlint

Suggestion in module Brat.Lexer.Bracketed: Redundant bracket ▫︎ Found: "let\n Pos closeLine closeCol = (end fc)\n closeFC\n = FC (Pos closeLine (closeCol - 1)) (Pos closeLine closeCol)\nin\n skipChars\n (i - 1) (bts ++ [FlatTok (Token closeFC (closeTok b))] ++ rest)" ▫︎ Perhaps: "let\n Pos closeLine closeCol = end fc\n closeFC\n = FC (Pos closeLine (closeCol - 1)) (Pos closeLine closeCol)\nin\n skipChars\n (i - 1) (bts ++ [FlatTok (Token closeFC (closeTok b))] ++ rest)"
closeFC = FC (Pos closeLine (closeCol - 1)) (Pos closeLine closeCol)
in skipChars (i - 1) (bts ++ [FlatTok (Token closeFC (closeTok b))] ++ rest)
skipChars i (FlatTok t:rest)
| i >= tokenLen t = skipChars (i - tokenLen t) rest
| otherwise = (start (fc t), FlatTok t:rest)

closeTok Paren = RParen
closeTok Square = RSquare
closeTok Brace = RBrace

eofErr :: FC -> BracketType -> Error
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I suggested you curry this earlier and now I'm gonna make the reverse suggestion - sorry! But the "unit" of bracket, in openCloseMismatchErr, is the (FC, BracketType) and I think it has to be that way, so we should probably use that here and in unexpectedCloseErr too - this would simplify brackets as you would then be able to let-bind locals of type (FC, BracketType)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I'm really getting it, but have uncurried anyway

eofErr fc b = Err (Just fc) (BracketErr (EOFInBracket b))

openCloseMismatchErr :: (FC, BracketType) -> (FC, BracketType) -> Error
openCloseMismatchErr open (fcClose, bClose)
= Err (Just fcClose) (BracketErr (OpenCloseMismatch open bClose))

unexpectedCloseErr :: FC -> BracketType -> Error
unexpectedCloseErr fc b = Err (Just fc) (BracketErr (UnexpectedClose b))

-- Parse between two brackets of the same type
within :: (FC, BracketType) -- The nearest opening bracket to the left of us
acl-cqc marked this conversation as resolved.
Show resolved Hide resolved
-> Bwd BToken -- The tokens that we've passed since that open bracket
-> [Token] -- The tokens to the right of us, unparsed
-> Either Error (FC -- The location of the closing bracket
,Bwd BToken -- The tokens between the open and close
,[Token] -- Tokens after the closing bracket
)
within (openFC, b) _ [] = Left $ eofErr openFC b
within ctx@(_, b) acc (t:ts)
| Just b' <- closer (_tok t) = if b' == b
then pure (fc t, acc, ts)
else Left $ openCloseMismatchErr ctx (fc t, b')
| Just b' <- opener (_tok t) = do
let innerOpenFC = fc t
(innerCloseFC, xs, ts) <- within (innerOpenFC, b') B0 ts
let fc = spanFC innerOpenFC innerCloseFC
within ctx (acc :< Bracketed fc b' (xs <>> [])) ts
| otherwise = within ctx (acc :< FlatTok t) ts

brackets :: [Token] -> Either Error [BToken]
brackets ts = (<>> []) <$> bracketsWorker B0 ts
where
bracketsWorker :: Bwd BToken -> [Token] -> Either Error (Bwd BToken)
bracketsWorker acc [] = pure acc
bracketsWorker acc (t:ts)
| Just b <- opener (_tok t) = do
(closeFC, xs, ts) <- within (fc t, b) B0 ts
let enclosingFC = spanFC (fc t) closeFC
bracketsWorker (acc :< Bracketed enclosingFC b (xs <>> [])) ts
| Just b <- closer (_tok t) = Left $ unexpectedCloseErr (fc t) b
| otherwise = bracketsWorker (acc :< FlatTok t) ts
4 changes: 2 additions & 2 deletions brat/Brat/Lexer/Flat.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ tok = try (char '(' $> LParen)
<|> try (char ')' $> RParen)
<|> try (char '{' $> LBrace)
<|> try (char '}' $> RBrace)
<|> try (char '[' $> LBracket)
<|> try (char ']' $> RBracket)
<|> try (char '[' $> LSquare)
<|> try (char ']' $> RSquare)
<|> try (Underscore <$ string "_")
<|> try (Quoted <$> (char '"' *> printChar `manyTill` char '"'))
<|> try (FloatLit <$> float)
Expand Down
15 changes: 9 additions & 6 deletions brat/Brat/Lexer/Token.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Brat.Lexer.Token (Tok(..), Token(..), Keyword(..)) where
module Brat.Lexer.Token (Tok(..), Token(..), Keyword(..), tokenLen) where

import Brat.FC

Expand All @@ -21,8 +21,8 @@ data Tok
| RParen
| LBrace
| RBrace
| LBracket
| RBracket
| LSquare
| RSquare
| Semicolon
| Into
| Comma
Expand Down Expand Up @@ -66,8 +66,8 @@ instance Show Tok where
show RParen = ")"
show LBrace = "{"
show RBrace = "}"
show LBracket = "["
show RBracket = "]"
show LSquare = "["
show RSquare = "]"
show Semicolon = ";"
show Into = "|>"
show Comma = ","
Expand Down Expand Up @@ -102,7 +102,8 @@ instance Eq Token where
(Token fc t) == (Token fc' t') = t == t' && fc == fc'

instance Show Token where
show (Token _ t) = show t ++ " "
show (Token _ t) = show t

instance Ord Token where
compare (Token (FC st nd) _) (Token (FC st' nd') _) = if st == st'
then compare nd nd'
Expand All @@ -128,6 +129,8 @@ instance Show Keyword where
tokLen :: Tok -> Int
tokLen = length . show

tokenLen = tokLen . _tok

instance VisualStream [Token] where
tokensLength _ = sum . fmap (\(Token _ t) -> tokLen t)
showTokens _ = concatMap show
Loading
Loading