Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add bracketing to lexer #68

Merged
merged 25 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
dd87240
refactor: Add bracketing; kill withFC
croyzor Jun 27, 2024
1ad6d60
kill withFC
croyzor Oct 1, 2024
a5b4df9
Revert pullAndJuxt changes
croyzor Oct 1, 2024
39d101b
Drive-by: Update kind printing and remove Row kind
croyzor Oct 1, 2024
81b49bc
test: Add extra let binding tests
croyzor Oct 1, 2024
a4c47dc
Update golden tests
croyzor Oct 1, 2024
93e43c6
Fix warnings
croyzor Oct 1, 2024
48612d4
Wee cleanup
croyzor Oct 1, 2024
e4842a6
drive-by: Fix karlheinz list application
croyzor Oct 2, 2024
3c2be6f
fix: Don't allow trailing tokens when parsing between brackets
croyzor Oct 2, 2024
16b1c85
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Nov 5, 2024
4e9ac06
Propogate errors from parsers when going under brackets
croyzor Nov 6, 2024
a4997dc
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Nov 6, 2024
06f671e
Fix vector FCs
croyzor Nov 6, 2024
063634f
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Dec 17, 2024
c7eab1f
apply lints
croyzor Dec 17, 2024
859a144
Review comments
croyzor Dec 19, 2024
937ef01
Merge remote-tracking branch 'origin/main' into refactor/parser-wc
croyzor Dec 19, 2024
4806481
[refactor] Combine `brackets` and `within` (#73)
acl-cqc Dec 23, 2024
48e231b
Merge branch 'main' into refactor/parser-wc
croyzor Dec 23, 2024
7df8f65
Merge branch 'main' into refactor/parser-wc
croyzor Dec 23, 2024
44e1c3e
Merge opener and closer into openClose
croyzor Dec 23, 2024
ebf139f
Remove comments in BToken show instance
croyzor Dec 23, 2024
625451d
Uncurry
croyzor Dec 23, 2024
6cef983
run hlint
croyzor Dec 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions brat/Brat/Error.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module Brat.Error (ParseError(..)
,LengthConstraintF(..), LengthConstraint
,BracketErrMsg(..)
,ErrorMsg(..)
,Error(..), showError
,SrcErr(..)
Expand All @@ -9,6 +10,7 @@
) where

import Brat.FC
import Data.Bracket
import Brat.Syntax.Port (PortName)

import Data.List (intercalate)
Expand All @@ -26,6 +28,28 @@

type LengthConstraint = LengthConstraintF Int

data BracketErrMsg
= EOFInBracket BracketType -- FC in enclosing `Err` should point to the open bracket
-- FC here is opening; closing FC in the enclosing `Err`
| OpenCloseMismatch (FC, BracketType) BracketType
| UnexpectedClose BracketType

instance Show BracketErrMsg where
show (EOFInBracket b) = "File ended before this " ++ showOpen b ++ " was closed"
show (OpenCloseMismatch (openFC, bOpen) bClose) = unwords ["This"
,showClose bClose
,"doesn't match the"
,showOpen bOpen
,"at"
,show openFC
]
show (UnexpectedClose b) = unwords ["There is no"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty much an OpenCloseMismatch, without an opening (or where the opening is beginning of file) - consider combining the two, i.e.OpenCloseMismatch (Maybe (FC, BracketType)) BracketType. Or not, the conceptual similarity could just be confusing, up to you...

,showOpen b
,"for this"
,showClose b
,"to close"
]

data ErrorMsg
= TypeErr String
-- Term, Expected type, Actual type
Expand Down Expand Up @@ -83,6 +107,7 @@
-- The argument is the row of unused connectors
| ThunkLeftOvers String
| ThunkLeftUnders String
| BracketErr BracketErrMsg

instance Show ErrorMsg where
show (TypeErr x) = "Type error: " ++ x
Expand Down Expand Up @@ -166,6 +191,7 @@
show UnreachableBranch = "Branch cannot be reached"
show (ThunkLeftOvers overs) = "Expected function to address all inputs, but " ++ overs ++ " wasn't used"
show (ThunkLeftUnders unders) = "Expected function to return additional values of type: " ++ unders
show (BracketErr msg) = show msg


data Error = Err { fc :: Maybe FC
Expand Down Expand Up @@ -213,8 +239,8 @@
ls = lines contents
in case endLineN - startLineN of
0 -> [ls!!startLineN, highlightSection startCol endCol]
n | n > 0 -> let (first:rest) = drop (startLineN - 1) $ take (endLineN + 1) ls

Check warning on line 242 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 242 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
(last:rmid) = reverse rest

Check warning on line 243 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 243 in brat/Brat/Error.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
in [first, highlightSection startCol (length first)]
++ (reverse rmid >>= (\l -> [l, highlightSection 0 (length l)]))
++ [last, highlightSection 0 endCol]
Expand Down
6 changes: 6 additions & 0 deletions brat/Brat/FC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,9 @@ fcOf (WC fc _) = fc
-- TODO: Remove this
dummyFC :: a -> WC a
dummyFC = WC (FC (Pos 0 0) (Pos 0 0))

spanFC :: FC -> FC -> FC
spanFC afc bfc = FC (start afc) (end bfc)

spanFCOf :: WC a -> WC b -> FC
spanFCOf (WC afc _) (WC bfc _) = spanFC afc bfc
100 changes: 100 additions & 0 deletions brat/Brat/Lexer/Bracketed.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
module Brat.Lexer.Bracketed (BToken(..), brackets) where

import Data.Bracket
import Brat.Error (BracketErrMsg(..), Error(Err), ErrorMsg(..))
import Brat.FC
import Brat.Lexer.Token

import Data.List.NonEmpty (NonEmpty(..))
import Data.Bifunctor (first)
import Text.Megaparsec (PosState(..), SourcePos(..), TraversableStream(..), VisualStream(..))
import Text.Megaparsec.Pos (mkPos)

opener :: Tok -> Maybe BracketType
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest

enum OpenClose = Opening(BracketType) | Closing(BracketType) -- maybe there's a better name
openClose :: Tok -> Maybe OpenClose

Then you can do foo ... | Just (Opening b) <- openClose(blah) = ... and so on, and you get a much better case openClose ... of

Then zap both opener and closer, or if you really want, you can do

opener t | Just(Opening b) <- openClose t  = Just(b)
opener _ = Nothing

Could also make OpenClose incorporate the Maybe directly inside it, i.e. Open(BracketType) | Close(BracketType) | Neither

opener LParen = Just Paren
opener LSquare = Just Square
opener LBrace = Just Brace
opener _ = Nothing

closer :: Tok -> Maybe BracketType
closer RParen = Just Paren
closer RSquare = Just Square
closer RBrace = Just Brace
closer _ = Nothing

-- Well bracketed tokens
data BToken
= Bracketed FC BracketType [BToken]
| FlatTok Token
deriving (Eq, Ord)

btokLen :: BToken -> Int
btokLen (FlatTok tok) = length (show tok)
btokLen (Bracketed _ _ bs) = sum (btokLen <$> bs) + 2

instance Show BToken where
show (FlatTok t) = show t
show (Bracketed _ b ts) = showOpen b ++ show ts ++ showClose b
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

show ts here will insert commas between the elements of ts (a list), right? Shouldn't we also insert commas after the opening bracket and before the closing bracket? And presumably we do something about the comma token itself...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or use showTokens ts rather than show ts, and then there'll be no separators added between tokens?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, so I realize you can't showTokens ts here because that needs a Proxy argument but nonetheless, this inserts a comma between the tokens - is that what you want? I tried changing show ts to concatMap show ts which is similar but without the inserted commas and all tests still pass, suggesting we have no coverage of this so we may not even have been aware of the show-inserted commas???


instance VisualStream [BToken] where
showTokens _ = concatMap show
tokensLength _ = sum . fmap btokLen

instance TraversableStream [BToken] where
reachOffsetNoLine i pos = let fileName = sourceName (pstateSourcePos pos)
(Pos line col, rest) = skipChars (i - pstateOffset pos + 1) (pstateInput pos)
in pos
{ pstateInput = rest
, pstateOffset = max (pstateOffset pos) i
, pstateSourcePos = SourcePos fileName (mkPos line) (mkPos col)
}
where
skipChars :: Int -> [BToken] -> (Pos, [BToken])
skipChars 0 inp@(Bracketed fc _ _:_) = (start fc, inp)

Check warning on line 53 in brat/Brat/Lexer/Bracketed.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive

Check warning on line 53 in brat/Brat/Lexer/Bracketed.hs

View workflow job for this annotation

GitHub Actions / build

Pattern match(es) are non-exhaustive
skipChars 0 inp@(FlatTok t:_) = (start (fc t), inp)
skipChars i ((Bracketed fc b bts):rest) =
let Pos closeLine closeCol = end fc
closeFC = FC (Pos closeLine (closeCol - 1)) (Pos closeLine closeCol)
in skipChars (i - 1) (bts ++ [FlatTok (Token closeFC (closeTok b))] ++ rest)
skipChars i (FlatTok t:rest)
| i >= tokenLen t = skipChars (i - tokenLen t) rest
| otherwise = (start (fc t), FlatTok t:rest)

closeTok Paren = RParen
closeTok Square = RSquare
closeTok Brace = RBrace

eofErr :: FC -> BracketType -> Error
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I suggested you curry this earlier and now I'm gonna make the reverse suggestion - sorry! But the "unit" of bracket, in openCloseMismatchErr, is the (FC, BracketType) and I think it has to be that way, so we should probably use that here and in unexpectedCloseErr too - this would simplify brackets as you would then be able to let-bind locals of type (FC, BracketType)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think I'm really getting it, but have uncurried anyway

eofErr fc b = Err (Just fc) (BracketErr (EOFInBracket b))

openCloseMismatchErr :: (FC, BracketType) -> (FC, BracketType) -> Error
openCloseMismatchErr open (fcClose, bClose)
= Err (Just fcClose) (BracketErr (OpenCloseMismatch open bClose))

unexpectedCloseErr :: FC -> BracketType -> Error
unexpectedCloseErr fc b = Err (Just fc) (BracketErr (UnexpectedClose b))

brackets :: [Token] -> Either Error [BToken]
brackets ts = helper ts >>= \case
(res, Nothing) -> pure res
(_, Just (b, t:|_)) -> Left $ unexpectedCloseErr (fc t) b
where
-- Given a list of tokens, either
-- (success) return [BToken] consisting of the prefix of the input [Token] in which all opened brackets are closed,
-- and any remaining [Token] beginning with a closer that does not match any opener in the input
-- (either Nothing = no remaining tokens; or tokens with the BracketType that the first token closes)
-- (failure) return an error, if a bracket opened in the input, is either not closed (EOF) or does not match the closer
helper :: [Token] -> Either Error ([BToken], Maybe (BracketType, NonEmpty Token))
helper [] = pure ([], Nothing)
helper (t:ts)
| Just b <- opener (_tok t) = let openFC = fc t in helper ts >>= \case
(_, Nothing) -> Left $ eofErr openFC b
(within, Just (b', r :| rs)) ->
let closeFC = fc r
enclosingFC = spanFC openFC closeFC
in if b == b' then
first (Bracketed enclosingFC b within:) <$> helper rs
else
Left $ openCloseMismatchErr (openFC, b) (closeFC, b')
| Just b <- closer (_tok t) = pure ([], Just (b, t :| ts)) -- return closer for caller
| otherwise = first (FlatTok t:) <$> helper ts
4 changes: 2 additions & 2 deletions brat/Brat/Lexer/Flat.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ tok = try (char '(' $> LParen)
<|> try (char ')' $> RParen)
<|> try (char '{' $> LBrace)
<|> try (char '}' $> RBrace)
<|> try (char '[' $> LBracket)
<|> try (char ']' $> RBracket)
<|> try (char '[' $> LSquare)
<|> try (char ']' $> RSquare)
<|> try (Underscore <$ string "_")
<|> try (Quoted <$> (char '"' *> printChar `manyTill` char '"'))
<|> try (FloatLit <$> float)
Expand Down
15 changes: 9 additions & 6 deletions brat/Brat/Lexer/Token.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Brat.Lexer.Token (Tok(..), Token(..), Keyword(..)) where
module Brat.Lexer.Token (Tok(..), Token(..), Keyword(..), tokenLen) where

import Brat.FC

Expand All @@ -21,8 +21,8 @@ data Tok
| RParen
| LBrace
| RBrace
| LBracket
| RBracket
| LSquare
| RSquare
| Semicolon
| Into
| Comma
Expand Down Expand Up @@ -66,8 +66,8 @@ instance Show Tok where
show RParen = ")"
show LBrace = "{"
show RBrace = "}"
show LBracket = "["
show RBracket = "]"
show LSquare = "["
show RSquare = "]"
show Semicolon = ";"
show Into = "|>"
show Comma = ","
Expand Down Expand Up @@ -102,7 +102,8 @@ instance Eq Token where
(Token fc t) == (Token fc' t') = t == t' && fc == fc'

instance Show Token where
show (Token _ t) = show t ++ " "
show (Token _ t) = show t

instance Ord Token where
compare (Token (FC st nd) _) (Token (FC st' nd') _) = if st == st'
then compare nd nd'
Expand All @@ -128,6 +129,8 @@ instance Show Keyword where
tokLen :: Tok -> Int
tokLen = length . show

tokenLen = tokLen . _tok

instance VisualStream [Token] where
tokensLength _ = sum . fmap (\(Token _ t) -> tokLen t)
showTokens _ = concatMap show
Loading
Loading