From 61dffefe0c2cd20d5b1a48cdbb0c7448dd88b1c1 Mon Sep 17 00:00:00 2001 From: mikeb26 <83850730+mikeb26@users.noreply.github.com> Date: Sat, 23 Nov 2024 19:58:44 -0500 Subject: [PATCH] Fix PGN parsing of games w/ nested variations (#125) * Fix PGN parsing of games w/ nested variations Existing moveListWithComments() contains a parsing bug when the pgn in question contains a game with nested variations. In this case the moveListTokenRe regex doesn't work as expected and results in attempts to parse an illegal game. To demonstrate the problem this commit adds a TestScannerWithNested() test case along with a 0013.pgn fixture which is an export of a lichess study. This commit also fixes the problem by adding a stripVariations() preprocess step in moveListWithComments(). I have a subsequent commit coming which adds a Scanner option to include variations in the game list when processing a pgn. * simplified tests --------- Co-authored-by: Logan Spears --- fixtures/pgns/0013.pgn | 39 +++++++++++++++++++++++++++++ pgn.go | 56 +++++++++++++++++++++++++++++++++++++++--- pgn_test.go | 14 ++++++++--- 3 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 fixtures/pgns/0013.pgn diff --git a/fixtures/pgns/0013.pgn b/fixtures/pgns/0013.pgn new file mode 100644 index 0000000..8397e87 --- /dev/null +++ b/fixtures/pgns/0013.pgn @@ -0,0 +1,39 @@ +[Event "Test Nested Variations for https://github.com/notnil/chess: Smith-Morra Accepted; Paulsen Formation"] +[Site "https://lichess.org/study/lOO87pCX/BZELlaSA"] +[Result "*"] +[UTCDate "2023.05.26"] +[UTCTime "16:09:59"] +[Variant "Standard"] +[ECO "B21"] +[Opening "Sicilian Defense: Smith-Morra Gambit Accepted, Paulsen Formation"] +[Annotator "https://lichess.org/@/bughouse26"] + +1. e4 c5 2. d4 cxd4 3. c3 dxc3 4. Nxc3 Nc6 5. Nf3 e6 6. Bc4 a6 7. O-O b5 { move comment with with braces ? } 8. Bb3 * + + +[Event "Test Nested Variations for https://github.com/notnil/chess: Smith-Morra Declined; Alapin Formation"] +[Site "https://lichess.org/study/lOO87pCX/fC2Vw8Fz"] +[Result "*"] +[UTCDate "2023.05.26"] +[UTCTime "16:13:20"] +[Variant "Standard"] +[ECO "B22"] +[Opening "Sicilian Defense: Alapin Variation, Smith-Morra Declined"] +[Annotator "https://lichess.org/@/bughouse26"] + +1. e4 c5 2. d4 cxd4 3. c3 Nf6 4. e5 Nd5 5. Nf3 Nc6 6. cxd4 d6 { chapter 2 main line move comment with parens ( test ) } 7. Bc4 Nb6 (7... dxe5 { chapter 2 variation move comment with unmatched paren ( } 8. dxe5) 8. Bb5 * + + +[Event "Test Nested Variations for https://github.com/notnil/chess: Smith-Morra Declined; Push Variation"] +[Site "https://lichess.org/study/lOO87pCX/XaAI645Y"] +[Result "*"] +[UTCDate "2023.05.26"] +[UTCTime "16:16:43"] +[Variant "Standard"] +[ECO "B21"] +[Opening "Sicilian Defense: Smith-Morra Gambit Declined, Push Variation"] +[Annotator "https://lichess.org/@/bughouse26"] + +1. e4 c5 2. d4 cxd4 3. c3 d3 4. Bxd3 Nc6 { chapter 3 main line move comment } 5. Nf3 g6 6. O-O (6. c4 Bg7 { chapter 3 variation 1 comment } 7. O-O) 6... Bg7 7. Qe2 d6 (7... Nf6 8. Rd1 { chapter 3 variation 2 comment } 8... O-O (8... d6 9. e5 dxe5 { chapter 3 nested variation comment } 10. Bxg6 Qc7 { chapter 3 double nested variation comment with parens and moves ( 1. e4 e5 2. Nf3 ) } (10... Qxd1+ 11. Qxd1) 11. Bc2) 9. e5) 8. Rd1 Qc7 9. Na3 a6 10. Bf4 Nf6 11. e5 dxe5 (11... Nh5 12. exd6) (11... Nd5 12. exd6) * + + diff --git a/pgn.go b/pgn.go index e22c689..c14b9b7 100644 --- a/pgn.go +++ b/pgn.go @@ -152,7 +152,11 @@ func (a multiDecoder) Decode(pos *Position, s string) (*Move, error) { func decodePGN(pgn string) (*Game, error) { tagPairs := getTagPairs(pgn) - moveComments, outcome := moveListWithComments(pgn) + moveComments, outcome, err := moveListWithComments(pgn) + if err != nil { + return nil, err + } + gameFuncs := []func(*Game){} for _, tp := range tagPairs { if strings.ToLower(tp.Key) == "fen" { @@ -179,6 +183,7 @@ func decodePGN(pgn string) (*Game, error) { g.comments = append(g.comments, move.Comments) } g.outcome = outcome + return g, nil } @@ -233,10 +238,15 @@ type moveWithComment struct { var moveListTokenRe = regexp.MustCompile(`(?:\d+\.)|(O-O(?:-O)?|\w*[abcdefgh][12345678]\w*(?:=[QRBN])?(?:\+|#)?)|(?:\{([^}]*)\})|(?:\([^)]*\))|(\*|0-1|1-0|1\/2-1\/2)`) -func moveListWithComments(pgn string) ([]moveWithComment, Outcome) { +func moveListWithComments(pgn string) ([]moveWithComment, Outcome, error) { pgn = stripTagPairs(pgn) var outcome Outcome moves := []moveWithComment{} + // moveListTokenRe doesn't work w/ nested variations + pgn, err := stripVariations(pgn) + if err != nil { + return moves, outcome, err + } for _, match := range moveListTokenRe.FindAllStringSubmatch(pgn, -1) { move, commentText, outcomeText := match[1], match[2], match[3] @@ -257,7 +267,7 @@ func moveListWithComments(pgn string) ([]moveWithComment, Outcome) { moves = append(moves, moveWithComment{MoveStr: move}) } } - return moves, outcome + return moves, outcome, nil } func stripTagPairs(pgn string) string { @@ -271,3 +281,43 @@ func stripTagPairs(pgn string) string { } return strings.Join(cp, "\n") } + +func stripVariations(pgn string) (string, error) { + var ret strings.Builder + + variationDepth := 0 + inCommentSection := false + + for _, c := range pgn { + if c == '{' { + if inCommentSection { + return "", fmt.Errorf("chess: pgn decode mismatched { in variation: %v", pgn) + } + inCommentSection = true + } else if c == '}' { + if !inCommentSection { + return "", fmt.Errorf("chess: pgn decode mismatched } in variation: %v", pgn) + } + inCommentSection = false + } + if !inCommentSection && c == '(' { + variationDepth++ + continue + } + if !inCommentSection && c == ')' { + if variationDepth <= 0 { + return "", fmt.Errorf("chess: pgn decode mismatched parenthesis in variation: %v", pgn) + } + variationDepth-- + continue + } + if variationDepth == 0 { + _, err := ret.WriteRune(c) + if err != nil { + return "", err + } + } + } + + return ret.String(), nil +} diff --git a/pgn_test.go b/pgn_test.go index beec82d..eeab62d 100644 --- a/pgn_test.go +++ b/pgn_test.go @@ -136,7 +136,12 @@ func TestWriteComments(t *testing.T) { } func TestScanner(t *testing.T) { - for _, fname := range []string{"fixtures/pgns/0006.pgn", "fixtures/pgns/0007.pgn"} { + m := map[string]int{ + "fixtures/pgns/0006.pgn": 5, + "fixtures/pgns/0007.pgn": 5, + "fixtures/pgns/0013.pgn": 3, + } + for fname, count := range m { f, err := os.Open(fname) if err != nil { panic(err) @@ -146,10 +151,13 @@ func TestScanner(t *testing.T) { games := []*Game{} for scanner.Scan() { game := scanner.Next() + if len(game.Moves()) == 0 { + continue + } games = append(games, game) } - if len(games) != 5 { - t.Fatalf(fname+" expected 5 games but got %d", len(games)) + if len(games) != count { + t.Fatalf(fname+" expected %d games but got %d", count, len(games)) } } }