diff --git a/pgn.go b/pgn.go index 416eef3..35af3c9 100644 --- a/pgn.go +++ b/pgn.go @@ -13,16 +13,28 @@ import ( // from concatenated PGN files. It is designed to // replace GamesFromPGN in order to handle very large // PGN database files such as https://database.lichess.org/. +type ScannerOpts struct { + ExpandVariations bool +} type Scanner struct { scanr *bufio.Scanner - game *Game + games []*Game err error + opts ScannerOpts } -// NewScanner returns a new scanner. +// NewScanner returns a new scanner with default options func NewScanner(r io.Reader) *Scanner { + defaultOpts := ScannerOpts{ExpandVariations: false} + + return NewScannerWithOptions(r, defaultOpts) +} + +// NewScanner returns a new scanner with explicit options +func NewScannerWithOptions(r io.Reader, o ScannerOpts) *Scanner { scanr := bufio.NewScanner(r) - return &Scanner{scanr: scanr} + g := make([]*Game, 0) + return &Scanner{scanr: scanr, opts: o, games: g} } type scanState int @@ -43,15 +55,18 @@ func (s *Scanner) Scan() bool { return false } s.err = nil + if len(s.games) > 0 { + return true + } var sb strings.Builder state := notInPGN - setGame := func() bool { - game, err := decodePGN(sb.String()) + setGames := func() bool { + games, err := decodePGNs(sb.String(), s.opts.ExpandVariations) if err != nil { s.err = err return false } - s.game = game + s.games = games return true } for { @@ -62,7 +77,7 @@ func (s *Scanner) Scan() bool { if s.err == nil { s.err = io.EOF } - return setGame() + return setGames() } line := strings.TrimSpace(s.scanr.Text()) isTagPair := strings.HasPrefix(line, "[") @@ -81,7 +96,7 @@ func (s *Scanner) Scan() bool { sb.WriteString(line + "\n") case inMoves: if line == "" { - return setGame() + return setGames() } sb.WriteString(line + "\n") } @@ -90,7 +105,14 @@ func (s *Scanner) Scan() bool { // Next returns the game from the most recent Scan. func (s *Scanner) Next() *Game { - return s.game + if len(s.games) == 0 { + return nil + } + + g := s.games[0] + s.games = s.games[1:] + + return g } // Err returns an error encountered during scanning. @@ -151,8 +173,21 @@ func (a multiDecoder) Decode(pos *Position, s string) (*Move, error) { } func decodePGN(pgn string) (*Game, error) { + gameList, err := decodePGNs(pgn, false) + if err != nil { + return nil, err + } + if len(gameList) != 1 { + return nil, fmt.Errorf("chess: pgn decode error unexpected game count %v", len(gameList)) + } + + return gameList[0], nil +} + +func decodePGNs(pgn string, expandVariations bool) ([]*Game, error) { + ret := []*Game{} tagPairs := getTagPairs(pgn) - moveComments, outcome, err := moveListWithComments(pgn) + moveListSet, err := moveListSetWithComments(pgn, expandVariations) if err != nil { return nil, err } @@ -169,23 +204,28 @@ func decodePGN(pgn string) (*Game, error) { } } gameFuncs = append(gameFuncs, TagPairs(tagPairs)) - g := NewGame(gameFuncs...) - g.ignoreAutomaticDraws = true - decoder := multiDecoder([]Decoder{AlgebraicNotation{}, LongAlgebraicNotation{}, UCINotation{}}) - for _, move := range moveComments { - m, err := decoder.Decode(g.Position(), move.MoveStr) - if err != nil { - return nil, fmt.Errorf("chess: pgn decode error %s on move %d", err.Error(), g.Position().moveCount) - } - if err := g.Move(m); err != nil { - return nil, fmt.Errorf("chess: pgn invalid move error %s on move %d", err.Error(), g.Position().moveCount) + + for idx, ml := range moveListSet.moveLists { + g := NewGame(gameFuncs...) + g.ignoreAutomaticDraws = true + decoder := multiDecoder([]Decoder{AlgebraicNotation{}, LongAlgebraicNotation{}, UCINotation{}}) + for _, move := range ml.moves { + m, err := decoder.Decode(g.Position(), move.MoveStr) + if err != nil { + return nil, fmt.Errorf("chess: pgn decode error %s on variation %d move %d", err.Error(), idx, g.Position().moveCount) + } + if err := g.Move(m); err != nil { + return nil, fmt.Errorf("chess: pgn invalid move error %s on variation %d move %d", err.Error(), idx, g.Position().moveCount) + } + g.comments = g.comments[:len(g.comments)-1] + g.comments = append(g.comments, move.Comments) } - g.comments = g.comments[:len(g.comments)-1] - g.comments = append(g.comments, move.Comments) + g.outcome = ml.outcome + + ret = append(ret, g) } - g.outcome = outcome - return g, nil + return ret, nil } func encodePGN(g *Game) string { @@ -237,16 +277,43 @@ type moveWithComment struct { Comments []string } +type moveListAndOutcome struct { + moves []moveWithComment + outcome Outcome +} + +type moveListSet struct { + moveLists []moveListAndOutcome +} + var moveListTokenRe = regexp.MustCompile(`(?:\d+\.)|(O-O(?:-O)?|\w*[abcdefgh][12345678]\w*(?:=[QRBN])?(?:\+|#)?)|(?:\{([^}]*)\})|(?:\([^)]*\))|(\*|0-1|1-0|1\/2-1\/2)`) -func moveListWithComments(pgn string) ([]moveWithComment, Outcome, error) { +func moveListSetWithComments(pgn string, expandVariations bool) (moveListSet, error) { + ret := moveListSet{ + moveLists: []moveListAndOutcome{}, + } + + if !expandVariations { + ml, err := moveListWithCommentsNoExpand(pgn) + if err != nil { + return ret, err + } + ret.moveLists = append(ret.moveLists, ml) + return ret, nil + } + + return moveListSetExpanded(pgn) +} + +func moveListWithCommentsNoExpand(pgn string) (moveListAndOutcome, error) { pgn = stripTagPairs(pgn) - var outcome Outcome - moves := []moveWithComment{} + ret := moveListAndOutcome{ + moves: []moveWithComment{}, + } // moveListTokenRe doesn't work w/ nested variations pgn, err := stripVariations(pgn) if err != nil { - return moves, outcome, err + return ret, err } for _, match := range moveListTokenRe.FindAllStringSubmatch(pgn, -1) { @@ -256,19 +323,79 @@ func moveListWithComments(pgn string) ([]moveWithComment, Outcome, error) { } if outcomeText != "" { - outcome = Outcome(outcomeText) + ret.outcome = Outcome(outcomeText) break } if commentText != "" { - moves[len(moves)-1].Comments = append(moves[len(moves)-1].Comments, strings.TrimSpace(commentText)) + ret.moves[len(ret.moves)-1].Comments = append(ret.moves[len(ret.moves)-1].Comments, strings.TrimSpace(commentText)) } if move != "" { - moves = append(moves, moveWithComment{MoveStr: move}) + ret.moves = append(ret.moves, moveWithComment{MoveStr: move}) + } + } + return ret, nil +} + +var moveNumRe = regexp.MustCompile(`(?:\d+\.+)?(.*)`) + +func moveListSetExpanded(pgn string) (moveListSet, error) { + firstGame := moveListAndOutcome{ + moves: []moveWithComment{}, + } + ret := moveListSet{ + moveLists: []moveListAndOutcome{firstGame}, + } + + pgn = stripTagPairs(pgn) + // remove comments @todo need to add comments back in + pgn = removeSection("{", "}", pgn) + // remove line breaks + pgn = strings.Replace(pgn, "\n", " ", -1) + pgn = strings.ReplaceAll(pgn, "(", "( ") + pgn = strings.ReplaceAll(pgn, ")", " )") + + moveListIdx := 0 + moveListIdxStack := make([]int, 0) + list := strings.Split(pgn, " ") + + for _, move := range list { + move = strings.TrimSpace(move) + switch move { + case string(NoOutcome), string(WhiteWon), string(BlackWon), string(Draw): + ret.moveLists[moveListIdx].outcome = Outcome(move) + case "": + case "(": + // begin new variation + moveListIdxStack = append(moveListIdxStack, moveListIdx) + newIdx := len(ret.moveLists) + numMoves := len(ret.moveLists[moveListIdx].moves) - 1 + newGame := moveListAndOutcome{} + newGame.moves = make([]moveWithComment, numMoves) + copy(newGame.moves, ret.moveLists[moveListIdx].moves) + ret.moveLists = append(ret.moveLists, newGame) + moveListIdx = newIdx + + case ")": + // end current variation + stackSize := len(moveListIdxStack) + if stackSize == 0 { + return ret, fmt.Errorf("Failed to parse variation") + } + moveListIdx = moveListIdxStack[stackSize-1] + moveListIdxStack = moveListIdxStack[:stackSize-1] + default: + results := moveNumRe.FindStringSubmatch(move) + tmp := moveWithComment{} + if len(results) == 2 && results[1] != "" { + tmp.MoveStr = results[1] + ret.moveLists[moveListIdx].moves = append(ret.moveLists[moveListIdx].moves, tmp) + } } } - return moves, outcome, nil + + return ret, nil } func stripTagPairs(pgn string) string { @@ -322,3 +449,14 @@ func stripVariations(pgn string) (string, error) { return ret.String(), nil } + +func removeSection(leftChar, rightChar, s string) string { + r := regexp.MustCompile(leftChar + ".*?" + rightChar) + for { + i := r.FindStringIndex(s) + if i == nil { + return s + } + s = s[0:i[0]] + s[i[1]:] + } +} diff --git a/pgn_test.go b/pgn_test.go index 36d3232..d4361ee 100644 --- a/pgn_test.go +++ b/pgn_test.go @@ -1,6 +1,8 @@ package chess import ( + "fmt" + "io" "io/ioutil" "os" "strings" @@ -201,6 +203,80 @@ func TestScannerWithFromPosFENs(t *testing.T) { } } +func TestScannerWithFromPosFENsExpanded(t *testing.T) { + finalPositions := []string{ + "rnbqkbnr/pp2pppp/2p5/3p4/3PP3/5P2/PPP3PP/RNBQKBNR b KQkq - 0 3", + "r2qkb1r/pp1n1ppp/2p2n2/4p3/2BPP1b1/2P2N2/PP4PP/RNBQ1RK1 b kq - 0 8", + "rnbqkbnr/pp3ppp/2p5/8/2BpP3/5N2/PPP3PP/RNBQK2R b KQkq - 1 6", + "rnbqk2r/pp2nppp/2p1p3/3p4/1b1PP3/2NB1P2/PPPB2PP/R2QK1NR b KQkq - 5 6", + "rnbqkb1r/pp3ppp/2p1pn2/3pP3/3P4/2N2P2/PPP3PP/R1BQKBNR b KQkq - 0 5", + "rnbqk1nr/pp2ppbp/2p3p1/3p4/3PP3/2N1BP2/PPP3PP/R2QKBNR b KQkq - 3 5", + "rnb1kbnr/pp3ppp/1qp5/8/3NP3/2N5/PPP3PP/R1BQKB1R b KQkq - 0 7", + } + fname := "fixtures/pgns/0014.pgn" + f, err := os.Open(fname) + if err != nil { + panic(err) + } + defer f.Close() + + scannerOpts := ScannerOpts{ExpandVariations: true} + scanner := NewScannerWithOptions(f, scannerOpts) + games := []*Game{} + for idx := 0; scanner.Scan(); { + game := scanner.Next() + if len(game.moves) == 0 { + continue + } + finalPos := game.Position().String() + if finalPos != finalPositions[idx] { + t.Fatalf(fname+" game %v expected final pos %v but got %v", idx, + finalPositions[idx], finalPos) + } + games = append(games, game) + idx++ + } + if len(games) != len(finalPositions) { + t.Fatalf(fname+" expected %v games but got %v", len(finalPositions), + len(games)) + } +} + +func TestScannerWithNestedAndExpand(t *testing.T) { + fname := "fixtures/pgns/0013.pgn" + f, err := os.Open(fname) + if err != nil { + panic(err) + } + defer f.Close() + + scannerOpts := ScannerOpts{ExpandVariations: true} + scanner := NewScannerWithOptions(f, scannerOpts) + games := []*Game{} + for scanner.Scan() { + err = scanner.Err() + if err != nil && err != io.EOF { + t.Fatalf(fname+" Unexpected non-nil/non-EOF err %v", err) + } + game := scanner.Next() + moveList := game.Moves() + if len(moveList) == 0 { + continue + } + games = append(games, game) + } + err = scanner.Err() + if err != io.EOF { + t.Fatalf(fname+" Unexpected non-EOF err %v", err) + } + if len(games) != 10 { + for idx, g := range games { + fmt.Printf("Parsed game %v: %v\n\n", idx, g) + } + t.Fatalf(fname+" expected 10 games but got %d", len(games)) + } +} + func BenchmarkPGN(b *testing.B) { pgn := mustParsePGN("fixtures/pgns/0001.pgn") b.ResetTimer()