From 4d00f33408fac151f343a385dd53f8446263551a Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Tue, 30 Dec 2025 10:10:21 +0100 Subject: [PATCH 1/3] fix(statsbomb): freeze frame transformation Fixes two bugs in the coordinate transformation of freeze frames: - due to an indentation error, freeze frame coordinates were only transformed if an orientation change was needed - since the ball coordinates in the freeze frame were taken from the already transformed event, the ball coordinates were transformed twice Fixes #464 --- .../domain/services/transformers/dataset.py | 6 +- .../event/statsbomb/deserializer.py | 61 ++++++++++++------- .../serializers/event/statsbomb/helpers.py | 25 ++++++-- kloppy/tests/test_statsbomb.py | 14 +++-- 4 files changed, 70 insertions(+), 36 deletions(-) diff --git a/kloppy/domain/services/transformers/dataset.py b/kloppy/domain/services/transformers/dataset.py index 5de757e54..5e8eda82a 100644 --- a/kloppy/domain/services/transformers/dataset.py +++ b/kloppy/domain/services/transformers/dataset.py @@ -115,8 +115,6 @@ def change_point_dimensions( point_base = self._from_pitch_dimensions.to_metric_base( point, pitch_length=base_pitch_length, pitch_width=base_pitch_width ) - print(point_base) - print(self._to_pitch_dimensions.from_metric_base) point_to = self._to_pitch_dimensions.from_metric_base( point=point_base, pitch_length=base_pitch_length, @@ -329,8 +327,8 @@ def transform_event(self, event: Event) -> Event: ): event = self.__flip_event(event) - if event.freeze_frame: - event.freeze_frame = self.transform_frame(event.freeze_frame) + if event.freeze_frame: + event.freeze_frame = self.transform_frame(event.freeze_frame) return event diff --git a/kloppy/infra/serializers/event/statsbomb/deserializer.py b/kloppy/infra/serializers/event/statsbomb/deserializer.py index 076e24b97..e8794a36e 100644 --- a/kloppy/infra/serializers/event/statsbomb/deserializer.py +++ b/kloppy/infra/serializers/event/statsbomb/deserializer.py @@ -12,6 +12,7 @@ Orientation, Period, Player, + PositionType, Provider, Team, ) @@ -77,6 +78,29 @@ def deserialize( ) for event in new_events: if self.should_include_event(event): + if "freeze_frame" in event.raw_event.get("shot", {}): + event.freeze_frame = parse_freeze_frame( + freeze_frame=event.raw_event["shot"][ + "freeze_frame" + ], + home_team=teams[0], + away_team=teams[1], + event=event, + fidelity_version=data_version.shot_fidelity_version, + ) + if ( + not event.freeze_frame + and event.event_id in three_sixty_data + ): + freeze_frame = three_sixty_data[event.event_id] + event.freeze_frame = parse_freeze_frame( + freeze_frame=freeze_frame["freeze_frame"], + home_team=teams[0], + away_team=teams[1], + event=event, + fidelity_version=data_version.xy_fidelity_version, + visible_area=freeze_frame["visible_area"], + ) # Transform event to the coordinate system event = self.transformer.transform_event(event) events.append(event) @@ -102,29 +126,24 @@ def deserialize( **additional_metadata, ) dataset = EventDataset(metadata=metadata, records=events) + # We can now update GK identities in the freeze frames + # because we know the positions of the GKs at the event times for event in dataset: - if "freeze_frame" in event.raw_event.get("shot", {}): - event.freeze_frame = self.transformer.transform_frame( - parse_freeze_frame( - freeze_frame=event.raw_event["shot"]["freeze_frame"], - home_team=teams[0], - away_team=teams[1], - event=event, - fidelity_version=data_version.shot_fidelity_version, - ) - ) - if not event.freeze_frame and event.event_id in three_sixty_data: - freeze_frame = three_sixty_data[event.event_id] - event.freeze_frame = self.transformer.transform_frame( - parse_freeze_frame( - freeze_frame=freeze_frame["freeze_frame"], - home_team=teams[0], - away_team=teams[1], - event=event, - fidelity_version=data_version.xy_fidelity_version, - visible_area=freeze_frame["visible_area"], + if not event.freeze_frame: + continue + + new_players_data = {} + for player, data in event.freeze_frame.players_data.items(): + if player.attributes.get("goalkeeper", False): + actual_gk = player.team.get_player_by_position( + position=PositionType.Goalkeeper, + time=event.time, ) - ) + new_players_data[actual_gk] = data + else: + new_players_data[player] = data + + event.freeze_frame.players_data = new_players_data return dataset def load_data(self, inputs: StatsBombInputs): diff --git a/kloppy/infra/serializers/event/statsbomb/helpers.py b/kloppy/infra/serializers/event/statsbomb/helpers.py index 757e33c7d..b682790c4 100644 --- a/kloppy/infra/serializers/event/statsbomb/helpers.py +++ b/kloppy/infra/serializers/event/statsbomb/helpers.py @@ -10,7 +10,6 @@ PlayerData, Point, Point3D, - PositionType, Team, ) from kloppy.domain.services.frame_factory import create_frame @@ -107,18 +106,34 @@ def parse_freeze_frame( def get_player_from_freeze_frame(player_data, team, i): if "player" in player_data: - return team.get_player_by_id(player_data["player"]["id"]) - elif player_data.get("actor"): + home_player = home_team.get_player_by_id( + player_data["player"]["id"] + ) + if home_player: + return home_player + away_player = away_team.get_player_by_id( + player_data["player"]["id"] + ) + if away_player: + return away_player + + if player_data.get("actor"): return event.player elif player_data.get("keeper"): - return team.get_player_by_position( - position=PositionType.Goalkeeper, time=event.time + # We can later identify the goalkeeper by their position + # if we know the formation, but for now we just flag them + return Player( + player_id=f"T{team.team_id}-E{event.event_id}-{i}", + team=team, + jersey_no=None, + attributes={"goalkeeper": True}, ) else: return Player( player_id=f"T{team.team_id}-E{event.event_id}-{i}", team=team, jersey_no=None, + attributes={"goalkeeper": False}, ) for i, freeze_frame_player in enumerate(freeze_frame): diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index e7e6e8646..5364227d1 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -610,13 +610,20 @@ def test_correct_normalized_deserialization(self): pass_event = dataset.get_event_by_id( "8022c113-e349-4b0b-b4a7-a3bb662535f8" ) + assert ( + pass_event.coordinates.x + == pass_event.freeze_frame.ball_coordinates.x + ) + assert ( + pass_event.coordinates.y + == pass_event.freeze_frame.ball_coordinates.y + ) coordinates_per_team = defaultdict(list) for ( player, coordinates, ) in pass_event.freeze_frame.players_coordinates.items(): coordinates_per_team[player.team.name].append(coordinates) - print(coordinates_per_team) assert coordinates_per_team == { "Belgium": [ Point(x=0.30230680550305883, y=0.5224074534269804), @@ -1233,11 +1240,6 @@ def test_player_position(self, base_dir): event_data=base_dir / "files/statsbomb_event.json", ) - for item in dataset.aggregate("minutes_played", include_position=True): - print( - f"{item.player} {item.player.player_id}- {item.start_time} - {item.end_time} - {item.duration} - {item.position}" - ) - home_team, away_team = dataset.metadata.teams period1, period2 = dataset.metadata.periods From 65477cccb7e168f3161d61c2f198199e0ccd199b Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Wed, 7 Jan 2026 17:12:58 +0100 Subject: [PATCH 2/3] fix visualization test --- kloppy/tests/test_statsbomb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index 5364227d1..ecc0ef544 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -51,7 +51,6 @@ from kloppy.infra.serializers.event.statsbomb.helpers import parse_str_ts import kloppy.infra.serializers.event.statsbomb.specification as SB -ENABLE_PLOTTING = True API_URL = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/" @@ -359,7 +358,10 @@ def test_freeze_frame_shot(self, dataset: EventDataset, base_dir: Path): def get_color(player): if player.team == shot_event.player.team: return "#b94b75" - elif player.starting_position.position_id == "1": + elif ( + player.starting_position.position_group + == PositionType.Goalkeeper + ): return "#c15ca5" else: return "#7f63b8" From fdccf107b3318a936cc489d7bd6e715f4ba43ce6 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Wed, 7 Jan 2026 17:21:37 +0100 Subject: [PATCH 3/3] tests: make test_with_visualization reusable --- kloppy/tests/conftest.py | 9 +++++++++ kloppy/tests/test_statsbomb.py | 22 ++++++++-------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/kloppy/tests/conftest.py b/kloppy/tests/conftest.py index a0b6dffec..2a199ea33 100644 --- a/kloppy/tests/conftest.py +++ b/kloppy/tests/conftest.py @@ -1,5 +1,6 @@ """Module to store common fixtures.""" +import os from pathlib import Path import pytest @@ -8,3 +9,11 @@ @pytest.fixture(scope="session") def base_dir() -> Path: return Path(__file__).parent + + +@pytest.fixture(scope="session") +def with_visualization(base_dir): + enable_viz = os.environ.get("KLOPPY_TESTWITHVIZ") == "1" + if enable_viz: + (base_dir / "outputs").mkdir(exist_ok=True) + return enable_viz diff --git a/kloppy/tests/test_statsbomb.py b/kloppy/tests/test_statsbomb.py index ecc0ef544..574efad00 100644 --- a/kloppy/tests/test_statsbomb.py +++ b/kloppy/tests/test_statsbomb.py @@ -1,6 +1,5 @@ from collections import defaultdict from datetime import datetime, timedelta, timezone -import os from pathlib import Path from typing import cast @@ -54,15 +53,6 @@ API_URL = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/" -def test_with_visualization(): - if ( - "KLOPPY_TESTWITHVIZ" in os.environ - and os.environ["KLOPPY_TESTWITHVIZ"] == "1" - ): - return True - return False - - @pytest.fixture(scope="module") def dataset() -> EventDataset: """Load StatsBomb data for Belgium - Portugal at Euro 2020""" @@ -317,7 +307,9 @@ def test_synthetic_out_events(self, dataset: EventDataset): assert ball_out_events[0].ball_state == BallState.DEAD - def test_freeze_frame_shot(self, dataset: EventDataset, base_dir: Path): + def test_freeze_frame_shot( + self, dataset: EventDataset, base_dir: Path, with_visualization: bool + ): """Test if shot freeze-frame is properly parsed and attached to shot events""" shot_event = dataset.get_event_by_id( "a5c60797-631e-418a-9f24-1e9779cb2b42" @@ -343,7 +335,7 @@ def test_freeze_frame_shot(self, dataset: EventDataset, base_dir: Path): 91.45, 28.15 ) - if test_with_visualization(): + if with_visualization: import matplotlib.pyplot as plt from mplsoccer import VerticalPitch @@ -428,7 +420,9 @@ def get_color(player): base_dir / "outputs" / "test_statsbomb_freeze_frame_shot.png" ) - def test_freeze_frame_360(self, dataset: EventDataset, base_dir: Path): + def test_freeze_frame_360( + self, dataset: EventDataset, base_dir: Path, with_visualization: bool + ): """Test if 360 freeze-frame is properly parsed and attached to shot events""" pass_event = dataset.get_event_by_id( "8022c113-e349-4b0b-b4a7-a3bb662535f8" @@ -504,7 +498,7 @@ def test_freeze_frame_360(self, dataset: EventDataset, base_dir: Path): abs=1e-2, ) - if test_with_visualization(): + if with_visualization: import matplotlib.pyplot as plt from mplsoccer import Pitch