Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
a4a46ff
docs(Events): Add entry for ShotResult.OWN_GOAL
tcp Mar 2, 2025
1688758
Merge branch 'PySport:master' into master
tcp May 14, 2025
a3a1cc0
Merge branch 'PySport:master' into master
tcp May 30, 2025
1e3497c
feat(pff): add load_event to PFF provider api
tcp May 30, 2025
12fa547
feat(pff): add deserializer
tcp May 15, 2025
132142d
feat(pff): remove dead imports, add types in deserializer
tcp May 19, 2025
c09b61b
wip: feat(pff): PFF specs
tcp May 19, 2025
ce9b45a
fix(pff): update type hints, better arg names
tcp Jun 3, 2025
13af611
wip: fix(pff): generate dataset
tcp Jun 3, 2025
9dd7020
chore: remove print
tcp Jun 3, 2025
15d93ed
wip: feat(pff): build a generic dataset
tcp Jun 3, 2025
acb5344
wip: feat(pff): add SUBSTITUTION event
tcp Jun 3, 2025
1c89b01
wip: feat(pff): add events
tcp Jun 3, 2025
b013f67
wip: feat(pff): add event classes
tcp Jun 3, 2025
83a6185
wip: tbc
tcp Jun 3, 2025
939812f
lint: black
tcp Jun 6, 2025
30c2449
feat(pff): add starting coordinates
tcp Jun 6, 2025
a4f125f
feat(pff): add shot outcome
tcp Jun 6, 2025
3579b01
feat(pff): pff to kloppy body part mapping
tcp Jun 6, 2025
be3519d
feat(pff): add function to collect qualifiers
tcp Jun 6, 2025
3016389
feat(pff): pff to kloppy set pieces mapping
tcp Jun 6, 2025
e9807bd
feat(pff): add body part and set piece qualifiers to shots
tcp Jun 6, 2025
1298894
fix: related events
tcp Jun 6, 2025
7c4a036
moving general qualis to evt/possevt classes
tcp Jun 6, 2025
84ce849
shot outcomes, see how this feels
tcp Jun 6, 2025
95a6ed3
pass/cross mesh
tcp Jun 6, 2025
792b6ca
small tidbits
tcp Jun 9, 2025
f09118f
small update to shot outcomes and result
tcp Jun 9, 2025
6545bcf
shortcut to game and possession events
tcp Jun 9, 2025
31fba3c
update to pass outcome
tcp Jun 9, 2025
6229548
feat(pff): duels
tcp Jun 9, 2025
c0ab46f
feat(pff): update carry
tcp Jun 9, 2025
b735fdb
feat(pff): handle foul event
tcp Jun 9, 2025
facb56f
feat(pff): handle ball receipt
tcp Jun 9, 2025
02e69cf
fix imports and minor refactor
tcp Jun 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 62 additions & 12 deletions kloppy/_providers/pff.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
from kloppy.domain import Optional, TrackingDataset
from kloppy.domain import TrackingDataset, EventDataset
from kloppy.domain.services.event_factory import EventFactory
from kloppy.infra.serializers.tracking.pff import (
PFF_TrackingDeserializer,
PFF_TrackingInputs,
PFFTrackingDeserializer,
PFFTrackingInputs,
)
from kloppy.infra.serializers.event.pff import (
PFFEventDeserializer,
PFFEventInputs,
)
from kloppy.io import FileLike, open_as_file
from kloppy.config import get_config


def load_tracking(
meta_data: FileLike,
roster_meta_data: FileLike,
raw_data: FileLike,
sample_rate: Optional[float] = None,
limit: Optional[int] = None,
coordinates: Optional[str] = None,
only_alive: Optional[bool] = True,
sample_rate: float | None = None,
limit: int | None = None,
coordinates: str | None = None,
only_alive: bool | None = True,
) -> TrackingDataset:
"""
Load and deserialize tracking data from the provided metadata, roster metadata, and raw data files.
Expand All @@ -30,19 +36,63 @@ def load_tracking(
Returns:
TrackingDataset: A deserialized TrackingDataset object containing the processed tracking data.
"""
deserializer = PFF_TrackingDeserializer(
deserializer = PFFTrackingDeserializer(
sample_rate=sample_rate,
limit=limit,
coordinate_system=coordinates,
only_alive=only_alive,
)
with open_as_file(meta_data) as meta_data_fp, open_as_file(
roster_meta_data
) as roster_meta_data_fp, open_as_file(raw_data) as raw_data_fp:
with (
open_as_file(meta_data) as meta_data_fp,
open_as_file(roster_meta_data) as roster_meta_data_fp,
open_as_file(raw_data) as raw_data_fp,
):
return deserializer.deserialize(
inputs=PFF_TrackingInputs(
inputs=PFFTrackingInputs(
meta_data=meta_data_fp,
roster_meta_data=roster_meta_data_fp,
raw_data=raw_data_fp,
)
)


def load_event(
    metadata: FileLike,
    players: FileLike,
    raw_event_data: FileLike,
    event_types: list[str] | None = None,
    coordinates: str | None = None,
    event_factory: EventFactory | None = None,
    additional_metadata: dict | None = None,
) -> EventDataset:
    """
    Load PFF event data into a [`EventDataset`][kloppy.domain.models.event.EventDataset]

    Parameters:
        metadata (FileLike): JSON file containing metadata about the match.
        players (FileLike): JSON file containing the lineup (roster) information.
        raw_event_data (FileLike): JSON file containing the events.
        event_types (list[str], optional): A list of event types to filter the events. If None, all events are included. Defaults to None.
        coordinates (str, optional): The coordinate system to use for the event data. Defaults to None.
        event_factory (EventFactory, optional): An optional event factory to use for creating events. If None, the `event_factory` from the kloppy config is used. Defaults to None.
        additional_metadata (dict, optional): Additional metadata to include in the deserialization process. If None, an empty dictionary is used. Defaults to None.

    Returns:
        EventDataset: The dataset produced by `PFFEventDeserializer.deserialize`.
    """
    deserializer = PFFEventDeserializer(
        event_types=event_types,
        coordinate_system=coordinates,
        event_factory=event_factory or get_config("event_factory"),
    )

    # A fresh dict per call: a `{}` default in the signature would be a single
    # object shared by every call of this function.
    if additional_metadata is None:
        additional_metadata = {}

    with (
        open_as_file(metadata) as metadata_fp,
        open_as_file(players) as players_fp,
        open_as_file(raw_event_data) as raw_event_data_fp,
    ):
        return deserializer.deserialize(
            inputs=PFFEventInputs(
                metadata=metadata_fp,
                players=players_fp,
                raw_event_data=raw_event_data_fp,
            ),
            additional_metadata=additional_metadata,
        )
8 changes: 8 additions & 0 deletions kloppy/infra/serializers/event/pff/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Convert PFF event stream data to a kloppy EventDataset."""

from .deserializer import PFFEventDeserializer, PFFEventInputs

__all__ = [
"PFFEventDeserializer",
"PFFEventInputs",
]
172 changes: 172 additions & 0 deletions kloppy/infra/serializers/event/pff/deserializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
from datetime import timedelta
import json
import logging
from itertools import zip_longest
from typing import IO, NamedTuple

from kloppy.domain import (
DatasetFlag,
EventDataset,
FormationType,
Ground,
Metadata,
Orientation,
Period,
Player,
Provider,
Team,
)
from kloppy.domain.models.event import Event, EventType
from kloppy.domain.models.pitch import PitchDimensions, Point
from kloppy.exceptions import DeserializationError
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer
from kloppy.utils import performance_logging

from . import specification as PFF

logger = logging.getLogger(__name__)


class PFFEventInputs(NamedTuple):
    """Bundle of the three open file handles consumed by the PFF event deserializer."""

    # Match metadata; parsed with `json.load` by the deserializer.
    metadata: IO[bytes]
    # Roster / lineup entries; parsed with `json.load` by the deserializer.
    players: IO[bytes]
    # Raw event feed; parsed with `json.load` by the deserializer.
    raw_event_data: IO[bytes]


class PFFEventDeserializer(EventDataDeserializer[PFFEventInputs]):
    """Deserialize PFF event JSON files into a kloppy `EventDataset`."""

    @property
    def provider(self) -> Provider:
        """Provider handled by this deserializer (always `Provider.PFF`)."""
        return Provider.PFF

    def deserialize(
        self, inputs: PFFEventInputs, additional_metadata: dict
    ) -> EventDataset:
        """Build an `EventDataset` from the PFF metadata, roster and event files.

        Parameters:
            inputs: Open file handles for the metadata, players and raw
                event JSON documents.
            additional_metadata: Extra keyword arguments forwarded verbatim
                to the `Metadata` constructor.

        Returns:
            An `EventDataset` holding every event accepted by
            `should_include_event`, transformed by the coordinate
            transformer.

        Raises:
            DeserializationError: If the periods cannot be determined (see
                `create_periods`).
        """
        # Initialize coordinate system transformer
        self.transformer = self.get_transformer()

        with performance_logging("load data", logger=logger):
            metadata = json.load(inputs.metadata)
            players = json.load(inputs.players)
            raw_events = self.load_raw_events(inputs.raw_event_data)

        with performance_logging("parse teams ans players", logger=logger):
            teams = self.create_teams_and_players(metadata, players)

        with performance_logging("parse periods", logger=logger):
            periods = self.create_periods(raw_events)

        with performance_logging("parse events", logger=logger):
            events = []
            for raw_event in raw_events.values():
                # Each raw event is given references to the periods, teams and
                # the full raw-event mapping before it is deserialized.
                new_events = raw_event.set_refs(
                    periods, teams, raw_events
                ).deserialize(self.event_factory)
                for event in new_events:
                    if self.should_include_event(event):
                        event = self.transformer.transform_event(event)
                        events.append(event)

        to_coordinate_system = self.transformer.get_to_coordinate_system()
        pff_metadata = Metadata(
            teams=teams,
            periods=periods,
            # TODO: get pitch dimensions from an event
            pitch_dimensions=to_coordinate_system.pitch_dimensions,
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
            score=None,
            provider=Provider.PFF,
            coordinate_system=to_coordinate_system,
            **additional_metadata,
        )
        dataset = EventDataset(metadata=pff_metadata, records=events)

        # TODO: add freeze frames

        return dataset

    def load_raw_events(
        self, raw_event_data: IO[bytes]
    ) -> dict[str, PFF.EVENT]:
        """Parse the raw event JSON into a mapping of event id to decoded event.

        Events are sorted by their `eventTime` before being inserted, so the
        returned dict iterates in that order. The id is the `gameEventId`
        alone when `possessionEventId` is None, otherwise a composite of
        game event id, possession event id, game event type and event time.
        A later event with the same id replaces the earlier one.
        """
        raw_events = {}
        events = json.load(raw_event_data)
        events = sorted(events, key=lambda x: x["eventTime"])
        for event in events:
            event_id = (
                f"{event['gameEventId']}_{event['possessionEventId']}_{event['gameEvents']['gameEventType']}_{event['eventTime']}"
                if event["possessionEventId"] is not None
                else f"{event['gameEventId']}"
            )
            raw_events[event_id] = PFF.event_decoder(event)
        return raw_events

    def create_teams_and_players(self, metadata, players) -> list[Team]:
        """Create the home and away `Team` objects, each with its players.

        Parameters:
            metadata: Parsed match metadata; `homeTeam` and `awayTeam` are
                read, each with an `id` and a `name`.
            players: Parsed roster entries; each entry is assigned to the
                team whose id equals `entry["team"]["id"]`.

        Returns:
            A two-element list: `[home, away]`.
        """

        def create_team(team_id, team_name, ground_type):
            team = Team(
                team_id=str(team_id),
                name=team_name,
                ground=ground_type,
            )

            # NOTE(review): team_id is stringified above but player_id is
            # passed through as found in the JSON; confirm the intended type.
            team.players = [
                Player(
                    player_id=entry["player"]["id"],
                    team=team,
                    name=entry["player"]["nickname"],
                    jersey_no=int(entry["shirtNumber"]),
                    # started=entry['started'],
                    starting_position=PFF.position_types_mapping[
                        entry["positionGroupType"]
                    ],
                )
                for entry in players
                if entry["team"]["id"] == team_id
            ]

            return team

        home_team = metadata["homeTeam"]
        away_team = metadata["awayTeam"]

        home = create_team(home_team["id"], home_team["name"], Ground.HOME)
        away = create_team(away_team["id"], away_team["name"], Ground.AWAY)
        return [home, away]

    def create_periods(self, raw_events: dict[str, PFF.EVENT]) -> list[Period]:
        """Derive the match periods from kick-off and end-of-half events.

        For every period number the last kick-off event and the last
        end-of-half event seen are kept; their `startTime` values (seconds)
        become the period's start and end timestamps.

        Parameters:
            raw_events: Decoded raw events as returned by `load_raw_events`.

        Returns:
            One `Period` per period number, in the order the kick-off events
            were encountered.

        Raises:
            DeserializationError: If the set of periods with a kick-off event
                differs from the set of periods with an end-of-half event.
        """
        kickoff_types = (
            PFF.EVENT_TYPE.FIRST_HALF_KICKOFF,
            PFF.EVENT_TYPE.SECOND_HALF_KICKOFF,
            PFF.EVENT_TYPE.THIRD_HALF_KICKOFF,
            PFF.EVENT_TYPE.FOURTH_HALF_KICKOFF,
        )
        half_start_events = {}
        half_end_events = {}

        for event in raw_events.values():
            game_event = event.raw_event["gameEvents"]
            event_type = PFF.EVENT_TYPE(game_event["gameEventType"])
            period = game_event["period"]

            if event_type in kickoff_types:
                half_start_events[period] = event.raw_event
            elif event_type == PFF.EVENT_TYPE.END_OF_HALF:
                half_end_events[period] = event.raw_event

        # Match start and end events by period number rather than by dict
        # position: positional pairing would silently combine a kick-off with
        # the end of a different period whenever the two dicts disagree.
        if half_start_events.keys() != half_end_events.keys():
            raise DeserializationError(
                "Failed to determine start and end time of periods."
            )

        return [
            Period(
                id=int(period_id),
                start_timestamp=timedelta(seconds=start_event["startTime"]),
                end_timestamp=timedelta(
                    seconds=half_end_events[period_id]["startTime"]
                ),
            )
            for period_id, start_event in half_start_events.items()
        ]
Loading
Loading