Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
multirun/
outputs/
.DS_Store
.idea/
.cursor/
Expand Down
11 changes: 11 additions & 0 deletions configs/appearance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Entry config for the appearance-only experiment: selects the
# dancetrack_appearance dataset, the `id` training preset and the
# mm_appearance model config.
defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack_appearance.yaml
- train: id.yaml
- eval: default.yaml
- model_config: mm_appearance.yaml
- path: default.yaml

# NOTE(review): this name also appears in the `appearance` checkpoint path in
# configs/model_config/mm_bboxes_keypoints_appearance.yaml; presumably the
# output directory is derived from it, so keep the two in sync.
experiment_name: exp74a-fromExp73-HalvedLrDoubleEpochs
dataset_name: DanceTrack
11 changes: 11 additions & 0 deletions configs/bbox_only.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Entry config for the bbox-only experiment: selects the dancetrack_bbox
# dataset, the `batch` training preset and the mm_bboxes model config.
defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack_bbox.yaml
- train: batch.yaml
- eval: default.yaml
- model_config: mm_bboxes.yaml
- path: default.yaml

# NOTE(review): this name also appears in the `bbox` checkpoint path in
# configs/model_config/mm_bboxes_keypoints_appearance.yaml; presumably the
# output directory is derived from it, so keep the two in sync.
experiment_name: exp74b-fromExp73-HalvedLrDoubleEpochs
dataset_name: DanceTrack
10 changes: 10 additions & 0 deletions configs/dataset/augmentations/appearance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# CompositionAugmentation configured with the four augmentation entries below.
# NOTE(review): indentation is not visible in this view; each ratio/alpha line
# presumably belongs to the `_target_` entry directly above it.
# NOTE(review): every dataset config visible in this change selects
# `augmentations: default.yaml`; nothing shown here selects this file —
# confirm where it is meant to be used.
_target_: mot_jepa.datasets.dataset.augmentations.base.CompositionAugmentation
augmentations:
- _target_: mot_jepa.datasets.dataset.augmentations.video.PointOcclusionAugmentations
drop_ratio: 0.3
- _target_: mot_jepa.datasets.dataset.augmentations.video.LeftOrRightOcclusionAugmentations
drop_ratio: 0.2
- _target_: mot_jepa.datasets.dataset.augmentations.video.IdentitySwitchAugmentation
switch_ratio: 0.3
- _target_: mot_jepa.datasets.dataset.augmentations.appearance.AppearanceNoiseAugmentation
alpha: 0.5
4 changes: 4 additions & 0 deletions configs/dataset/augmentations/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@ augmentations:
switch_ratio: 0.3
- _target_: mot_jepa.datasets.dataset.augmentations.appearance.AppearanceNoiseAugmentation
alpha: 0.5
- _target_: mot_jepa.datasets.dataset.augmentations.video.SmartIdentitySwitchAugmentation
switch_ratio: 0.5
iou_threshold: 0.5
max_switch_ratio: 0.5
12 changes: 0 additions & 12 deletions configs/dataset/augmentations/smart.yaml

This file was deleted.

1 change: 1 addition & 0 deletions configs/dataset/dancetrack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ defaults:
- transform: scaled_bbox_keypoints.yaml
- augmentations: default.yaml
- feature_extractor: pred_bbox_keypoints_appearance.yaml
- sampler: scene_sampler.yaml

index:
type: mot
Expand Down
19 changes: 19 additions & 0 deletions configs/dataset/dancetrack_appearance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# DanceTrack dataset config whose feature extractor (pred_appearance.yaml)
# lists only the `appearance` feature.
# NOTE(review): indentation is not visible in this view; `type`/`params`
# presumably nest under `index` and `paths` under `params` — confirm against
# the checked-in file.
defaults:
- transform: scaled_bbox_keypoints.yaml
- augmentations: default.yaml
- feature_extractor: pred_appearance.yaml

index:
type: mot
params:
paths:
# NOTE(review): absolute, machine-specific path.
- /media/home/DanceTrack-orig/

n_tracks: 40
clip_length: 50
min_clip_tracks: 1
clip_sampling_step: 1
val_clip_sampling_step: 1

# No sampler is configured here, whereas configs/dataset/dancetrack.yaml adds
# `sampler: scene_sampler.yaml` to its defaults.
sampler: null
use_batch_sampler: false
19 changes: 19 additions & 0 deletions configs/dataset/dancetrack_bbox.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# DanceTrack dataset config that selects the pred_bbox.yaml feature extractor.
# NOTE(review): indentation is not visible in this view; `type`/`params`
# presumably nest under `index` and `paths` under `params` — confirm against
# the checked-in file.
defaults:
- transform: scaled_bbox_keypoints.yaml
- augmentations: default.yaml
- feature_extractor: pred_bbox.yaml

index:
type: mot
params:
paths:
# NOTE(review): absolute, machine-specific path.
- /media/home/DanceTrack-orig/

n_tracks: 40
clip_length: 50
min_clip_tracks: 1
clip_sampling_step: 1
val_clip_sampling_step: 1

# No sampler is configured here, whereas configs/dataset/dancetrack.yaml adds
# `sampler: scene_sampler.yaml` to its defaults.
sampler: null
use_batch_sampler: false
19 changes: 19 additions & 0 deletions configs/dataset/dancetrack_keypoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# DanceTrack dataset config whose feature extractor (pred_keypoints.yaml)
# lists only the `keypoints` feature.
# NOTE(review): indentation is not visible in this view; `type`/`params`
# presumably nest under `index` and `paths` under `params` — confirm against
# the checked-in file.
defaults:
- transform: scaled_bbox_keypoints.yaml
- augmentations: default.yaml
- feature_extractor: pred_keypoints.yaml

index:
type: mot
params:
paths:
# NOTE(review): absolute, machine-specific path.
- /media/home/DanceTrack-orig/

n_tracks: 40
clip_length: 50
min_clip_tracks: 1
clip_sampling_step: 1
val_clip_sampling_step: 1

# No sampler is configured here, whereas configs/dataset/dancetrack.yaml adds
# `sampler: scene_sampler.yaml` to its defaults.
sampler: null
use_batch_sampler: false
6 changes: 6 additions & 0 deletions configs/dataset/feature_extractor/pred_appearance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Feature extractor config listing only the `appearance` feature.
# Identical to pred_keypoints.yaml apart from the `feature_names` entry.
# NOTE(review): `extractor_type` is `pred_bbox` although no bbox feature is
# listed — presumably it names the prediction source rather than the feature;
# confirm.
extractor_type: pred_bbox
extractor_params:
# NOTE(review): absolute, machine-specific path.
prediction_path: /media/home/cameltrack-states/extracted-features
extra_false_positives: true
feature_names:
- appearance
6 changes: 6 additions & 0 deletions configs/dataset/feature_extractor/pred_keypoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Feature extractor config listing only the `keypoints` feature.
# Identical to pred_appearance.yaml apart from the `feature_names` entry.
# NOTE(review): `extractor_type` is `pred_bbox` although no bbox feature is
# listed — presumably it names the prediction source rather than the feature;
# confirm.
extractor_type: pred_bbox
extractor_params:
# NOTE(review): absolute, machine-specific path.
prediction_path: /media/home/cameltrack-states/extracted-features
extra_false_positives: true
feature_names:
- keypoints
4 changes: 2 additions & 2 deletions configs/dataset/sampler/scene_sampler.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
_target_: 'mot_jepa.datasets.dataset.sampler.scene_sampler.SceneBatchSamplerWithRepeat.from_dataset'
n_scenes: 3
n_frames: 2
n_scenes: 4
n_frames: 8
1 change: 1 addition & 0 deletions configs/dataset/transform/identity.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Transform option that instantiates IdentityTransform.
# NOTE(review): every dataset config visible in this change selects
# `transform: scaled_bbox_keypoints.yaml`; nothing shown here selects this
# file — confirm where it is meant to be used.
_target_: mot_jepa.datasets.dataset.transform.IdentityTransform
19 changes: 19 additions & 0 deletions configs/dataset/transform/scaled_bbox_keypoints_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ComposeTransform configured with three transforms: BBoxXYWHtoXYXY,
# BBoxMinMaxScaling and FeatureFODStandardization.
# NOTE(review): indentation is not visible in this view; `keep_wh` presumably
# belongs to BBoxXYWHtoXYXY and the coord_*/fod_* keys to
# FeatureFODStandardization — confirm against the checked-in file.
# NOTE(review): every dataset config visible in this change still selects
# `transform: scaled_bbox_keypoints.yaml`, not this v2 file.
_target_: mot_jepa.datasets.dataset.transform.ComposeTransform
transforms:
- _target_: mot_jepa.datasets.dataset.transform.BBoxXYWHtoXYXY
keep_wh: true
- _target_: mot_jepa.datasets.dataset.transform.BBoxMinMaxScaling
- _target_: mot_jepa.datasets.dataset.transform.FeatureFODStandardization
coord_mean:
# NOTE(review): the bbox statistics below have 7 entries, whereas
# configs/model_config/mm_bboxes.yaml declares `input_dim: 5` — confirm the
# dimensions agree before a dataset is switched to this transform.
bbox: [0.5, 0.5, 0.5, 0.5, 0.00, 0.00, 0.5]
# NOTE(review): the nested keypoint lists look like a compact
# [repeat, values] encoding (35 = 17 * 2 + 1) — confirm how
# FeatureFODStandardization expands them.
keypoints: [[35, [0.5]]]
coord_std:
bbox: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0]
keypoints: [[17, [0.1, 0.1]], 1.0]
fod_mean:
bbox: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0,0.0]
keypoints: [[35, [0.0]]]
fod_std:
bbox: [0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 1.0]
keypoints: [[17, [0.05, 0.05]], 1.0]
fod_time_scaled: true
4 changes: 2 additions & 2 deletions configs/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack.yaml
- train: default.yaml
- train: mm.yaml
- eval: default.yaml
- model_config: mm_bboxes_keypoints_appearance.yaml
- path: default.yaml

experiment_name: exp44-fromExp41-BiggerAppearanceHiddenDim
experiment_name: exp74-fromExp73-HalvedLrDoubleEpochs
dataset_name: DanceTrack
11 changes: 11 additions & 0 deletions configs/keypoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Entry config for the keypoints-only experiment: selects the
# dancetrack_keypoints dataset, the `batch` training preset and the
# mm_keypoints model config.
defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack_keypoints.yaml
- train: batch.yaml
- eval: default.yaml
- model_config: mm_keypoints.yaml
- path: default.yaml

# NOTE(review): this name also appears in the `keypoints` checkpoint path in
# configs/model_config/mm_bboxes_keypoints_appearance.yaml; presumably the
# output directory is derived from it, so keep the two in sync.
experiment_name: exp74k-fromExp73-HalvedLrDoubleEpochs
dataset_name: DanceTrack
23 changes: 23 additions & 0 deletions configs/model_config/mm_appearance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Model config with a single `appearance` entry under per_feature_params,
# built through build_mm_tdcp_model.
# NOTE(review): indentation is not visible in this view; in particular confirm
# whether `track_encoder_enable_motion_encoder` belongs to `appearance` or to
# its `feature_encoder_params`.
_target_: mot_jepa.architectures.tdcp.core.build_mm_tdcp_model
# mm_dim is 512 here but 256 in mm_bboxes.yaml and mm_keypoints.yaml.
# NOTE(review): mm_bboxes_keypoints_appearance.yaml lists checkpoints from all
# three runs — confirm the differing mm_dim is compatible with that loading.
mm_dim: 512
common_params:
hidden_dim: 256
dropout: 0.1
track_encoder_n_heads: 8
track_encoder_n_layers: 2
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 2
interaction_encoder_ffn_dim: 512
per_feature_params:
appearance:
# Differs from the 256 listed under common_params — presumably a per-feature
# override; confirm.
hidden_dim: 512
feature_encoder_type: parts_appearance
feature_encoder_params:
emb_size: 128
hidden_dim: 512
track_encoder_enable_motion_encoder: false
aggregator_type: sum
aggregator_params: {}
7 changes: 5 additions & 2 deletions configs/model_config/mm_bboxes.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
_target_: mot_jepa.architectures.tdcp.core.build_mm_tdcp_model
mm_dim: 256
common_params:
hidden_dim: 256
dropout: 0.1
Expand All @@ -12,6 +13,8 @@ common_params:
interaction_encoder_ffn_dim: 512
per_feature_params:
bbox:
input_dim: 5
feature_encoder_type: motion
feature_encoder_params:
input_dim: 5
aggregator_type: sum
aggregator_params: {}
aggregator_params: {}
21 changes: 15 additions & 6 deletions configs/model_config/mm_bboxes_keypoints_appearance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,21 @@ per_feature_params:
emb_size: 128
hidden_dim: 512
track_encoder_enable_motion_encoder: false
aggregator_type: query
aggregator_type: transformer
aggregator_params:
hidden_dim: ${model_config.mm_dim}
num_heads: 8
n_heads: 8
n_layers: 2
dropout: 0.1

per_feature_checkpoint:
bbox: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp74b-fromExp73-HalvedLrDoubleEpochs/checkpoints/last.pt
keypoints: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp74k-fromExp73-HalvedLrDoubleEpochs/checkpoints/last.pt
appearance: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp74a-fromExp73-HalvedLrDoubleEpochs/checkpoints/last.pt

drop_mm_probas:
bbox: 0.1
keypoints: 0.1
appearance: 0.4
object_interaction_encoder_enable: true
object_interaction_encoder_params:
hidden_dim: ${model_config.mm_dim}
n_heads: 8
n_layers: 2
dropout: 0.1
20 changes: 20 additions & 0 deletions configs/model_config/mm_keypoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Model config with a single `keypoints` entry under per_feature_params,
# built through build_mm_tdcp_model.
# The common_params values match those in mm_appearance.yaml.
# NOTE(review): indentation is not visible in this view; nesting below is
# presumably common_params / per_feature_params.keypoints /
# feature_encoder_params — confirm against the checked-in file.
_target_: mot_jepa.architectures.tdcp.core.build_mm_tdcp_model
mm_dim: 256
common_params:
hidden_dim: 256
dropout: 0.1
track_encoder_n_heads: 8
track_encoder_n_layers: 2
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 2
interaction_encoder_ffn_dim: 512
per_feature_params:
keypoints:
feature_encoder_type: motion
feature_encoder_params:
# NOTE(review): 35 matches the keypoint entry count used in
# transform/scaled_bbox_keypoints_v2.yaml — presumably 17 keypoints x 2
# coordinates + 1; confirm.
input_dim: 35
aggregator_type: sum
aggregator_params: {}
4 changes: 2 additions & 2 deletions configs/resources/default.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
batch_size: 12
val_batch_size: 6
batch_size: 8
val_batch_size: 4
accelerator: 'cuda:0'
num_workers: 12
15 changes: 15 additions & 0 deletions configs/train/base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Shared training defaults. batch.yaml, id.yaml and mm.yaml include this file
# through their `defaults` list and override `loss_config`.
max_epochs: 20
gradient_clip: 1.0
mixed_precision: true

# Default loss; presets that list `_self_` after base.yaml replace it.
loss_config:
  _target_: mot_jepa.trainer.losses.infonce.IDLevelInfoNCE

optimizer_config:
  _target_: torch.optim.AdamW
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve dot-less forms like `5e-5` to a string rather than a float.
  lr: 5.0e-5
  weight_decay: 1.0e-2

scheduler_config:
  _target_: mot_jepa.trainer.scheduler.create_warmup_cosine_annealing_scheduler
  n_warmup_epochs: 2
6 changes: 6 additions & 0 deletions configs/train/batch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Training preset: base.yaml with the loss target set to BatchLevelInfoNCE.
# `_self_` is listed after base.yaml so the keys in this file take precedence
# over the base values (Hydra defaults-list ordering).
defaults:
- base.yaml
- _self_

loss_config:
_target_: mot_jepa.trainer.losses.infonce.BatchLevelInfoNCE
6 changes: 6 additions & 0 deletions configs/train/id.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Training preset: base.yaml with the loss target set to IDLevelInfoNCE.
# `_self_` is listed after base.yaml so the keys in this file take precedence
# over the base values (Hydra defaults-list ordering).
defaults:
- base.yaml
- _self_

# base.yaml already sets this same target, so the override is currently
# redundant; it keeps the preset explicit if the base default changes.
loss_config:
_target_: mot_jepa.trainer.losses.infonce.IDLevelInfoNCE
20 changes: 8 additions & 12 deletions configs/train/default.yaml → configs/train/mm.yaml
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
max_epochs: 10
gradient_clip: null
mixed_precision: true
defaults:
- base.yaml
- _self_

loss_config:
_target_: mot_jepa.trainer.losses.infonce.MultiFeatureLoss
mm_loss:
_target_: mot_jepa.trainer.losses.infonce.ClipLevelInfoNCE
_target_: mot_jepa.trainer.losses.infonce.IDLevelInfoNCE
per_feature_losses:
bbox:
_target_: mot_jepa.trainer.losses.infonce.BatchLevelInfoNCE
keypoints:
_target_: mot_jepa.trainer.losses.infonce.BatchLevelInfoNCE
appearance:
_target_: mot_jepa.trainer.losses.infonce.IDLevelInfoNCE

optimizer_config:
_target_: torch.optim.Adam
lr: 1e-4

scheduler_config:
_target_: mot_jepa.trainer.scheduler.create_warmup_cosine_annealing_scheduler
n_warmup_epochs: 1
per_feature_weights:
bbox: 0.3
keypoints: 0.3
appearance: 0.3
10 changes: 10 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,14 @@ RUN pip install uv
RUN uv init
RUN uv sync

# Install YOLOX from source into its own virtualenv at /YOLOX/venv.
# The package index is refreshed in the same layer as the install so a stale
# or missing index cannot break the build; the lists are removed afterwards to
# keep the layer small.
RUN apt-get update \
    && apt-get install -y git python3.10-venv \
    && rm -rf /var/lib/apt/lists/*
# NOTE(review): the clone is unpinned and tracks the default branch; consider
# checking out a fixed commit or tag for reproducible builds.
RUN git clone https://github.com/Megvii-BaseDetection/YOLOX /YOLOX
WORKDIR /YOLOX
RUN python3 -m venv venv
# Each RUN starts a fresh shell, so the venv is re-activated in every step.
RUN /bin/bash -c "source /YOLOX/venv/bin/activate && pip install -U pip setuptools"
RUN /bin/bash -c "source /YOLOX/venv/bin/activate && pip install torch torchvision torchaudio"
# --no-build-isolation makes the editable build use the packages installed in
# the venv by the previous steps instead of a fresh isolated build environment.
RUN /bin/bash -c "source /YOLOX/venv/bin/activate && pip install --no-build-isolation -v -e ."

WORKDIR /work

CMD ["bash"]
Loading
Loading