Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions configs/appearance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack_appearance.yaml
- train: id.yaml
- train: bce.yaml
- eval: default.yaml
- model_config: mm_appearance.yaml
- model_config: mm_tdsp_appearance.yaml
- path: default.yaml

experiment_name: exp74a-fromExp73-HalvedLrDoubleEpochs
experiment_name: exp107a-fromExp105-ScaleUp
dataset_name: DanceTrack
6 changes: 3 additions & 3 deletions configs/bbox_only.yaml → configs/bbox.yaml
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack_bbox.yaml
- train: batch.yaml
- train: bce.yaml
- eval: default.yaml
- model_config: mm_bboxes.yaml
- model_config: mm_tdsp_bboxes.yaml
- path: default.yaml

experiment_name: exp74b-fromExp73-HalvedLrDoubleEpochs
experiment_name: exp108b-fromExp107-BboxEmbDim512
dataset_name: DanceTrack
2 changes: 1 addition & 1 deletion configs/dataset/augmentations/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ augmentations:
- _target_: mot_jepa.datasets.dataset.augmentations.video.IdentitySwitchAugmentation
switch_ratio: 0.3
- _target_: mot_jepa.datasets.dataset.augmentations.appearance.AppearanceNoiseAugmentation
alpha: 0.5
alpha: 0.40
- _target_: mot_jepa.datasets.dataset.augmentations.video.SmartIdentitySwitchAugmentation
switch_ratio: 0.5
iou_threshold: 0.5
Expand Down
6 changes: 3 additions & 3 deletions configs/dataset/dancetrack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ defaults:
- transform: scaled_bbox_keypoints.yaml
- augmentations: default.yaml
- feature_extractor: pred_bbox_keypoints_appearance.yaml
- sampler: scene_sampler.yaml
# - sampler: scene_sampler_within_range.yaml

index:
type: mot
Expand All @@ -11,10 +11,10 @@ index:
- /media/home/DanceTrack-orig/

n_tracks: 40
clip_length: 30
clip_length: 50
min_clip_tracks: 1
clip_sampling_step: 1
val_clip_sampling_step: 1

sampler: null
use_batch_sampler: false
use_batch_sampler: false
1 change: 1 addition & 0 deletions configs/dataset/dancetrack_appearance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ defaults:
- transform: scaled_bbox_keypoints.yaml
- augmentations: default.yaml
- feature_extractor: pred_appearance.yaml
# - sampler: scene_sampler_within_range.yaml

index:
type: mot
Expand Down
5 changes: 4 additions & 1 deletion configs/dataset/feature_extractor/pred_appearance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@ extractor_params:
prediction_path: /media/home/cameltrack-states/extracted-features
extra_false_positives: true
feature_names:
- appearance
- appearance

random_appearance_jitter_ratio: 0.0
random_appearance_jitter_range: 0
4 changes: 2 additions & 2 deletions configs/dataset/sampler/scene_sampler.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
_target_: 'mot_jepa.datasets.dataset.sampler.scene_sampler.SceneBatchSamplerWithRepeat.from_dataset'
n_scenes: 4
n_frames: 8
n_scenes: 12
n_frames: 1
3 changes: 3 additions & 0 deletions configs/dataset/sampler/scene_sampler_within_range.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
_target_: 'mot_jepa.datasets.dataset.sampler.scene_sampler.OneSceneWithRangeSampler.from_dataset'
n_scenes: 5
n_frames: 2
21 changes: 18 additions & 3 deletions configs/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,25 @@ defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack.yaml
- train: mm.yaml
- train: bce.yaml
- eval: default.yaml
- model_config: mm_bboxes_keypoints_appearance.yaml
- model_config: mm_tdsp_bboxes_keypoints_appearance.yaml
- path: default.yaml

experiment_name: exp74-fromExp73-HalvedLrDoubleEpochs
experiment_name: exp111-fromExp110-LinearSumDecoder
dataset_name: DanceTrack

resources:
batch_size: 8

train:
max_epochs: 10

optimizer_config:
_target_: torch.optim.AdamW
lr: 1e-5
weight_decay: 1e-3

scheduler_config:
_target_: mot_jepa.trainer.scheduler.create_warmup_cosine_annealing_scheduler
n_warmup_epochs: 1
6 changes: 3 additions & 3 deletions configs/keypoints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ defaults:
- the_global_config
- resources: default.yaml
- dataset: dancetrack_keypoints.yaml
- train: batch.yaml
- train: bce.yaml
- eval: default.yaml
- model_config: mm_keypoints.yaml
- model_config: mm_tdsp_keypoints.yaml
- path: default.yaml

experiment_name: exp74k-fromExp73-HalvedLrDoubleEpochs
experiment_name: exp108k-fromExp107-BboxEmbDim512
dataset_name: DanceTrack
2 changes: 1 addition & 1 deletion configs/model_config/mm_appearance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ common_params:
track_encoder_n_layers: 2
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_enable: false
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 2
interaction_encoder_ffn_dim: 512
Expand Down
2 changes: 1 addition & 1 deletion configs/model_config/mm_bboxes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ common_params:
track_encoder_n_layers: 2
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_enable: false
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 2
interaction_encoder_ffn_dim: 512
Expand Down
2 changes: 1 addition & 1 deletion configs/model_config/mm_bboxes_keypoints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ common_params:
track_encoder_n_layers: 2
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_enable: false
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 2
interaction_encoder_ffn_dim: 512
Expand Down
8 changes: 4 additions & 4 deletions configs/model_config/mm_bboxes_keypoints_appearance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ common_params:
track_encoder_n_layers: 2
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_enable: false
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 2
interaction_encoder_ffn_dim: 512
Expand Down Expand Up @@ -35,9 +35,9 @@ aggregator_params:
dropout: 0.1

per_feature_checkpoint:
bbox: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp74b-fromExp73-HalvedLrDoubleEpochs/checkpoints/last.pt
keypoints: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp74k-fromExp73-HalvedLrDoubleEpochs/checkpoints/last.pt
appearance: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp74a-fromExp73-HalvedLrDoubleEpochs/checkpoints/last.pt
bbox: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp93b-fromExp90-ClipLevelBCE/checkpoints/last.pt
keypoints: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp93k-fromExp90-ClipLevelBCE/checkpoints/last.pt
appearance: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp98a-fromExp97-EmbDim512/checkpoints/last.pt

object_interaction_encoder_enable: true
object_interaction_encoder_params:
Expand Down
2 changes: 1 addition & 1 deletion configs/model_config/mm_keypoints.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ common_params:
track_encoder_n_layers: 2
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_enable: false
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 2
interaction_encoder_ffn_dim: 512
Expand Down
33 changes: 33 additions & 0 deletions configs/model_config/mm_tdsp_appearance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
_target_: mot_jepa.architectures.tdcp.core.build_mm_tdsp_model
mm_dim: 1024
similarity_prediction_head_hidden_dim: 512

sph_common_params:
hidden_dim: 512
sph_per_feature_params:
appearance:
hidden_dim: 512

common_params:
hidden_dim: 256
dropout: 0.1
track_encoder_n_heads: 8
track_encoder_n_layers: 4
track_encoder_ffn_dim: 512
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 4
interaction_encoder_ffn_dim: 512
per_feature_params:
appearance:
hidden_dim: 512
feature_encoder_type: parts_appearance
feature_encoder_params:
emb_size: 128
hidden_dim: 512
track_encoder_enable_motion_encoder: false
track_encoder_ffn_dim: 1024
interaction_encoder_ffn_dim: 1024
aggregator_type: sum
aggregator_params: {}
28 changes: 28 additions & 0 deletions configs/model_config/mm_tdsp_bboxes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
_target_: mot_jepa.architectures.tdcp.core.build_mm_tdsp_model
mm_dim: 1024
similarity_prediction_head_hidden_dim: 512

sph_common_params:
hidden_dim: 512
sph_per_feature_params:
bbox:
hidden_dim: 512

common_params:
hidden_dim: 512
dropout: 0.1
track_encoder_n_heads: 8
track_encoder_n_layers: 4
track_encoder_ffn_dim: 1024
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 4
interaction_encoder_ffn_dim: 1024
per_feature_params:
bbox:
feature_encoder_type: motion
feature_encoder_params:
input_dim: 5
aggregator_type: sum
aggregator_params: {}
61 changes: 61 additions & 0 deletions configs/model_config/mm_tdsp_bboxes_keypoints_appearance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
_target_: mot_jepa.architectures.tdcp.core.build_mm_tdsp_model
mm_dim: 1024
similarity_prediction_head_hidden_dim: 1024

sph_common_params:
hidden_dim: 512
sph_per_feature_params:
bbox:
hidden_dim: 512
keypoints:
hidden_dim: 512
appearance:
hidden_dim: 512

common_params:
hidden_dim: 512
dropout: 0.1
track_encoder_n_heads: 8
track_encoder_n_layers: 4
track_encoder_ffn_dim: 1024
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 4
interaction_encoder_ffn_dim: 1024
per_feature_params:
bbox:
feature_encoder_type: motion
feature_encoder_params:
input_dim: 5
keypoints:
feature_encoder_type: motion
feature_encoder_params:
input_dim: 35
appearance:
hidden_dim: 512
feature_encoder_type: parts_appearance
feature_encoder_params:
emb_size: 128
hidden_dim: 512
track_encoder_enable_motion_encoder: false
track_encoder_ffn_dim: 1024
interaction_encoder_ffn_dim: 1024
aggregator_type: sum # transformer
aggregator_params: {}
# hidden_dim: ${model_config.mm_dim}
# n_heads: 8
# n_layers: 4
# dropout: 0.1

per_feature_checkpoint:
bbox: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp108b-fromExp107-BboxEmbDim512/checkpoints/last.pt
keypoints: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp108k-fromExp107-BboxEmbDim512/checkpoints/last.pt
appearance: /media/home/MOT-JEPA-outputs/experiments/DanceTrack/exp107a-fromExp105-ScaleUp/checkpoints/last.pt

object_interaction_encoder_enable: true
object_interaction_encoder_params:
hidden_dim: ${model_config.mm_dim}
n_heads: 8
n_layers: 2
dropout: 0.1
28 changes: 28 additions & 0 deletions configs/model_config/mm_tdsp_keypoints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
_target_: mot_jepa.architectures.tdcp.core.build_mm_tdsp_model
mm_dim: 1024
similarity_prediction_head_hidden_dim: 512

sph_common_params:
hidden_dim: 512
sph_per_feature_params:
keypoints:
hidden_dim: 512

common_params:
hidden_dim: 512
dropout: 0.1
track_encoder_n_heads: 8
track_encoder_n_layers: 4
track_encoder_ffn_dim: 1024
projector_intermediate_dim: 512
interaction_encoder_enable: true
interaction_encoder_n_heads: 8
interaction_encoder_n_layers: 4
interaction_encoder_ffn_dim: 1024
per_feature_params:
keypoints:
feature_encoder_type: motion
feature_encoder_params:
input_dim: 35
aggregator_type: sum
aggregator_params: {}
6 changes: 3 additions & 3 deletions configs/resources/default.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
batch_size: 8
val_batch_size: 4
batch_size: 32
val_batch_size: 16
accelerator: 'cuda:0'
num_workers: 12
num_workers: 20
8 changes: 8 additions & 0 deletions configs/train/bce.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
defaults:
- base.yaml
- _self_

loss_config:
_target_: mot_jepa.trainer.losses.bce.ClipLevelBCE
pos_weight: 10.0
assoc_threshold: 1e-2
6 changes: 6 additions & 0 deletions configs/train/clip.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
defaults:
- base.yaml
- _self_

loss_config:
_target_: mot_jepa.trainer.losses.infonce.ClipLevelInfoNCE
8 changes: 4 additions & 4 deletions configs/train/mm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ defaults:
loss_config:
_target_: mot_jepa.trainer.losses.infonce.MultiFeatureLoss
mm_loss:
_target_: mot_jepa.trainer.losses.infonce.IDLevelInfoNCE
_target_: mot_jepa.trainer.losses.infonce.ClipLevelInfoNCE
per_feature_losses:
bbox:
_target_: mot_jepa.trainer.losses.infonce.BatchLevelInfoNCE
_target_: mot_jepa.trainer.losses.infonce.ClipLevelInfoNCE
keypoints:
_target_: mot_jepa.trainer.losses.infonce.BatchLevelInfoNCE
_target_: mot_jepa.trainer.losses.infonce.ClipLevelInfoNCE
appearance:
_target_: mot_jepa.trainer.losses.infonce.IDLevelInfoNCE
_target_: mot_jepa.trainer.losses.infonce.ClipLevelInfoNCE
per_feature_weights:
bbox: 0.3
keypoints: 0.3
Expand Down
Loading
Loading