Intellindust-AI-Lab · 369pro · Apr 21, 2025 · May 12, 2025 · May 25, 2025 · May 27, 2025
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,23 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python 调试程序: 当前文件",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "torch.distributed.run",  // run torchrun from the selected interpreter, not a hard-coded conda path
+            "console": "integratedTerminal",
+            "justMyCode": true,
+            "args": [
+                "--master_port=7777",
+                "--nproc_per_node=1",
+                "${workspaceFolder}/train.py",  // repo-relative instead of a machine-specific absolute path
+                "-c", "configs/test/deim_hgnetv2_s_visdrone.yml",  // NOTE(review): this diff adds the config under configs/deim_dfine/ - confirm configs/test/ exists
+                "--seed=0"
+            ]
+        }
+    ]
+}
diff --git a/configs/base/dataloader_dfine.yml b/configs/base/dataloader_dfine.yml
@@ -0,0 +1,39 @@
+# D-FINE dataloader settings: augmentation ops, their stop-epoch policy, collate function and batch sizes.
+train_dataloader:
+  dataset:
+    transforms:
+      ops:
+        - {type: RandomPhotometricDistort, p: 0.5}
+        - {type: RandomZoomOut, fill: 0}
+        - {type: RandomIoUCrop, p: 0.8}
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: RandomHorizontalFlip}
+        - {type: Resize, size: [640, 640], }
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: ConvertPILImage, dtype: 'float32', scale: True}
+        - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
+      policy:
+        name: stop_epoch
+        epoch: 72 # epoch in [72, ~) stop `ops`
+        ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop'] # the ops this policy stops
+
+  collate_fn:
+    type: BatchImageCollateFunction
+    base_size: 640
+    base_size_repeat: 3
+    stop_epoch: 72 # epoch in [72, ~) stop `multiscales`
+
+  shuffle: True
+  total_batch_size: 32 # total batch size is 32 (4 * 8)
+  num_workers: 4
+
+# Validation: only Resize and ConvertPILImage are applied; no shuffling.
+val_dataloader:
+  dataset:
+    transforms:
+      ops:
+        - {type: Resize, size: [640, 640], }
+        - {type: ConvertPILImage, dtype: 'float32', scale: True}
+  shuffle: False
+  total_batch_size: 64
+  num_workers: 4
diff --git a/configs/base/dataloader_rtdetrv2.yml b/configs/base/dataloader_rtdetrv2.yml
@@ -0,0 +1,38 @@
+# RT-DETRv2 dataloader settings: augmentation ops, their stop-epoch policy, multi-scale collate and batch sizes.
+train_dataloader: 
+  dataset: 
+    transforms:
+      ops:
+        - {type: RandomPhotometricDistort, p: 0.5}
+        - {type: RandomZoomOut, fill: 0}
+        - {type: RandomIoUCrop, p: 0.8}
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: RandomHorizontalFlip}
+        - {type: Resize, size: [640, 640], }
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: ConvertPILImage, dtype: 'float32', scale: True}   
+        - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
+      policy:
+        name: stop_epoch
+        epoch: 71 # epoch in [71, ~) stop `ops`
+        ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop'] # the ops this policy stops
+
+  collate_fn:
+    type: BatchImageCollateFunction
+    scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
+    stop_epoch: 71 # epoch in [71, ~) stop `multiscales`
+
+  shuffle: True
+  total_batch_size: 16 # total batch size is 16 (4 * 4)
+  num_workers: 4
+
+# Validation: only Resize and ConvertPILImage are applied; no shuffling.
+val_dataloader:
+  dataset: 
+    transforms:
+      ops: 
+        - {type: Resize, size: [640, 640]}
+        - {type: ConvertPILImage, dtype: 'float32', scale: True}   
+  shuffle: False
+  total_batch_size: 32
+  num_workers: 4
diff --git a/configs/base/dfine_hgnetv2.yml b/configs/base/dfine_hgnetv2.yml
@@ -21,7 +21,7 @@ flat_epoch: 4000000
 no_aug_epoch: 0
 
 HGNetv2:
-  pretrained: True
+  pretrained: False  # NOTE(review): changed in a shared base config, so every config including this file is affected - confirm intended
   local_model_dir: ../RT-DETR-main/D-FINE/weight/hgnetv2/
 
 HybridEncoder:

diff --git a/configs/dataset/custom_detection.yml b/configs/dataset/custom_detection.yml
@@ -11,8 +11,8 @@ train_dataloader:
   type: DataLoader
   dataset:
     type: CocoDetection
-    img_folder: /data/yourdataset/train
-    ann_file: /data/yourdataset/train/train.json
+    img_folder: /data/WZW/visdrone2019/VisDrone2019-DET-train/images  # NOTE(review): machine-specific path replaces the template placeholder; configs/dataset/visdrone_detection.yml already carries it - confirm intended
+    ann_file: /data/WZW/visdrone2019/VisDrone2019-DET-train/train.json
     return_masks: False
     transforms:
       type: Compose
@@ -28,8 +28,8 @@ val_dataloader:
   type: DataLoader
   dataset:
     type: CocoDetection
-    img_folder: /data/yourdataset/val
-    ann_file: /data/yourdataset/val/val.json
+    img_folder: /data/WZW/visdrone2019/VisDrone2019-DET-val/images  # NOTE(review): machine-specific path replaces the template placeholder; configs/dataset/visdrone_detection.yml already carries it - confirm intended
+    ann_file: /data/WZW/visdrone2019/VisDrone2019-DET-val/val.json
     return_masks: False
     transforms:
       type: Compose

diff --git a/configs/dataset/visdrone_detection.yml b/configs/dataset/visdrone_detection.yml
@@ -0,0 +1,41 @@
+task: detection # dataset config for VisDrone2019-DET, read via CocoDetection and scored with CocoEvaluator
+
+evaluator:
+  type: CocoEvaluator
+  iou_types: ['bbox', ]
+
+num_classes: 10 # number of object categories in the dataset (presumably the 10 VisDrone-DET classes - confirm against train.json)
+remap_mscoco_category: False # presumably: category ids are used as-is, without MS-COCO remapping - confirm
+
+train_dataloader:
+  type: DataLoader
+  dataset:
+    type: CocoDetection
+    img_folder: /data/WZW/visdrone2019/VisDrone2019-DET-train/images      # set to the absolute path on your machine; same for the paths below
+    ann_file: /data/WZW/visdrone2019/VisDrone2019-DET-train/train.json
+    return_masks: False
+    transforms:
+      type: Compose
+      ops: ~ # null here; presumably supplied by an included dataloader config - confirm
+  shuffle: True
+  num_workers: 4
+  drop_last: True
+  collate_fn:
+    type: BatchImageCollateFunction
+
+
+val_dataloader:
+  type: DataLoader
+  dataset:
+    type: CocoDetection
+    img_folder: /data/WZW/visdrone2019/VisDrone2019-DET-val/images # machine-specific absolute path; adjust for your setup
+    ann_file: /data/WZW/visdrone2019/VisDrone2019-DET-val/val.json
+    return_masks: False
+    transforms:
+      type: Compose
+      ops: ~ # null here; presumably supplied by an included dataloader config - confirm
+  shuffle: False
+  num_workers: 4
+  drop_last: False
+  collate_fn:
+    type: BatchImageCollateFunction
diff --git a/configs/deim/deim_hgnetv2_cgfm_n_custom.yml b/configs/deim/deim_hgnetv2_cgfm_n_custom.yml
@@ -0,0 +1,63 @@
+__include__: [
+  '../dfine/dfine_hgnetv2_n_custom.yml',
+  '../base/deim.yml'
+]
+
+print_freq: 20
+output_dir: ./outputs/deim_hgnetv2_cgfm_n_custom  # own directory so runs do not overwrite those of deim_hgnetv2_n_custom.yml
+
+# Override the encoder selected further up in the base config (dfine_hgnetv2).
+DEIM:
+  encoder: HybridEncoder_CGFM
+# This section modifies the encoder settings from dfine_hgnetv2_n_custom.yml.
+HybridEncoder_CGFM:
+  in_channels: [512, 1024]
+  feat_strides: [16, 32]
+
+  # intra
+  hidden_dim: 128
+  # index 1 selects the stride-32 entry of feat_strides above
+  use_encoder_idx: [1]
+  dim_feedforward: 512
+
+  # cross
+  expansion: 0.34
+  depth_mult: 0.5
+
+optimizer:
+  type: AdamW
+  params:
+    -  # NOTE(review): in '(?!.*norm|bn)' / '(?=.*norm|bn)' the '|' separates '.*norm' from 'bn', so 'bn' is only tested at the start of the name - confirm intended
+      params: '^(?=.*backbone)(?!.*norm|bn).*$'
+      lr: 0.0004
+    -
+      params: '^(?=.*backbone)(?=.*norm|bn).*$'
+      lr: 0.0004
+      weight_decay: 0.
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+      weight_decay: 0.
+
+  lr: 0.0008
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+# Increase to search for the optimal ema
+epoches: 160 # 148 + 12 (stop_epoch + no_aug_epoch)
+
+## Our LR-Scheduler
+flat_epoch: 78    # 4 + 148 // 2 (general form: 4 + epoch // 2)
+no_aug_epoch: 12
+lr_gamma: 1.0
+
+## Our DataAug
+train_dataloader:
+  dataset:
+    transforms:
+      policy:
+        epoch: [4, 78, 148]   # list of epochs instead of a single stop epoch
+
+  collate_fn:
+    mixup_epochs: [4, 78]
+    stop_epoch: 148
+    base_size_repeat: ~
diff --git a/configs/deim/deim_hgnetv2_n_custom.yml b/configs/deim/deim_hgnetv2_n_custom.yml
@@ -0,0 +1,45 @@
+__include__: [
+  '../dfine/dfine_hgnetv2_n_custom.yml',
+  '../base/deim.yml'
+]
+
+print_freq: 20
+output_dir: ./outputs/deim_hgnetv2_n_custom
+
+optimizer:
+  type: AdamW
+  params:
+    - # NOTE(review): in '(?!.*norm|bn)' / '(?=.*norm|bn)' the '|' separates '.*norm' from 'bn', so 'bn' is only tested at the start of the name - confirm intended
+      params: '^(?=.*backbone)(?!.*norm|bn).*$'
+      lr: 0.0004
+    -
+      params: '^(?=.*backbone)(?=.*norm|bn).*$'
+      lr: 0.0004
+      weight_decay: 0.
+    -
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
+      weight_decay: 0.
+
+  lr: 0.0008
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+# Increase to search for the optimal ema
+epoches: 160 # 148 + 12 (stop_epoch + no_aug_epoch)
+
+## Our LR-Scheduler
+flat_epoch: 78    # 4 + 148 // 2 (general form: 4 + epoch // 2)
+no_aug_epoch: 12
+lr_gamma: 1.0
+
+## Our DataAug
+train_dataloader: 
+  dataset: 
+    transforms:
+      policy:
+        epoch: [4, 78, 148]   # list of epochs instead of a single stop epoch
+
+  collate_fn:
+    mixup_epochs: [4, 78]
+    stop_epoch: 148
+    base_size_repeat: ~
diff --git a/configs/deim_dfine/deim_hgnetv2_s_visdrone.yml b/configs/deim_dfine/deim_hgnetv2_s_visdrone.yml
@@ -0,0 +1,46 @@
+__include__: [
+  './dfine_hgnetv2_s_coco.yml',
+  '../base/deim.yml'
+]
+
+print_freq: 100
+output_dir: ./outputs/deim_hgnetv2_s_visdrone
+
+
+
+optimizer:
+  type: AdamW
+  params: 
+    - 
+      params: '^(?=.*backbone)(?!.*bn).*$'
+      lr: 0.0002
+    - 
+      params: '^(?=.*(?:norm|bn)).*$'     # names containing norm/bn; 'bias' is not part of this pattern
+      weight_decay: 0.
+
+  lr: 0.0004
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+
+# Increase to search for the optimal ema
+epoches: 132 # 120 + 4n (here n = 3)
+
+## Our LR-Scheduler
+flat_epoch: 64    # 4 + 120 // 2 (general form: 4 + epoch // 2)
+no_aug_epoch: 12
+
+## Our DataAug
+train_dataloader: 
+  dataset: 
+    transforms:
+      policy:
+        epoch: [4, 64, 120]   # list of epochs instead of a single stop epoch
+  total_batch_size: 4
+
+  collate_fn:
+    mixup_epochs: [4, 64]
+    stop_epoch: 120
+
+val_dataloader:
+  total_batch_size: 8
diff --git a/configs/deim_dfine/dfine_hgnetv2_s_coco.yml b/configs/deim_dfine/dfine_hgnetv2_s_coco.yml
@@ -1,5 +1,6 @@
 __include__: [
-  '../dataset/coco_detection.yml',
+  # '../dataset/coco_detection.yml',
+  '../dataset/visdrone_detection.yml',  # relative like the sibling includes, instead of a machine-specific absolute path
   '../runtime.yml',
   '../base/dataloader.yml',
   '../base/optimizer.yml',