Changes from all commits (119 commits)
6334d1f
adding code to make ddp work
ksreenivasan Apr 26, 2022
9e1f0de
trying out how args work in ddp
ksreenivasan Apr 26, 2022
f616ab6
trying out stuff with parser_args in ddp setting
ksreenivasan Apr 26, 2022
e6b93a6
adding ddp_utils
ksreenivasan Apr 26, 2022
f202a42
adding ddp_utils
ksreenivasan Apr 26, 2022
513b8a6
looks like reimporting doesn't hurt the global values
ksreenivasan Apr 26, 2022
02a6627
adding more code to implement ddp
ksreenivasan Apr 26, 2022
7f1e0f6
testing ddp with deepcopy
ksreenivasan Apr 26, 2022
6cc557b
looks like copy.deepcopy causes sync issues
ksreenivasan Apr 26, 2022
dd0ba73
fixing typo in ddp deepcopy
ksreenivasan Apr 26, 2022
1d21c5a
finishing up ddp for sanity checks and finetune
ksreenivasan Apr 26, 2022
691cffa
minor bugfixes for ddp
ksreenivasan Apr 26, 2022
ffca648
removing .cuda() from the codebase
ksreenivasan Apr 26, 2022
e1c9810
trying to catch the rank=0 leak
ksreenivasan Apr 26, 2022
64cbf57
I think ddp works!
ksreenivasan Apr 26, 2022
eb12a2d
skipping checkpoint, trying to get rid of tqdm
ksreenivasan Apr 28, 2022
f4fbff0
trying to eliminate tqdm and progress meters
ksreenivasan Apr 28, 2022
69048e0
trainer doesn't see parser_args
ksreenivasan Apr 28, 2022
3834b3d
still removing leftover calls to tqdm and progresmeters
ksreenivasan Apr 28, 2022
3ba1f93
removing more instances of progressmeter and replacing them with vani…
ksreenivasan Apr 28, 2022
77fbe48
fixing dir creation bug
ksreenivasan Apr 28, 2022
1ffc000
adding config for serial debug
ksreenivasan Apr 28, 2022
53b90c0
stuff works, removing an unnecessary barrier
ksreenivasan Apr 29, 2022
10b1ee4
adding imagenet configs and exec script
ksreenivasan Apr 29, 2022
38550af
setting find_unused_parameters=False and adding config for sp10
ksreenivasan May 3, 2022
50805e3
pushing config for sp15 imagenet
May 4, 2022
fb1eab0
minor tweaks to configs for imagenet
May 4, 2022
5ccc7a3
adding port as param for multiple runs
ksreenivasan May 4, 2022
9052870
updating port to be a str not int
ksreenivasan May 6, 2022
2e17a43
pushing psutil memory logs
ksreenivasan May 7, 2022
bb604ab
adding more memory logs
ksreenivasan May 7, 2022
7b21e75
adding more psutil logs
ksreenivasan May 7, 2022
7ef0dfe
updating memory logs only for master
May 7, 2022
2e199d4
updating some net_utils functions which are recreating ddp objects
ksreenivasan May 8, 2022
e3a4b8c
updating validate. I think that's where the leak is
ksreenivasan May 8, 2022
da11ccb
adding more fixes to hopefully handle the memory leak
ksreenivasan May 8, 2022
c9b64cc
updating val logs with gpu
ksreenivasan May 8, 2022
f112628
adding debug loop
ksreenivasan May 8, 2022
a0603b3
adding memory log to help debug
ksreenivasan May 8, 2022
90a827b
will reset this commit. but keeping it here for debug
ksreenivasan May 9, 2022
dfdd3d8
pushing ddp debug logt
ksreenivasan May 9, 2022
f817792
modifying data loader to look more like pytorch example
ksreenivasan May 9, 2022
9c5dad2
trying more things with dataloader
ksreenivasan May 9, 2022
0d8fe44
trying to avoid using data loader object
ksreenivasan May 9, 2022
7f995a7
Merge branch 'ddp' of github.com:ksreenivasan/pruning_is_enough into ddp
ksreenivasan May 9, 2022
8e3503b
removing trainer
ksreenivasan May 9, 2022
9853555
trying to see if imports are the issue
ksreenivasan May 9, 2022
a767fb1
removing more imports
ksreenivasan May 9, 2022
df656bc
adding set_seed
ksreenivasan May 9, 2022
1eeab6c
removing parser_args
ksreenivasan May 9, 2022
af1a52a
some more minor attempts at fixes
ksreenivasan May 9, 2022
33580a2
checking vanilla data loader memory
ksreenivasan May 11, 2022
59886f9
Merge branch 'ddp' of github.com:ksreenivasan/pruning_is_enough into ddp
ksreenivasan May 11, 2022
d45d514
looks like vanilla data loader works!
ksreenivasan May 11, 2022
5096528
moving args outside main
ksreenivasan May 11, 2022
04f7f9f
Merge branch 'ddp' of github.com:ksreenivasan/pruning_is_enough into ddp
ksreenivasan May 11, 2022
d1e22fd
trying to avoid mp.spawn()
ksreenivasan May 11, 2022
a91bd06
trying to avoid mp.spawn() in main also
ksreenivasan May 11, 2022
9cb4ae4
changing main to get rank as arg
ksreenivasan May 11, 2022
3a7ea3d
bringing more functionality back in
ksreenivasan May 11, 2022
0726f75
trying original val loop without mp.spawn
ksreenivasan May 12, 2022
fdee77b
trying train without mp.spawn()
ksreenivasan May 12, 2022
e1dda18
minor bugfix
ksreenivasan May 12, 2022
28929b3
making logs camel case
ksreenivasan May 12, 2022
87daf12
minor bugfix
ksreenivasan May 12, 2022
2748435
changing log file name
ksreenivasan May 12, 2022
045387a
distributed launch
PaulCCCCCCH May 12, 2022
8d82298
fixing validate bug and trying to be careful with memory before finetune
ksreenivasan May 12, 2022
815bb55
getting rid of before round acc
ksreenivasan May 12, 2022
dd5a27e
starting from scratch. implementing just GM imagenet from the pytorch…
ksreenivasan May 13, 2022
c78e35d
Merge branch 'ddp' of github.com:ksreenivasan/pruning_is_enough into ddp
ksreenivasan May 13, 2022
c00eca9
minor bugfixes to create model with scores
ksreenivasan May 13, 2022
a40f048
deleting whitespace
ksreenivasan May 13, 2022
721b71c
adding run script
ksreenivasan May 13, 2022
94dc5ef
trying to add mixed_precision to see if I can increase batch size to 1k
ksreenivasan May 13, 2022
8a87780
okay looks like mixed precision did the trick!
ksreenivasan May 13, 2022
ae77d7b
adding timing logs and checking if it works for GM
ksreenivasan May 13, 2022
8d71891
adding few more timing logs
ksreenivasan May 13, 2022
123411d
minor bugfixes in subnetconv
ksreenivasan May 13, 2022
bf6dbe4
adding scaler in main training loop
ksreenivasan May 13, 2022
5f6aafe
adding timing logs to original pytorch example to compare
ksreenivasan May 14, 2022
6e40fca
adding lists to store results
ksreenivasan May 14, 2022
02d3a78
adding regularizer and some other features
ksreenivasan May 14, 2022
1b7313c
adding imports
ksreenivasan May 14, 2022
9834796
minor bugfixes
ksreenivasan May 14, 2022
0e0662a
adding additional project step and reordering reg_loss
ksreenivasan May 14, 2022
9205fb9
adding a commented print to check regularization_loss is working
ksreenivasan May 14, 2022
73f36ab
trying switch_to_wt to sanity check finetune
ksreenivasan May 14, 2022
a4752c5
Merge branch 'ddp' of github.com:ksreenivasan/pruning_is_enough into ddp
ksreenivasan May 14, 2022
04be2b8
adding round_all_ones before switch_to_wt
ksreenivasan May 14, 2022
d2dbd2b
trying gm with switch_to_wt to sanity check the code
ksreenivasan May 14, 2022
910486c
making sure we save model.module not DDP model itself
ksreenivasan May 14, 2022
dc77a55
adding switch_to_prune() and trying kaiming normal init for wt training
ksreenivasan May 15, 2022
f1233e1
adding prune(), get_sparsity etc.
ksreenivasan May 15, 2022
5b447a2
adding model strings to make comparing easier
ksreenivasan May 29, 2022
9cf9105
updating model with bias=False and affine bn
ksreenivasan May 29, 2022
c416362
trying to mimic torchvision resnet50
ksreenivasan May 29, 2022
51594fc
changing last layer to linear
ksreenivasan May 29, 2022
8d52ef8
minor bugfixes adding bias to final fc layer
ksreenivasan May 29, 2022
63f38ce
plugging in subnetconv model into original script to compare
ksreenivasan May 29, 2022
0b6cfdc
minor bugfixes and adding weight init to builder
ksreenivasan May 30, 2022
d263960
adding round_model to complete comparison between mymodel and torchvi…
ksreenivasan May 30, 2022
19d7044
it works! looks like the models are near identical now.
ksreenivasan May 31, 2022
3a7ad5e
changing model back to signed_constant for GM
ksreenivasan Jun 1, 2022
cfd62cb
turns out you need to switch_to_wt before instantiating ddp
ksreenivasan Jun 1, 2022
d9743ec
adding finetune feature
ksreenivasan Jun 1, 2022
188735d
finetuning seems to work well!
ksreenivasan Jun 1, 2022
ccf2867
adding save_model before finetune and bugfixes
ksreenivasan Jun 3, 2022
9d11bb3
trying cifar10 on ddp to debug
ksreenivasan Jun 3, 2022
7a933c9
changing model to resnet20
ksreenivasan Jun 3, 2022
873f1a6
adding get_layers for resnet20
ksreenivasan Jun 3, 2022
a276ffd
minor typo for cifar10 ddp, looks like it works
ksreenivasan Jun 3, 2022
782a85f
print reg_loss only for pruning
ksreenivasan Jun 3, 2022
b139623
adding optimizer and results subdir for hyperparam opt
ksreenivasan Jun 6, 2022
5c35b32
minor typo with opt
ksreenivasan Jun 6, 2022
f6116fe
adding mkdir for subfolder
ksreenivasan Jun 6, 2022
e1c68d3
updating run script
ksreenivasan Jun 7, 2022
20b98ce
minor changes. for cifar10 testing i think
ksreenivasan Jun 18, 2022
6a053b7
Merge branch 'ddp' of github.com:ksreenivasan/pruning_is_enough into ddp
ksreenivasan Jun 18, 2022
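Several commits above wrestle with the DDP wrapper itself — notably 910486c, "making sure we save model.module not DDP model itself". `DistributedDataParallel` stores the original network as `.module`, and saving the wrapper's `state_dict` prefixes every key with `module.`, which breaks loading outside DDP. A minimal sketch of the unwrapping pattern (the helper name is ours, not the repo's):

```python
def unwrap_model(model):
    """Return the underlying network for checkpointing.

    DistributedDataParallel exposes the wrapped network as `.module`;
    saving that inner module's state_dict keeps checkpoint keys free of
    the `module.` prefix. For a plain (non-wrapped) model this is a no-op.
    """
    return getattr(model, "module", model)


# At save time, something like:
#   torch.save(unwrap_model(model).state_dict(), path)
```

The `getattr` fallback lets the same save path serve both the serial-debug and DDP configs added later in this PR.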
58 changes: 40 additions & 18 deletions args_helper.py
@@ -878,43 +878,59 @@ def parse_arguments(self, jupyter_mode=False):
default=0,
help="Use mixed precision or not"
)
parser.add_argument('--transformer_emsize', type=int, default=200,
help='size of word embeddings')
parser.add_argument('--transformer_nhid', type=int, default=200,
help='number of hidden units per layer')
parser.add_argument('--transformer_nlayers', type=int, default=2,
help='number of layers')
parser.add_argument('--transformer_clip', type=float, default=0.25,
help='gradient clipping')
parser.add_argument('--transformer_bptt', type=int, default=35,
help='sequence length')
parser.add_argument('--transformer_dropout', type=float, default=0.2,
help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--transformer_nhead', type=int, default=2,
help='the number of heads in the encoder/decoder of the transformer model')

parser.add_argument('--transformer_emsize',
type=int, default=200,
help='size of word embeddings'
)
parser.add_argument('--transformer_nhid',
type=int,
default=200,
help='number of hidden units per layer'
)
parser.add_argument('--transformer_nlayers',
type=int,
default=2,
help='number of layers'
)
parser.add_argument('--transformer_clip',
type=float,
default=0.25,
help='gradient clipping'
)
parser.add_argument('--transformer_bptt',
type=int,
default=35,
help='sequence length'
)
parser.add_argument('--transformer_dropout',
type=float,
default=0.2,
help='dropout applied to layers (0 = no dropout)'
)
parser.add_argument('--transformer_nhead',
type=int,
default=2,
help='the number of heads in the encoder/decoder of the transformer model'
)
parser.add_argument(
"--only-sanity",
action="store_true",
default=False,
help="Only run sanity checks on the files in specific directory or subdirectories"
)

parser.add_argument(
"--invert-sanity-check",
action="store_true",
default=False,
help="Enable this to run the inverted sanity check (for HC)"
)

parser.add_argument(
"--sanity-folder",
default=None,
type=str,
metavar="PATH",
help="directory(s) to access for only sanity check",
)

parser.add_argument(
"--sr-version",
default=1,
@@ -927,6 +943,12 @@ def parse_arguments(self, jupyter_mode=False):
default=False,
help="Enable this use full train data and not leave anything for validation"
)
parser.add_argument(
"--port",
default=29500,
type=int,
help="Specify port to use for DDP",
)

if jupyter_mode:
args = parser.parse_args("")
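The diff above adds a `--port` flag (commit 5ccc7a3, "adding port as param for multiple runs"), and commit 9052870 notes the value has to reach the environment as a string. A hedged sketch of how such a flag might feed DDP's `env://` rendezvous — the `set_rendezvous_env` helper and the loopback default address are illustrative, not from this PR:

```python
import argparse
import os

parser = argparse.ArgumentParser()
# Mirrors the new flag above: parsed as an int, cast to str below,
# because os.environ (and torch's env:// rendezvous) accept only strings.
parser.add_argument("--port", default=29500, type=int,
                    help="Specify port to use for DDP")


def set_rendezvous_env(args, addr="127.0.0.1"):
    # torch.distributed.init_process_group(init_method="env://") reads
    # MASTER_ADDR and MASTER_PORT from the environment.
    os.environ["MASTER_ADDR"] = addr
    os.environ["MASTER_PORT"] = str(args.port)


args = parser.parse_args(["--port", "29501"])
set_rendezvous_env(args)
```

Passing a distinct port per run is what allows several DDP jobs to share one machine without the rendezvous sockets colliding.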
6 changes: 3 additions & 3 deletions cifar_exec.sh
@@ -64,11 +64,11 @@ BLOCK
#:<<BLOCK
# Using validation to figure out hyperparams
# NOTE: make sure to delete/comment subfolder from the config file or else it may not work
conf_file="configs/training/resnet32/cifar100_resnet32_training"
conf_file="configs/hypercube/resnet20/resnet20_sparsity_1_44_unflagT_real"
conf_end=".yml"
log_root="resnet32_wt_"
log_root="ddp_resnet20_sp1_4_"
log_end="_log"
subfolder_root="resnet32_wt_"
subfolder_root="ddp_resnet20_sp1_4_"

for trial in 1
do
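The loop body is collapsed above, but the root/end variables suggest the usual compose-names-and-launch pattern. A sketch under that assumption — the composition and the `echo` stand in for whatever launch command the script actually runs:

```shell
conf_file="configs/hypercube/resnet20/resnet20_sparsity_1_44_unflagT_real"
conf_end=".yml"
log_root="ddp_resnet20_sp1_4_"
log_end="_log"

for trial in 1; do
    # Compose the per-trial config path and log name from the roots.
    config="${conf_file}${conf_end}"
    log="${log_root}${trial}${log_end}"
    echo "config=${config} log=${log}"
done
```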
63 changes: 63 additions & 0 deletions configs/ddp_debug/conf1.yml
@@ -0,0 +1,63 @@
# subfolder: target_sparsity_0_59_unflagT_real

# Hypercube optimization
algo: 'hc_iter'
iter_period: 5

# Architecture
arch: resnet20

# ===== Dataset ===== #
dataset: CIFAR10
name: resnet20_quantized_iter_hc

# ===== Learning Rate Policy ======== #
optimizer: sgd
lr: 0.1 #0.01
lr_policy: cosine_lr #constant_lr #multistep_lr
fine_tune_lr: 0.01
fine_tune_lr_policy: multistep_lr

# ===== Network training config ===== #
epochs: 20
wd: 0.0
momentum: 0.9
batch_size: 512

# ===== Sparsity =========== #
conv_type: SubnetConv
bn_type: NonAffineBatchNorm
freeze_weights: True
prune_type: BottomK
# enter target sparsity here
target_sparsity: 20
# decide if you want to "unflag"
unflag_before_finetune: True
init: signed_constant
score_init: unif #skew #half #bimodal #skew # bern
scale_fan: False #True

# ===== Rounding ===== #
round: naive
noise: True
noise_ratio: 0

# ===== Quantization ===== #
hc_quantized: True
quantize_threshold: 0.5

# ===== Regularization ===== #
regularization: L2
lmbda: 0.0001 # 1e-4

# ===== Hardware setup ===== #
workers: 8
# gpu: 1
multiprocessing_distributed: True
mixed_precision: True

# ===== Checkpointing ===== #
checkpoint_at_prune: False

# ==== sanity check ==== #
skip_sanity_checks: True
62 changes: 62 additions & 0 deletions configs/ddp_debug/conf2.yml
@@ -0,0 +1,62 @@
# subfolder: target_sparsity_0_59_unflagT_real

# Hypercube optimization
algo: 'hc_iter'
iter_period: 5

# Architecture
arch: resnet20

# ===== Dataset ===== #
dataset: CIFAR10
name: resnet20_quantized_iter_hc

# ===== Learning Rate Policy ======== #
optimizer: sgd
lr: 0.1 #0.01
lr_policy: cosine_lr #constant_lr #multistep_lr
fine_tune_lr: 0.01
fine_tune_lr_policy: multistep_lr

# ===== Network training config ===== #
epochs: 20
wd: 0.0
momentum: 0.9
batch_size: 512

# ===== Sparsity =========== #
conv_type: SubnetConv
bn_type: NonAffineBatchNorm
freeze_weights: True
prune_type: BottomK
# enter target sparsity here
target_sparsity: 20
# decide if you want to "unflag"
unflag_before_finetune: True
init: signed_constant
score_init: unif #skew #half #bimodal #skew # bern
scale_fan: False #True

# ===== Rounding ===== #
round: naive
noise: True
noise_ratio: 0

# ===== Quantization ===== #
hc_quantized: True
quantize_threshold: 0.5

# ===== Regularization ===== #
regularization: L2
lmbda: 0.0001 # 1e-4

# ===== Hardware setup ===== #
workers: 8
gpu: 0
mixed_precision: True

# ===== Checkpointing ===== #
checkpoint_at_prune: False

# ==== sanity check ==== #
skip_sanity_checks: True
@@ -55,7 +55,9 @@ lmbda: 0.0001 # 1e-4 #0.00005 # 5e-5

# ===== Hardware setup ===== #
workers: 4
gpu: 0
# gpu: 0
multiprocessing_distributed: True
mixed_precision: True

# ===== Checkpointing ===== #
checkpoint_at_prune: False
68 changes: 68 additions & 0 deletions configs/hypercube/resnet50/imagenet/resnet50_sparsity_10.yml
@@ -0,0 +1,68 @@
# subfolder: regular_imagenet_resnet50
# trial_num: 1
#lam_finetune_loss: 1
#num_step_finetune: 5

# Hypercube optimization
algo: 'hc_iter'
iter_period: 5

# Architecture
arch: ResNet50

# ===== Dataset ===== #
dataset: ImageNet
name: resnet50_imagenet
data: /home/ubuntu/ILSVRC2012/

# ===== Learning Rate Policy ======== #
optimizer: sgd
lr: 0.4 #0.01
lr_policy: cosine_lr #constant_lr #multistep_lr
fine_tune_lr: 0.04
fine_tune_lr_policy: multistep_lr

# ===== Network training config ===== #
epochs: 88
wd: 0.0
momentum: 0.9
batch_size: 1024
mixed_precision: True

# ===== Sparsity =========== #
conv_type: SubnetConv
bn_type: NonAffineBatchNorm
freeze_weights: True
prune_type: BottomK
# enter target sparsity here
target_sparsity: 10
# decide if you want to "unflag"
unflag_before_finetune: False
init: signed_constant
score_init: unif #skew #half #bimodal #skew # bern
scale_fan: False #True

# ===== Rounding ===== #
round: naive
noise: True
noise_ratio: 0

# ===== Quantization ===== #
hc_quantized: True
quantize_threshold: 0.5

# ===== Regularization ===== #
regularization: L2
lmbda: 0.0000001 # 1e-7

# ===== Hardware setup ===== #
workers: 12
multiprocessing_distributed: True
mixed_precision: True
# gpu: 1

# ===== Checkpointing ===== #
checkpoint_at_prune: False

# ==== sanity check ==== #
skip_sanity_checks: True
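This config pairs `lr: 0.4` with `batch_size: 1024`, which is consistent with the common linear learning-rate scaling heuristic (a 0.1 base rate at batch 256, scaled by 4); `fine_tune_lr: 0.04` is then a flat 10x reduction. A small illustrative check, assuming that heuristic is what motivated the values — the function is ours, not the repo's:

```python
def linear_scaled_lr(base_lr, batch_size, base_batch=256):
    # Linear scaling heuristic: grow the learning rate in proportion
    # to the global batch size relative to a reference batch.
    return base_lr * batch_size / base_batch


# 0.1 base at batch 256, run at the config's global batch of 1024:
# linear_scaled_lr(0.1, 1024) -> 0.4, matching `lr: 0.4` above.
```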
68 changes: 68 additions & 0 deletions configs/hypercube/resnet50/imagenet/resnet50_sparsity_15.yml
@@ -0,0 +1,68 @@
# subfolder: regular_imagenet_resnet50
# trial_num: 1
#lam_finetune_loss: 1
#num_step_finetune: 5

# Hypercube optimization
algo: 'hc_iter'
iter_period: 5

# Architecture
arch: ResNet50

# ===== Dataset ===== #
dataset: ImageNet
name: resnet50_imagenet
data: /home/ubuntu/ILSVRC2012/

# ===== Learning Rate Policy ======== #
optimizer: sgd
lr: 0.4 #0.01
lr_policy: cosine_lr #constant_lr #multistep_lr
fine_tune_lr: 0.04
fine_tune_lr_policy: multistep_lr

# ===== Network training config ===== #
epochs: 88
wd: 0.0
momentum: 0.9
batch_size: 1024
mixed_precision: True

# ===== Sparsity =========== #
conv_type: SubnetConv
bn_type: NonAffineBatchNorm
freeze_weights: True
prune_type: BottomK
# enter target sparsity here
target_sparsity: 15
# decide if you want to "unflag"
unflag_before_finetune: False
init: signed_constant
score_init: unif #skew #half #bimodal #skew # bern
scale_fan: False #True

# ===== Rounding ===== #
round: naive
noise: True
noise_ratio: 0

# ===== Quantization ===== #
hc_quantized: True
quantize_threshold: 0.5

# ===== Regularization ===== #
regularization: L2
lmbda: 0.00000005 # 5e-8

# ===== Hardware setup ===== #
workers: 12
multiprocessing_distributed: True
mixed_precision: True
# gpu: 1

# ===== Checkpointing ===== #
checkpoint_at_prune: False

# ==== sanity check ==== #
skip_sanity_checks: True