File tree Expand file tree Collapse file tree
model:
  model_name: "microsoft/deberta-v3-base"
  num_labels: 2
  use_fast_tokenizer: true
5+
data:
  dataset_path: "./data/clean/"  # local directory; replace with an HF Hub dataset identifier to load remotely
  max_len: 32
  train_split: "train"
  validation_split: "val"
  test_split: "test"
12+
training:
  # ---------- bookkeeping ----------
  output_dir: "runs/teacher/deberta_v3_base"
@@ -11,7 +23,7 @@ training:
  per_device_train_batch_size: 64  # 4x bigger than before; fits easily in 80 GB
  per_device_eval_batch_size: 256  # evaluation is memory-lighter, so push higher
  gradient_accumulation_steps: 1  # no need for micro-batching on an H100
14- num_train_epochs : 6 # SST‑2 is small; 4 epochs normally reaches peak F1
  num_train_epochs: 4  # SST-2 is small; 4 epochs normally reaches peak F1
1527
  # ---------- precision & speed ----------
  bf16: true  # H100 has native BF16; gives ~1.8x speed-up over FP32
@@ -41,3 +53,4 @@ training:
  early_stopping_threshold: 0.0005

  do_test_eval: true
56+
You can’t perform that action at this time.
0 commit comments