Skip to content

Commit 3e12110

Browse files
committed
sd
1 parent b5ec09a commit 3e12110

1 file changed

Lines changed: 14 additions & 1 deletion

File tree

configs/teacher/sst2_hf.yaml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
model:
2+
model_name: "microsoft/deberta-v3-base"
3+
num_labels: 2
4+
use_fast_tokenizer: true
5+
6+
data:
7+
dataset_path: "./data/clean/" # Local dataset directory (a filesystem path, not an HF Hub dataset identifier)
8+
max_len: 32
9+
train_split: "train"
10+
validation_split: "val"
11+
test_split: "test"
12+
113
training:
214
# ---------- bookkeeping ----------
315
output_dir: "runs/teacher/deberta_v3_base"
@@ -11,7 +23,7 @@ training:
1123
per_device_train_batch_size: 64 # 4× bigger than before; fits easily in 80 GB
1224
per_device_eval_batch_size: 256 # evaluation is memory‑lighter, so push higher
1325
gradient_accumulation_steps: 1 # no need for micro‑batching on an H100
14-
num_train_epochs: 6 # SST‑2 is small; 4 epochs normally reaches peak F1
26+
num_train_epochs: 4 # SST‑2 is small; 4 epochs normally reaches peak F1
1527

1628
# ---------- precision & speed ----------
1729
bf16: true # H100 has native BF16; gives ~1.8× speed‑up over FP32
@@ -41,3 +53,4 @@ training:
4153
early_stopping_threshold: 0.0005
4254

4355
do_test_eval: true
56+

0 commit comments

Comments
 (0)