-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpipeline.sh
More file actions
executable file
·60 lines (54 loc) · 1.7 KB
/
pipeline.sh
File metadata and controls
executable file
·60 lines (54 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/bin/bash
#
# End-to-end pipeline: build the consolidated spectrogram dataset, train the
# model on it, then run a single reconstruction inference with the best
# checkpoint. Paths are relative, so run this from the repository root.
#
# -e: abort on the first failing command.
# -u: treat an unset variable as an error, so a misspelled name cannot
#     silently turn into an empty command-line argument.
# pipefail is intentionally not enabled here; turning it on requires auditing
# every pipeline that ends in an early-exiting reader such as `head`, because
# the upstream command can then die from SIGPIPE and abort the script.
set -eu

# Configuration (paths)
readonly INPUT_DIR="data/1_validated-audio"
readonly OUTPUT_FILE="data/dataset_consolidated.h5"
readonly METADATA_FILE="data/data-file/validated.tsv"

# Spectrogram parameters, passed verbatim to the preprocessing module.
# NOTE(review): the units of WIN_LENGTH / HOP_LENGTH / SEGMENT_DURATION are
# defined by create_consolidated_dataset, not by this script - confirm there
# before changing them.
readonly N_FFT=512
readonly WIN_LENGTH=20
readonly HOP_LENGTH=10
readonly N_MELS=80
readonly F_MIN=50
readonly F_MAX=7600
readonly SEGMENT_DURATION=0.1
readonly OVERLAP=0.5
# Step 1: Create consolidated dataset with Log-Mel spectrograms
#
# The arguments are collected in an array and every expansion is quoted, so
# each value reaches Python as exactly one argument. An empty or
# whitespace-containing value then produces a clear argument error instead of
# silently shifting the following flag into its place.
preprocess_args=(
  --input-dir "$INPUT_DIR"
  --output-file "$OUTPUT_FILE"
  --metadata-file "$METADATA_FILE"
  --n-fft "$N_FFT"
  --win-length "$WIN_LENGTH"
  --hop-length "$HOP_LENGTH"
  --n-mels "$N_MELS"
  --f-min "$F_MIN"
  --f-max "$F_MAX"
  --segment-duration "$SEGMENT_DURATION"
  --overlap "$OVERLAP"
)
python -m src.preprocessing.create_consolidated_dataset "${preprocess_args[@]}"
# Step 2: Train model
#
# The dataset path comes from OUTPUT_FILE (the file written by step 1) rather
# than a second copy of the literal, so the two steps cannot drift apart when
# the configuration changes. The :? guard aborts with a message if the
# variable is ever unset or empty.
train_args=(
  --use-consolidated
  --dataset-path "${OUTPUT_FILE:?OUTPUT_FILE must be set}"
  --batch-size 128
  --num-workers 8
  --epochs 100
  --lr 1e-4
  --checkpoint-freq 10
  --experiment-name "spectrogram-dreamer-v1"
  --h-state-size 200
  --z-state-size 30
  --action-size 128
)
python main.py "${train_args[@]}"
# Step 3: Test inference with best model

#######################################
# Run one reconstruction inference using the most recently modified
# best_model.pt under ./checkpoints and one audio file from INPUT_DIR.
# The step is best-effort: when a prerequisite is missing it prints the
# reason to stderr and returns 0 instead of failing the pipeline.
# Globals:   INPUT_DIR (read), LATEST_CHECKPOINT (written), TEST_AUDIO (written)
# Arguments: none
# Outputs:   skip reasons on stderr; otherwise whatever infer.py prints
# Returns:   0 when skipped, else the exit status of infer.py
#######################################
run_inference_smoke_test() {
  local candidate

  if [[ ! -d "checkpoints" ]]; then
    printf 'Skipping inference: no "checkpoints" directory found\n' >&2
    return 0
  fi

  # Select the newest checkpoint with bash's -nt test rather than
  # `find -printf`, which is a GNU extension: on BSD/macOS find it fails,
  # the pipeline hides the failure, and inference is skipped silently.
  # NUL-delimited reads keep paths with spaces or newlines intact.
  LATEST_CHECKPOINT=""
  while IFS= read -r -d '' candidate; do
    if [[ -z "$LATEST_CHECKPOINT" || "$candidate" -nt "$LATEST_CHECKPOINT" ]]; then
      LATEST_CHECKPOINT=$candidate
    fi
  done < <(find checkpoints -type f -name "best_model.pt" -print0)

  if [[ -z "$LATEST_CHECKPOINT" ]]; then
    printf 'Skipping inference: no best_model.pt under "checkpoints"\n' >&2
    return 0
  fi

  # Take the first audio file find reports. Stopping the read loop early
  # (instead of piping into `head`) keeps find's SIGPIPE out of any
  # pipeline exit status.
  TEST_AUDIO=""
  while IFS= read -r -d '' candidate; do
    TEST_AUDIO=$candidate
    break
  done < <(find "$INPUT_DIR" -type f \( -name "*.mp3" -o -name "*.wav" \) -print0)

  if [[ -z "$TEST_AUDIO" ]]; then
    printf 'Skipping inference: no .mp3/.wav file found in "%s"\n' "$INPUT_DIR" >&2
    return 0
  fi

  python infer.py \
    --model "$LATEST_CHECKPOINT" \
    --input "$TEST_AUDIO" \
    --mode recon \
    --use_log
}

run_inference_smoke_test