-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_second_round.py
More file actions
executable file
·173 lines (143 loc) · 5.62 KB
/
run_second_round.py
File metadata and controls
executable file
·173 lines (143 loc) · 5.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/env python3
"""
Quick launcher for Second Round Hyperparameter Experiments
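Usage:
    python run_second_round.py              # Run all experiments
    python run_second_round.py --phase 1    # Request Phase 1 only (the launcher does not yet forward this to the experiment script)
    python run_second_round.py --resume     # Resume from checkpoint
    python run_second_round.py --dry-run    # Show what would run
    python run_second_round.py --check      # Check dependencies and database only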
"""
import argparse
import sys
import os
import subprocess
import time
from pathlib import Path
def check_dependencies(required_packages=None):
    """Check that the required packages can be imported.

    Args:
        required_packages: Optional iterable of importable module names to
            verify. When omitted, the launcher's default list is used
            (torch, numpy, matplotlib, sklearn, tqdm).

    Returns:
        True if every package imports successfully. Otherwise prints the
        missing module names together with a ``pip install`` hint and
        returns False.
    """
    import importlib

    if required_packages is None:
        required_packages = ['torch', 'numpy', 'matplotlib', 'sklearn', 'tqdm']

    # Import names whose pip distribution name differs; suggesting
    # "pip install sklearn" would not install scikit-learn.
    pip_names = {'sklearn': 'scikit-learn'}

    missing_packages = []
    for package in required_packages:
        try:
            # Really import (rather than just locate) the module so that a
            # present-but-broken installation is reported as missing too.
            importlib.import_module(package)
        except ImportError:
            missing_packages.append(package)

    if missing_packages:
        print(f"❌ Missing required packages: {', '.join(missing_packages)}")
        install_names = [pip_names.get(name, name) for name in missing_packages]
        print("Please install them with: pip install " + " ".join(install_names))
        return False
    return True
def check_database(db_path="data/MPD/mpd_database.db"):
    """Check that the MPD database file exists.

    Args:
        db_path: Location of the database file. Defaults to
            ``data/MPD/mpd_database.db``, resolved against the current
            working directory.

    Returns:
        True if ``db_path`` is an existing regular file, False otherwise.
        A status line is printed in both cases.
    """
    # Require a regular file: a directory with the same name would pass a
    # plain existence check but could never be opened as a database.
    if not Path(db_path).is_file():
        print(f"❌ Database not found: {db_path}")
        print("Please ensure the MPD database is built first")
        return False
    print(f"✅ Database found: {db_path}")
    return True
def estimate_runtime():
    """Print a fixed runtime estimate for the full experiment sweep.

    The numbers are hard-coded rather than measured. Returns None.
    """
    # First-round runs took roughly 6-18 minutes each, so 35 experiments
    # at ~12 minutes on average comes to about 7 hours.
    report = (
        "⏱️ Runtime Estimate:",
        " • Total experiments: 35",
        " • Average time per experiment: ~12 minutes",
        " • Estimated total time: ~7 hours",
        " • With early stopping: ~5-6 hours",
        "",  # trailing blank line separating this section from what follows
    )
    for row in report:
        print(row)
def show_experiment_summary():
    """Print the planned experiments, grouped by phase.

    The listing is static text: each phase heading is followed by its
    bullet lines and a blank separator line. Returns None.
    """
    phases = (
        (" Phase 1 (High Impact):", (
            " • Learning Rate: 6 experiments (0.005, 0.008, 0.01, 0.012, 0.015, 0.02)",
            " • Negative Samples: 5 experiments (15, 18, 22, 25, 30)",
            " • Hidden Dimensions: 5 experiments (64, 96, 128, 192, 256)",
        )),
        (" Phase 2 (Medium Impact):", (
            " • Context Windows: 4 experiments (2, 3, 4, 5)",
            " • Weight Decay: 4 experiments (0.0, 0.0001, 0.0005, 0.001)",
            " • Batch Sizes: 4 experiments (512, 1024, 2048, 4096)",
        )),
        (" Phase 3 (Advanced):", (
            " • Learning Rate Schedulers: 3 experiments (cosine, step, plateau)",
            " • Optimizers: 2 experiments (AdamW, RAdam)",
            " • Plus 2 baseline experiments",
        )),
    )

    print("📋 Experiment Summary:")
    for heading, bullets in phases:
        print(heading)
        for bullet in bullets:
            print(bullet)
        print()
def run_experiments(phase=None, resume=False, dry_run=False):
    """Launch the second-round experiment script as a subprocess.

    Args:
        phase: Phase number requested by the user, or None. The launcher
            does not pass this to the experiment script, so a non-None
            value only produces a warning that all phases will run.
        resume: If True, print a note that the run resumes from a
            checkpoint. Nothing is added to the command line for it.
        dry_run: If True, print the command that would be executed and
            return without running it.

    Returns:
        True for a dry run or when the script exits with status 0. False
        when the script file is missing, cannot be launched, exits with a
        non-zero status, or the run is interrupted with Ctrl+C.
    """
    script_path = "pipeline/second_round_experiments.py"
    if not os.path.exists(script_path):
        print(f"❌ Experiment script not found: {script_path}")
        return False

    # Build command
    cmd = [sys.executable, script_path]

    if phase is not None:
        # The command carries no phase argument, so claiming that only one
        # phase runs would be wrong; tell the user what will really happen.
        print(f"⚠️ Phase {phase} requested, but phase filtering is not passed "
              f"to {script_path}; all phases will run")
    if resume:
        print("🔄 Resuming from checkpoint")

    if dry_run:
        print("🔍 Dry run - showing what would be executed")
        print(f"Command: {' '.join(cmd)}")
        return True

    print("🚀 Starting experiments...")
    print(" Press Ctrl+C to pause (experiments will resume from checkpoint)")
    print()

    try:
        # Run the experiment script and wait for it to finish.
        completed = subprocess.run(cmd)
    except KeyboardInterrupt:
        print("\n⏸️ Experiments paused by user")
        print(" Progress saved to checkpoint. Run again to resume.")
        return False
    except OSError as e:
        # The interpreter itself could not be started.
        print(f"❌ Could not launch experiment script: {e}")
        return False

    if completed.returncode != 0:
        print(f"❌ Experiments failed with error code: {completed.returncode}")
        return False
    return True
def main():
    """Parse command-line options, run pre-flight checks, and launch.

    Flow: verify dependencies and the database (exit status 1 on
    failure); stop after the checks if ``--check`` was given; otherwise
    print the experiment summary and runtime estimate, ask for
    confirmation unless ``--dry-run`` is set, and run the experiments.
    Exits with status 1 when the run fails or is interrupted.
    """
    parser = argparse.ArgumentParser(description="Second Round Hyperparameter Experiments")
    parser.add_argument("--phase", type=int, choices=[1, 2, 3],
                        help="Run only specific phase")
    parser.add_argument("--resume", action="store_true",
                        help="Resume from checkpoint")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would run without executing")
    parser.add_argument("--check", action="store_true",
                        help="Check dependencies and database only")
    args = parser.parse_args()

    print("🎵 Spotify Profiler - Second Round Experiments")
    print("=" * 50)

    # Check dependencies
    if not check_dependencies():
        sys.exit(1)

    # Check database
    if not check_database():
        sys.exit(1)

    if args.check:
        print("✅ All checks passed!")
        return

    # Show experiment summary
    show_experiment_summary()

    # Estimate runtime
    estimate_runtime()

    # Confirm before running
    if not args.dry_run:
        try:
            response = input("🚀 Ready to start experiments? (y/N): ")
        except EOFError:
            # stdin is closed or not interactive: treat it as "not
            # confirmed" instead of crashing with a traceback.
            print()
            response = ""
        # Tolerate surrounding whitespace and a spelled-out "yes".
        if response.strip().lower() not in ('y', 'yes'):
            print("❌ Aborted by user")
            return

    # Run experiments
    success = run_experiments(
        phase=args.phase,
        resume=args.resume,
        dry_run=args.dry_run
    )

    if success:
        print("🎉 Experiments completed successfully!")
    else:
        print("❌ Experiments failed or were interrupted")
        sys.exit(1)


if __name__ == "__main__":
    main()