-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup_data.py
More file actions
59 lines (49 loc) · 1.8 KB
/
setup_data.py
File metadata and controls
59 lines (49 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python3
"""
Download all portfolio datasets from Kaggle into data/.
Requires: pip install kaggle, and ~/.kaggle/kaggle.json (from Kaggle Account > Create New Token).
Optional: Jane Street competition requires accepting rules at
https://www.kaggle.com/c/jane-street-market-prediction (skip with --no-jane-street).
"""
import argparse
import sys
from pathlib import Path
# Add project root for imports
sys.path.insert(0, str(Path(__file__).resolve().parent))
from portfolio_utils.data_loader import (
get_data_dir,
ensure_dataset,
KAGGLE_DATASETS,
)
def main(argv=None):
    """Download the portfolio datasets from Kaggle, reporting progress on stdout.

    Every key in ``KAGGLE_DATASETS`` is passed to ``ensure_dataset``. Unless
    ``--no-jane-street`` is given, ``load_jane_street`` is then called as a
    separate, best-effort step.

    Args:
        argv: Argument list to parse in place of ``sys.argv[1:]``. ``None``
            (the default) makes argparse read the process arguments, which
            is what the script entry point relies on.

    Raises:
        Exception: Whatever ``ensure_dataset`` raised for the first dataset
            that failed. The failure is printed and then re-raised, so the
            remaining datasets are not attempted.
    """
    parser = argparse.ArgumentParser(description="Download portfolio datasets from Kaggle.")
    parser.add_argument(
        "--no-jane-street",
        action="store_true",
        help="Skip Jane Street (competition; requires accepting rules and large download).",
    )
    args = parser.parse_args(argv)

    data_dir = get_data_dir()
    print(f"Data directory: {data_dir}")

    for key in KAGGLE_DATASETS:
        # Honour --no-jane-street inside the loop as well: if the registry
        # lists "jane_street", the flag must keep it from being downloaded
        # (and from aborting the run when the competition rules were never
        # accepted).
        if args.no_jane_street and key == "jane_street":
            print(f" SKIP: {key} (--no-jane-street)")
            continue

        print(f"Ensuring dataset: {key} ...")
        try:
            ensure_dataset(key)
            print(f" OK: {key}")
        except Exception as e:
            print(f" FAIL: {key} - {e}")
            if key == "jane_street" and "competition" in str(e).lower():
                print(" Tip: Accept competition rules at https://www.kaggle.com/c/jane-street-market-prediction")
            # Report, then propagate: a failed dataset stops the whole run.
            raise

    if not args.no_jane_street:
        print("Ensuring Jane Street (competition) ...")
        try:
            # Imported inside the branch so the loader is only resolved when
            # Jane Street is actually requested.
            from portfolio_utils.data_loader import load_jane_street

            load_jane_street()
            print(" OK: jane_street")
        except Exception as e:
            # Best-effort step: report the problem and carry on to "Done."
            print(f" SKIP or FAIL: {e}")
            print(" To skip: run with --no-jane-street")

    print("Done.")
# Run the downloader only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()