-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSnakefile
More file actions
64 lines (53 loc) · 1.8 KB
/
Snakefile
File metadata and controls
64 lines (53 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Snakefile — orchestrates the replication pipeline end-to-end.
#
# Replace the placeholder rules with your actual replication steps. The
# canonical pattern is one rule per pipeline stage, and each rule wraps a
# notebook executed via jupytext (so the notebook stays the source of truth
# and the Snakefile just sequences them).
#
# Usage:
#     snakemake --cores 1       # run everything
#     snakemake --cores 1 -n    # dry run
# Directory layout used by the rules in this file. All four are relative
# paths, and every rule builds its input/output/log paths from them.
NOTEBOOKS = "notebooks"  # jupytext .py notebooks, one per pipeline stage
DATA = "data"  # raw/ and clean/ datasets
RESULTS = "results"  # summary tables and logs/
FIGURES = "figures"  # rendered plots
# Aggregate target: it produces nothing itself, it only requests the final
# artefacts so that Snakemake schedules every stage needed to build them.
rule all:
    input:
        # Replace with your actual final artefacts:
        f"{FIGURES}/main_result.png",
        f"{RESULTS}/summary.csv",
# ---------- 01: Data download ----------
# Every replication MUST be self-contained: data is downloaded by the notebook,
# never assumed to exist locally. See CLAUDE.md § Self-contained data.
#
# The notebook script is declared as an input so that editing it re-triggers
# this stage. The shell command is a plain string: Snakemake itself fills in
# {NOTEBOOKS} and {log}. The `cd` is confined to a subshell so that `tee`
# resolves {log} from the workflow's working directory rather than assuming
# NOTEBOOKS is exactly one level deep ("../").
rule data_download:
    input:
        notebook=f"{NOTEBOOKS}/01_data_download.py",
    output:
        f"{DATA}/raw/dataset.zip",
    log:
        f"{RESULTS}/logs/01_data_download.log",
    shell:
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 01_data_download.py) "
        "2>&1 | tee {log}"
# ---------- 02: Data clean ----------
# Runs the cleaning notebook on the raw archive to produce the clean parquet
# file. The notebook script is declared as an input so that editing it
# re-triggers this stage. Output of the run is captured in a per-stage log,
# as the data_download rule does; the `cd` happens in a subshell so {log} is
# resolved from the workflow's working directory.
rule data_clean:
    input:
        f"{DATA}/raw/dataset.zip",
        notebook=f"{NOTEBOOKS}/02_data_clean.py",
    output:
        f"{DATA}/clean/dataset.parquet",
    log:
        f"{RESULTS}/logs/02_data_clean.log",
    shell:
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 02_data_clean.py) "
        "2>&1 | tee {log}"
# ---------- 03: Analysis ----------
# Runs the analysis notebook on the clean dataset to produce the summary
# table. The notebook script is declared as an input so that editing it
# re-triggers this stage. Output of the run is captured in a per-stage log,
# as the data_download rule does; the `cd` happens in a subshell so {log} is
# resolved from the workflow's working directory.
rule analysis:
    input:
        f"{DATA}/clean/dataset.parquet",
        notebook=f"{NOTEBOOKS}/03_analysis.py",
    output:
        f"{RESULTS}/summary.csv",
    log:
        f"{RESULTS}/logs/03_analysis.log",
    shell:
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 03_analysis.py) "
        "2>&1 | tee {log}"
# ---------- 04: Figures ----------
# Runs the figures notebook on the summary table to render the main result
# plot. The notebook script is declared as an input so that editing it
# re-triggers this stage. Output of the run is captured in a per-stage log,
# as the data_download rule does; the `cd` happens in a subshell so {log} is
# resolved from the workflow's working directory.
rule figures:
    input:
        f"{RESULTS}/summary.csv",
        notebook=f"{NOTEBOOKS}/04_figures.py",
    output:
        f"{FIGURES}/main_result.png",
    log:
        f"{RESULTS}/logs/04_figures.log",
    shell:
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 04_figures.py) "
        "2>&1 | tee {log}"