forrt-replication-template/Snakefile at main · ScienceLiveHub/forrt-replication-template · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Snakefile — orchestrates the replication pipeline end-to-end.
#
# Replace the placeholder rules with your actual replication steps. The
# canonical pattern is one rule per pipeline stage, and each rule wraps a
# notebook executed via jupytext (so the notebook stays the source of truth
# and the Snakefile just sequences them).
#
# Usage:
#   snakemake --cores 1                  # run everything
#   snakemake --cores 1 -n               # dry run

# Directory layout used by every rule below. All paths are relative to the
# directory Snakemake is invoked from.
DATA = "data"            # raw/ and clean/ datasets
NOTEBOOKS = "notebooks"  # jupytext .py notebooks; rules `cd` here to run them
RESULTS = "results"      # tabular outputs and logs/
FIGURES = "figures"      # rendered plots


rule all:
    # Aggregate target: it produces nothing itself and only lists the files
    # the pipeline must deliver, so requesting it pulls in the rules that
    # build them.
    input:
        # Replace with your actual final artefacts:
        FIGURES + "/main_result.png",
        RESULTS + "/summary.csv",


# ---------- 01: Data download ----------
# Every replication MUST be self-contained: data is downloaded by the notebook,
# never assumed to exist locally. See CLAUDE.md § Self-contained data.
rule data_download:
    # Executes 01_data_download.py (a jupytext notebook in NOTEBOOKS), which is
    # expected to write the raw dataset archive declared under `output`.
    input:
        # Declared so that editing the notebook makes Snakemake re-run this step.
        notebook=f"{NOTEBOOKS}/01_data_download.py",
    output:
        f"{DATA}/raw/dataset.zip",
    log:
        f"{RESULTS}/logs/01_data_download.log",
    shell:
        # Plain string: Snakemake itself substitutes {NOTEBOOKS} and {log}.
        # The subshell confines `cd` to the jupytext call, so `tee` runs in the
        # workflow directory and {log} resolves regardless of how deeply
        # NOTEBOOKS is nested.
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 01_data_download.py) 2>&1 | tee {log}"


# ---------- 02: Data clean ----------
rule data_clean:
    # Executes 02_data_clean.py (a jupytext notebook in NOTEBOOKS), which is
    # expected to turn the raw archive into the cleaned parquet file declared
    # under `output`.
    input:
        f"{DATA}/raw/dataset.zip",
        # Declared so that editing the notebook makes Snakemake re-run this step.
        notebook=f"{NOTEBOOKS}/02_data_clean.py",
    output:
        f"{DATA}/clean/dataset.parquet",
    log:
        f"{RESULTS}/logs/02_data_clean.log",
    shell:
        # Plain string: Snakemake itself substitutes {NOTEBOOKS} and {log}.
        # The subshell confines `cd` to the jupytext call, so `tee` writes the
        # log relative to the workflow directory.
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 02_data_clean.py) 2>&1 | tee {log}"


# ---------- 03: Analysis ----------
rule analysis:
    # Executes 03_analysis.py (a jupytext notebook in NOTEBOOKS), which is
    # expected to read the cleaned dataset and write the summary table declared
    # under `output`.
    input:
        f"{DATA}/clean/dataset.parquet",
        # Declared so that editing the notebook makes Snakemake re-run this step.
        notebook=f"{NOTEBOOKS}/03_analysis.py",
    output:
        f"{RESULTS}/summary.csv",
    log:
        f"{RESULTS}/logs/03_analysis.log",
    shell:
        # Plain string: Snakemake itself substitutes {NOTEBOOKS} and {log}.
        # The subshell confines `cd` to the jupytext call, so `tee` writes the
        # log relative to the workflow directory.
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 03_analysis.py) 2>&1 | tee {log}"


# ---------- 04: Figures ----------
rule figures:
    # Executes 04_figures.py (a jupytext notebook in NOTEBOOKS), which is
    # expected to render the main figure declared under `output` from the
    # summary table.
    input:
        f"{RESULTS}/summary.csv",
        # Declared so that editing the notebook makes Snakemake re-run this step.
        notebook=f"{NOTEBOOKS}/04_figures.py",
    output:
        f"{FIGURES}/main_result.png",
    log:
        f"{RESULTS}/logs/04_figures.log",
    shell:
        # Plain string: Snakemake itself substitutes {NOTEBOOKS} and {log}.
        # The subshell confines `cd` to the jupytext call, so `tee` writes the
        # log relative to the workflow directory.
        "(cd {NOTEBOOKS} && jupytext --to notebook --execute 04_figures.py) 2>&1 | tee {log}"