ci_middleware/data/SConscript at dfa58a22e7d474ca3760fc2960aff820a4aba2fe · lsst/ci_middleware · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# -*- python -*-
# This file is part of ci_middleware.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations

import os

from lsst.ci.middleware.scons import PipelineCommands, python_cmd, tar_repo_cmd
from lsst.sconsUtils import state

# Start with an empty list that collects every build target produced in this
# subdir.
state.targets["data"] = []

# Build a tarred-up repo holding the dimension records, skymap, and input
# datasets that are common to most mock pipelines (raws, calibs, refcats,
# etc.).  The "version" targets are passed as the sources of this command.
base_repo_sources = state.targets["version"]
base_repo_actions = [
    # First populate the repo in the directory named by ${TARGET.base} ...
    python_cmd("-m", "lsst.ci.middleware", "make-base-repo", "--clobber", "${TARGET.base}"),
    # ... then tar that directory up into the actual target file.
    tar_repo_cmd("${TARGET.base}", "${TARGET}"),
]
base_repo = state.env.Command("base-repo.tgz", base_repo_sources, base_repo_actions)
state.targets["data"].extend(base_repo)

# ci_hsc runs as one full pipeline (no steps) with a constraint that limits
# it to a single patch.  The consequence is "tract slicing": detectors that
# do not overlap that patch are dropped from steps that normally expect
# complete visits.  That is acceptable here because the most important tasks
# that care about tract slicing are not run in this pipeline, and for the
# rest we just live with any slight degradation in the quality of the
# results.
ci_hsc_pipeline = os.path.join(os.environ["DRP_PIPE_DIR"], "pipelines", "HSC", "DRP-ci_hsc.yaml")
ci_hsc_commands = PipelineCommands("ci_hsc", ci_hsc_pipeline, base_repo)
# Rebind on each call so this reads exactly like the fluent chain it replaces.
ci_hsc_commands = ci_hsc_commands.add(where="skymap='ci_mw' AND tract=0 AND patch=1")
ci_hsc = ci_hsc_commands.finish()
state.targets["data"].extend(ci_hsc)

# RC2 is run in steps, with the target of the real pipeline being outputs for
# just a few non-overlapping tracts.  We do care here about giving full visits
# to steps that expect full visits (no "tract slicing"), and that's the main
# reason for splitting up into steps.  For the mock version, we process just
# one tract (0), and deliberately leave out the two visits here (18202, 96980)
# that do not overlap it.
rc2 = (
    PipelineCommands(
        "RC2", os.path.join(os.environ["DRP_PIPE_DIR"], "pipelines", "HSC", "DRP-RC2.yaml"), base_repo
    )
    # Add side runs (later runs do not build on these) to check
    # --raise-on-partial-outputs.
    .add_side_run(
        "test-raise-partial-outputs",
        ["isr", "calibrateImage"],
        where="skymap='ci_mw'",
        fail=[
            # Configure one quantum to fail with AnnotatedPartialOutputsError.
            # Due to command-line options this should be interpreted as
            # a regular failure, and downstream quanta should not be run.
            '''isr:lsst.pipe.base.AnnotatedPartialOutputsError:"instrument='HSC' AND exposure=95104"'''
        ],
        raise_on_partial_outputs=True,
        expect_failure=True,
    )
    .add_side_run(
        "test-no-raise-partial-outputs",
        ["isr", "calibrateImage"],
        where="skymap='ci_mw'",
        fail=[
            # Configure one quantum to fail with AnnotatedPartialOutputsError.
            # By default this is treated as a qualified success, and downstream
            # quanta will be run - we expect them to then do NoWorkFound, which
            # just reflects the fact that it's a little weird to pick ISR as
            # the task to raise this error, but that doesn't matter for testing
            # the mechanics.
            '''isr:lsst.pipe.base.AnnotatedPartialOutputsError:"instrument='HSC' AND exposure=95104"'''
        ],
        expect_failure=False,
    )
    # Side run that passes --data-id-table (pointing at a packaged ECSV
    # resource) through extra_qg_args.
    .add_side_run(
        "test-data-id-table",
        ["isr", "calibrateImage"],
        where="instrument='HSC'",
        extra_qg_args=["--data-id-table", "resource://lsst.ci.middleware/data/data_ids.ecsv"],
    )
    # Main step1 run, leaving out the two visits that do not overlap tract 0.
    .add("step1", where="instrument='HSC' AND exposure NOT IN (18202, 96980)", ingest_graph=False)
    # Add more side runs to check --extend-run and --clobber-outputs.
    .add_side_run(
        "test-clobber-without-skip",
        ["calibrateImage", "consolidatePreSourceTable"],
        where="skymap='ci_mw'",
        extend_run=True,
        clobber_outputs=True,
    )
    .add_side_run(
        "test-skip-and-clobber",
        ["calibrateImage", "consolidatePreSourceTable"],
        skip_existing_in_last=True,
        extend_run=True,
        clobber_outputs=True,
    )
    .add("step2a", where="skymap='ci_mw'")
    .add("step2b", where="skymap='ci_mw' AND tract=0")
    .add("step2cde")
    .add(
        "step3",
        where="skymap='ci_mw' AND tract=0",
        fail=[
            # Configure one quantum to fail on the first try and succeed on the
            # second, simulating an out-of-memory failure and automatic retry.
            """assembleCoadd::"skymap='ci_mw' AND tract=0 AND patch=2 AND band='r'":6GB"""
        ],
        auto_retry_mem=("4GB", "8GB"),
    )
    .add("step4")
    # Test fail-and-rescue workflow by configuring one task to fail on the
    # first try ("attempt1"), and then recover from that with a new run
    # ("attempt2") and --skip-existing-in.
    .add(
        "step5",
        where="skymap='ci_mw' AND tract=0",
        group="attempt1",
        fail=['''consolidateForcedSourceTable::"skymap='ci_mw' AND tract=0"'''],
        ingest_graph=False,
    )
    .add("step5", where="skymap='ci_mw' AND tract=0", group="attempt2", skip_existing_in_last=True)
    .add("step6")
    .add("step7")
    .finish()
)
state.targets["data"].extend(rc2)

# The Prod pipeline is run in steps over very large amounts of data, where
# each step is expected to need several submissions to keep QG and workflow
# size under control.  Here we process all data in the input repository and
# split steps into groups where appropriate.  Independent groups are processed
# in serial - not realistic, but a useful simplification - so that we can keep
# making a new repo for each run, with no need to merge them.
prod_pipeline = os.path.join(os.environ["DRP_PIPE_DIR"], "pipelines", "HSC", "DRP-Prod.yaml")

# Grouping by band is just a convenient way to split our visits up into two
# groups, not a reflection of expected real pipeline usage.
prod_where_r = "band='r'"
prod_where_i = "band='i'"

# Tract-based groups used by the steps that are split as "even" / "odd".
prod_where_even = "skymap='ci_mw' AND tract IN (0, 2)"
prod_where_odd = "skymap='ci_mw' AND tract IN (1, 3)"

# Failure spec handed to the first i-band attempt of step1.
prod_step1_failure = '''calibrateImage::"instrument='HSC' AND visit=18202"'''

prod = (
    PipelineCommands("Prod", prod_pipeline, base_repo)
    .add("step1", group="r", where=prod_where_r)
    # The i-band group of step1 is added twice: the first attempt is given a
    # failure spec, and the second is added with skip_existing_in_last=True.
    .add(
        "step1",
        group="i-attempt1",
        where=prod_where_i,
        fail=[prod_step1_failure],
        ingest_graph=False,
    )
    .add(
        "step1",
        group="i-attempt2",
        where=prod_where_i,
        skip_existing_in_last=True,
        ingest_graph=False,
    )
    .add("step2a", group="r", where=prod_where_r)
    .add("step2a", group="i", where=prod_where_i)
    .add("step2b", group="even", where=prod_where_even)
    .add("step2b", group="odd", where=prod_where_odd)
    .add("step2c")
    .add("step2d", group="r", where=prod_where_r)
    .add("step2d", group="i", where=prod_where_i)
    .add("step2e")
    .add("step3", group="even", where=prod_where_even)
    .add("step3", group="odd", where=prod_where_odd)
    .add("step4", group="r", where=prod_where_r)
    .add("step4", group="i", where=prod_where_i)
    .add("step7")
    .finish()
)
state.targets["data"].extend(prod)

# Pass the three per-pipeline names used above to CleanTree, with an empty
# first argument.
# NOTE(review): presumably these are directory patterns to remove on
# `scons --clean` - confirm against lsst.sconsUtils.
pipeline_dir_names = ["ci_hsc", "RC2", "Prod"]
state.env.CleanTree([], pipeline_dir_names)