Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions docs/developer-guide/restart-checkpoint.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ Checkpoint format is ASCII text with one key/value per line:
- header: `SIPNET_RESTART 1.0`
- metadata: `model_version`, `build_info`, `checkpoint_utc_epoch`, `processed_steps`
- schema layout guard metadata: `schema_layout.envi_size`, `schema_layout.trackers_size`, `schema_layout.phenology_trackers_size`, `schema_layout.event_trackers_size`
- `schema_layout.trackers_size` guards the serialized tracker payload shape for schema v1.0
- mode flags: `flags.*`
- boundary metadata: `boundary.year`, `boundary.day`, `boundary.time`, `boundary.length` (no forcing fields, no cumulative GDD)
- mean tracker metadata: `mean.npp.*`
- full runtime state: `envi.*`, `trackers.*`, `phenology.*`, `event_trackers.*`
- full runtime state: `envi.*`, serialized `trackers.*`, `phenology.*`, `event_trackers.*`
- includes `trackers.gdd` for year-to-date cumulative GDD continuity
- excludes step-level diagnostics that are recomputed on the next timestep:
`trackers.methane`, `trackers.nLeaching`, `trackers.nFixation`,
`trackers.nUptake`
- mean ring buffers: `mean.npp.values.length` + `mean.npp.values.<idx>`, `mean.npp.weights.length` + `mean.npp.weights.<idx>`
- end marker: `end_restart 1`

Expand Down Expand Up @@ -77,7 +81,7 @@ When `--gdd` is enabled, checkpoint resume restores cumulative GDD from `tracker

Restart schema v1.0 includes compile-time and runtime drift guards so struct layout changes cannot silently pass:

- Compile-time guards: `_Static_assert` checks in `src/sipnet/restart.c` for `Envi`, `Trackers`, `PhenologyTrackers`, and `EventTrackers`.
- Compile-time guards: `_Static_assert` checks in `src/sipnet/restart.c` for `Envi`, serialized tracker payload shape, `PhenologyTrackers`, and `EventTrackers`.
- Runtime guards: `schema_layout.*` fields in each checkpoint are validated on load.
- Test guardrails: `tests/sipnet/test_restart_infrastructure/testRestartMVP.c` verifies schema layout keys are present and rejects tampered values.

Expand Down
68 changes: 64 additions & 4 deletions src/sipnet/restart.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "restart.h"

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
Expand All @@ -22,12 +23,44 @@
#define RESTART_SCHEMA_LAYOUT_PHENOLOGY_TRACKERS_SIZE 12
#define RESTART_SCHEMA_LAYOUT_EVENT_TRACKERS_SIZE 8

typedef struct RestartSerializedTrackersV1 {
double gpp;
double rtot;
double ra;
double rh;
double npp;
double nee;
double yearlyGpp;
double yearlyRtot;
double yearlyRa;
double yearlyRh;
double yearlyNpp;
double yearlyNee;
double totGpp;
double totRtot;
double totRa;
double totRh;
double totNpp;
double totNee;
double evapotranspiration;
double soilWetnessFrac;
double rRoot;
double rSoil;
double rAboveground;
double yearlyLitter;
double woodCreation;
double n2o;
double gdd;
int lastYear;
} RestartSerializedTrackersV1;

_Static_assert(sizeof(Envi) == RESTART_SCHEMA_LAYOUT_ENVI_SIZE,
"Restart schema 1.0 drift: Envi changed; bump restart schema "
"version and update schema_layout.* checks");
_Static_assert(sizeof(Trackers) == RESTART_SCHEMA_LAYOUT_TRACKERS_SIZE,
"Restart schema 1.0 drift: Trackers changed; bump restart "
"schema version and update schema_layout.* checks");
_Static_assert(sizeof(RestartSerializedTrackersV1) ==
RESTART_SCHEMA_LAYOUT_TRACKERS_SIZE,
"Restart schema 1.0 drift: serialized trackers payload changed; "
"bump restart schema version and update schema_layout.* checks");
_Static_assert(
sizeof(PhenologyTrackers) == RESTART_SCHEMA_LAYOUT_PHENOLOGY_TRACKERS_SIZE,
"Restart schema 1.0 drift: PhenologyTrackers changed; bump restart "
Expand Down Expand Up @@ -146,6 +179,31 @@ validateCheckpointBoundaryForWrite(const char *restartOut,
}
}

static void
validateCheckpointBoundaryForLoad(const char *restartIn,
const RestartClimateSignature *boundary) {
double stepHours = boundary->length * 24.0;
if (stepHours <= RESTART_FLOAT_EPSILON) {
logError("Restart boundary mismatch in %s: checkpoint boundary has "
"non-positive timestep length (year=%d day=%d time=%.8f "
"length=%.8f)\n",
restartIn, boundary->year, boundary->day, boundary->time,
boundary->length);
exit(EXIT_CODE_BAD_PARAMETER_VALUE);
}

double hoursUntilMidnight = 24.0 - boundary->time;
if (hoursUntilMidnight > (stepHours + RESTART_FLOAT_EPSILON)) {
logError(
"Restart boundary mismatch in %s: checkpoint boundary is more than "
"one timestep before midnight\n",
restartIn);
logError("Checkpoint boundary: year=%d day=%d time=%.8f length=%.8f\n",
boundary->year, boundary->day, boundary->time, boundary->length);
exit(EXIT_CODE_BAD_PARAMETER_VALUE);
}
}
Comment on lines +183 to +205
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

validateCheckpointBoundaryForLoad has the same issue as the write-side validation: it doesn't reject out-of-range boundary.time/boundary.day values. A checkpoint with boundary.time > 24 makes hoursUntilMidnight negative, which will incorrectly pass the "near midnight" check. Please validate boundary.time is within [0,24] and boundary.day is within the valid day-of-year range for boundary.year before applying the midnight-window check.

Copilot uses AI. Check for mistakes.

static void sanitizeBuildInfo(char *dest, size_t destLen, const char *src) {
if (destLen == 0) {
return;
Expand Down Expand Up @@ -184,8 +242,9 @@ static void parseValueError(const char *restartIn, const char *key,
static long long parseLongLongStrict(const char *restartIn, const char *key,
const char *value) {
char *end = NULL;
errno = 0;
long long parsed = strtoll(value, &end, 10);
if (end == value || *end != '\0') {
if (end == value || *end != '\0' || errno == ERANGE) {
parseValueError(restartIn, key, value);
}
return parsed;
Expand Down Expand Up @@ -1105,6 +1164,7 @@ void restartLoadCheckpoint(const char *restartIn, MeanTracker *meanNPP) {

readRestartState(restartIn, &state, meanNPP);

validateCheckpointBoundaryForLoad(restartIn, &(state.boundaryClimate));
checkRestartContextCompatibility(&state);
validateRestartModelBuild(&state);
validateRestartBoundary(&state);
Expand Down
82 changes: 82 additions & 0 deletions tests/sipnet/test_restart_infrastructure/testRestartMVP.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,11 @@ static int testSegmentedEquivalence(void) {
status |= !fileContains(CHECKPOINT_FILE, "mean.npp.length ");
status |= !fileContains(CHECKPOINT_FILE, "mean.npp.values.length ");
status |= !fileContains(CHECKPOINT_FILE, "mean.npp.weights.length ");
// These step-level diagnostics are intentionally omitted from restart schema.
status |= fileContains(CHECKPOINT_FILE, "trackers.methane ");
status |= fileContains(CHECKPOINT_FILE, "trackers.nLeaching ");
status |= fileContains(CHECKPOINT_FILE, "trackers.nFixation ");
status |= fileContains(CHECKPOINT_FILE, "trackers.nUptake ");
status |= fileContains(CHECKPOINT_FILE, "balance.");
status |= !previousNonEmptyLineBeforeStartsWith(
CHECKPOINT_FILE, "end_restart 1", "mean.npp.weights.");
Expand Down Expand Up @@ -565,6 +570,41 @@ static int testCheckpointMustEndNearMidnight(void) {
return status;
}

static int testTamperedBoundaryNotNearMidnightFails(void) {
int status = 0;
int stepStatus = 0;
int rc;

runShell("rm -f run.out events.out run.restart *.log");

stepStatus = prepRunFiles("restart_segment1.clim", "events_segment1.in");
if (stepStatus) {
logTest("Failed to prepare files for tampered-boundary test segment 1\n");
return stepStatus;
}
status |= (runModel("restart_seg1.in", "tampered_boundary_seg1.log") != 0);
status |= replaceFirstLineStartingWith(CHECKPOINT_FILE, "boundary.time ",
"boundary.time 12.0\n");

stepStatus = prepRunFiles("restart_segment2.clim", "events_segment2.in");
if (stepStatus) {
logTest("Failed to prepare files for tampered-boundary test segment 2\n");
return status | stepStatus;
}

rc = runModel("restart_seg2.in", "tampered_boundary_seg2.log");
status |= (rc != EXIT_CODE_BAD_PARAMETER_VALUE);
status |= !fileContains(
"tampered_boundary_seg2.log",
"checkpoint boundary is more than one timestep before midnight");

if (status) {
logTest("testTamperedBoundaryNotNearMidnightFails failed (rc=%d)\n", rc);
}

return status;
}

static int testRestartMustStartNearMidnight(void) {
int status = 0;
int stepStatus = 0;
Expand Down Expand Up @@ -1097,6 +1137,46 @@ static int testTruncatedCheckpointFails(void) {
return status;
}

static int testProcessedStepsOverflowFails(void) {
int status = 0;
int stepStatus = 0;
int rc;

runShell("rm -f run.out events.out run.restart *.log");

stepStatus = prepRunFiles("restart_segment1.clim", "events_segment1.in");
if (stepStatus) {
logTest("Failed to prepare files for processed-steps overflow test "
"segment 1\n");
return stepStatus;
}
status |=
(runModel("restart_seg1.in", "processed_steps_overflow_seg1.log") != 0);
status |= replaceFirstLineStartingWith(
CHECKPOINT_FILE, "processed_steps ",
"processed_steps 999999999999999999999999999999\n");

stepStatus = prepRunFiles("restart_segment2.clim", "events_segment2.in");
if (stepStatus) {
logTest("Failed to prepare files for processed-steps overflow test "
"segment 2\n");
return status | stepStatus;
}

rc = runModel("restart_seg2.in", "processed_steps_overflow_seg2.log");
status |= (rc != EXIT_CODE_BAD_PARAMETER_VALUE);
status |=
!fileContains("processed_steps_overflow_seg2.log",
"invalid value '999999999999999999999999999999' for key "
"'processed_steps'");

if (status) {
logTest("testProcessedStepsOverflowFails failed (rc=%d)\n", rc);
}

return status;
}

static int testMalformedCheckpointFails(void) {
int status = 0;
int stepStatus = 0;
Expand Down Expand Up @@ -1140,6 +1220,7 @@ int run(void) {
status |= testSegmentedEquivalence();
status |= testStrictClimateMismatchFails();
status |= testCheckpointMustEndNearMidnight();
status |= testTamperedBoundaryNotNearMidnightFails();
status |= testRestartMustStartNearMidnight();
status |= testRestartEventBoundaryRequiresSegmentedEvents();
status |= testMissingFinalNewlineCheckpointSucceeds();
Expand All @@ -1155,6 +1236,7 @@ int run(void) {
status |= testLegacyBalanceKeyRejected();
status |= testBuildInfoMismatchWarnsAndSucceeds();
status |= testTruncatedCheckpointFails();
status |= testProcessedStepsOverflowFails();
status |= testMalformedCheckpointFails();

return status;
Expand Down
Loading