Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
* [753](https://github.com/dbekaert/RAiDER/pull/753) - Removed support for ERA-Interim.

### Changed
* [791](https://github.com/dbekaert/RAiDER/pull/791) - Updated warning message to reflect the number of dropped stations due to NaN values, and adjusted logging lines for `raiderCombine.py` workflow such that every message is not a warning.
* [788](https://github.com/dbekaert/RAiDER/pull/788) - Streamlined the logic for handling stations where σ_model² < 0, specifically by default such cases are set to nan, but a user may clamp σ_model² to 0 instead by specifying the `--no-allow-nan` flag for `raiderCombine.py`.
* [743](https://github.com/dbekaert/RAiDER/pull/743) - Switched from HTTPS to DAP4 for retrieving MERRA2 data, and suppressed a warning for using DAP4 for GMAO data where doing so is not possible.

Expand Down
163 changes: 133 additions & 30 deletions tools/RAiDER/gnss/processDelayFiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,11 +355,22 @@ def variance_analysis(
else:
station_start = pd.NaT
station_end = pd.NaT
logger.warning(
"Flagged station %s with invalid station start and/or end "
"date(s). Refer to unique dates here: %s.",
group.name,
n_unique_days,
)

if n_global_days is not None and n_global_days > 0:
coverage_pct = (n_unique_days / n_global_days) * 100.0
else:
coverage_pct = np.nan
logger.warning(
"Flagged station %s with no valid sampled days %s.",
group.name,
n_global_days,
)

# Mean-squared terms
# σ_res² = D[r] = E((r - E(r))²)
Expand All @@ -375,22 +386,45 @@ def variance_analysis(
else np.nan
)

# report warnings associated with insufficient sampling
if not n_epochs > 1:
logger.warning(
"Flagged station %s with insufficient valid epochs %d, so σ_res² "
"& σ_GNSS² set to NaN",
group.name,
n_epochs,
)
if not len(sig) > 0:
logger.warning(
"Flagged station %s with insufficient valid sigZTD observations "
"%d, so σ_mean² set to NaN",
group.name,
len(sig),
)

# Mean bias calculation
mean_bias = resid.mean()

# Model variance computation
sigma_model_neg = False
if np.isfinite(sigma_res_sq) and np.isfinite(sigma_gnss_sq):
# σ_wm² = σ_res² - σ_gnss²
diff = sigma_res_sq - sigma_gnss_sq
negative_diff = diff < 0

if negative_diff:
sigma_model_neg = True
logger.warning(
f"Flagged station {group.name} with negative sigma values, "
f"with mean bias {mean_bias}, mean σ_wm² {diff}, "
f"with {n_unique_days} unique days sampled which translates "
f"to {coverage_pct}% daily overlap with the "
f"input dataset timespan."
"Flagged station %s with negative sigma values, "
"with mean bias %s, "
"mean σ_wm² %s, with %d unique days sampled "
"which translates to %.2f%% daily overlap with the "
"input dataset timespan.",
group.name,
mean_bias,
diff,
n_unique_days,
coverage_pct,
)
sigma_model_sq = (
np.nan
Expand All @@ -410,15 +444,23 @@ def variance_analysis(
else:
sigma_mean_bias = np.nan

def first_non_null(series: pd.Series):
def first_non_null(series: pd.Series, label: str):
vals = series.dropna()
return vals.iloc[0] if not vals.empty else np.nan
if not vals.empty:
return vals.iloc[0]
else:
logger.warning(
"Flagged station %s with no valid %s data.",
group.name,
label,
)
return np.nan

data_series = {
"ID": group.name,
"Lat": first_non_null(group["Lat"]),
"Lon": first_non_null(group["Lon"]),
"Hgt_m": first_non_null(group["Hgt_m"]),
"Lat": first_non_null(group["Lat"], "Lat"),
"Lon": first_non_null(group["Lon"], "Lon"),
"Hgt_m": first_non_null(group["Hgt_m"], "Hgt_m"),
"Datetime": station_start,
"Enddate_Datetime": station_end,
"sigZTD": group["sigZTD"].median(),
Expand All @@ -433,6 +475,7 @@ def first_non_null(series: pd.Series):
"n_epochs": n_epochs,
"n_unique_days": n_unique_days,
"pct_days_global": coverage_pct,
"sigma_model_neg": sigma_model_neg,
}

# Only parse Localtime if present
Expand Down Expand Up @@ -721,16 +764,20 @@ def main(
filt_len = len(dfc)
errlimit_dropped = prefilt_len - filt_len
logger.warning(
f"Dropped {errlimit_dropped} observations with sigZTD > input "
f"{obs_errlimit} ({filt_len} remaining)."
"Dropped %d observations with sigZTD > input %s (%d remaining).",
errlimit_dropped,
obs_errlimit,
filt_len,
)

# get temporal sampling stats
mean_delta_days, mode_delta_days = sampling_delta_stats(dfc)

logger.warning(
f"Global mean delta (days): {mean_delta_days} "
f"Global mode delta (days): {mode_delta_days}"
logger.info(
"Global mean delta (days): %s "
"Global mode delta (days): %s",
mean_delta_days,
mode_delta_days,
)

# determine coverage window across all retained observations
Expand All @@ -743,12 +790,14 @@ def main(
n_global_days_total_span = (global_end - global_start).days + 1
# capture reference, maximum temporal sampling
# using mean time delta of all observations
n_global_days = int(n_global_days_total_span / mean_delta_days)
logger.warning(
"The earliest/latest dates found are "
f"{global_start} & {global_end} "
f"which spans {n_global_days_total_span} days, with "
f"an average sampling of {n_global_days} days"
n_global_days = math.ceil(n_global_days_total_span / mode_delta_days)
logger.info(
"The earliest/latest dates found are %s & %s which spans %d days, "
"with a sampling mode of %d days",
global_start,
global_end,
n_global_days_total_span,
n_global_days,
)

dfc_qm = (
Expand All @@ -765,10 +814,37 @@ def main(
.reset_index(drop=True)
)

# Drop all lines with NaNs and duplicates
    # compute statistics for flagged `sigma_model_neg`
dfc_qm.drop_duplicates(inplace=True)
n_before = len(dfc_qm)
sigma_model_filt_len = dfc_qm["sigma_model_neg"].sum()
if sigma_model_filt_len > 0:
df_neg = dfc_qm.loc[dfc_qm["sigma_model_neg"]]
stats = df_neg[
["n_unique_days", "pct_days_global"]
].agg(["mean", "median", "min", "max"])

logger.warning(
"Negative σ_model² for %d stations. "
"Unique days sampled: mean=%.2f, median=%.2f, "
"min=%.2f, max=%.2f. "
"Daily overlap: mean=%.2f%%, median=%.2f%%, "
"min=%.2f%%, max=%.2f%%.",
len(df_neg),
stats.loc["mean", "n_unique_days"],
stats.loc["median", "n_unique_days"],
stats.loc["min", "n_unique_days"],
stats.loc["max", "n_unique_days"],
stats.loc["mean", "pct_days_global"],
stats.loc["median", "pct_days_global"],
stats.loc["min", "pct_days_global"],
stats.loc["max", "pct_days_global"],
)

# Drop all lines with NaNs and duplicates
dfc_qm = dfc_qm.drop(columns=["sigma_model_neg"])
dfc_qm.dropna(how="any", inplace=True)
dfc_qm.drop_duplicates(inplace=True)
nan_filt_len = n_before - len(dfc_qm)

if min_pct_days > 0:
before_filter = len(dfc_qm)
Expand All @@ -782,18 +858,45 @@ def main(
len(dfc_qm),
)

if allow_nan_for_negative:
n_flagged = n_before - len(dfc_qm)
if not allow_nan_for_negative:
n_flagged = (dfc_qm["sigma_model"] == 0).sum()
logger.warning(
f"Dropped {n_flagged} stations containing NaN sigma values "
f"({len(dfc_qm)} remaining)."
"%d/%d stations contain 0 sigma values.",
n_flagged,
len(dfc_qm),
)
else:
n_flagged = (dfc_qm["sigma_model"] == 0).sum()

if nan_filt_len > 0:
logger.warning(
f"{n_flagged}/{len(dfc_qm)} stations contain 0 sigma values."
"Dropped %d stations containing NaN due to all imposed filters "
"(see other warnings above for more details), "
"(%d) stations remaining.",
nan_filt_len,
len(dfc_qm),
)

# capture final stats
stats = dfc_qm[
["n_unique_days", "pct_days_global"]
].agg(["mean", "median", "min", "max"])

logger.info(
"%d kept stations. "
"Unique days sampled: mean=%.2f, median=%.2f, "
"min=%.2f, max=%.2f. "
"Daily overlap: mean=%.2f%%, median=%.2f%%, "
"min=%.2f%%, max=%.2f%%.",
len(dfc_qm),
stats.loc["mean", "n_unique_days"],
stats.loc["median", "n_unique_days"],
stats.loc["min", "n_unique_days"],
stats.loc["max", "n_unique_days"],
stats.loc["mean", "pct_days_global"],
stats.loc["median", "pct_days_global"],
stats.loc["min", "pct_days_global"],
stats.loc["max", "pct_days_global"],
)

dfc_qm.to_csv(
out_path,
index=False,
Expand Down