Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
50b8eab
add generate_sobol_design function
Nov 26, 2025
9011c49
add global SA scripts
Nov 26, 2025
12299b4
add array launcher script
Nov 26, 2025
ae6f7ce
GSA analysis report
Nov 26, 2025
211df2b
update readme for executing workflows
Nov 26, 2025
ef33bca
add dummy parameter to source map to prevent metadata mismatch
divine7022 Dec 10, 2025
19bdc20
remove old 023/024 scripts (renumbered)
divine7022 Feb 27, 2026
a9b3200
add crop lookup for site-specific n rate and compost ranges
divine7022 Feb 27, 2026
69f3fd0
add management events builder for n fert and compost
divine7022 Feb 27, 2026
b0ec279
simplify api to ic_size/met_size and add compute_sobol_indices
divine7022 Feb 27, 2026
011f130
add events to samplingspace and inputs, remove prerun tag
divine7022 Feb 27, 2026
9eeec42
add multisite xml builder with setensemblepaths for met, ic, events
divine7022 Feb 27, 2026
ea747b5
add management params to sobol design and use ic_size/met_size
divine7022 Feb 27, 2026
d4236c7
skip mgmt columns in samples.rdata, use param_sources mapping
divine7022 Feb 27, 2026
1ba9994
add per-sample events generator from sobol design quantiles
divine7022 Feb 27, 2026
a015084
add pecan workflow runner with setensemblepaths for events
divine7022 Feb 27, 2026
25ba198
add sobol index computation from ensemble output
divine7022 Feb 27, 2026
d1a5da3
add global sa shell orchestration script
divine7022 Feb 27, 2026
d9b7eaf
add local sa shell orchestration script
divine7022 Feb 27, 2026
6c5ff60
add full pipeline orchestration script
divine7022 Feb 27, 2026
d4f1626
add management and crop lookup config paths
divine7022 Feb 27, 2026
ecec32e
update readme with correct script names and pipeline structure
divine7022 Feb 27, 2026
d3111a0
rewrite report with cross-site boxplots and graphs over tables
divine7022 Feb 27, 2026
39460be
use directly from github scenarios repo
divine7022 Feb 27, 2026
9cdad09
update readme
divine7022 Feb 27, 2026
bae9f28
update readme
divine7022 Feb 27, 2026
188f035
fix paths
divine7022 Feb 27, 2026
0a06f0c
add empty line
divine7022 Feb 27, 2026
258aebf
renamed master_design_points to design_points and consistently use de…
divine7022 Feb 27, 2026
4615707
update global sa report
divine7022 Mar 5, 2026
e35d069
add pois dist
divine7022 Mar 5, 2026
dd58751
support anchor site AGU event file
divine7022 Mar 5, 2026
bb4d04d
add event to input_design
divine7022 Mar 5, 2026
b8677eb
support anchor site event file
divine7022 Mar 5, 2026
4313054
remove SA block from setting
divine7022 Mar 5, 2026
f130d91
update sobol metadata path
divine7022 Mar 5, 2026
aaafba0
update readme
divine7022 Mar 5, 2026
9cb4530
move all bash scripts to tools/
divine7022 Mar 5, 2026
4e34ec0
remove unused shell scripts from scripts directory
divine7022 Mar 5, 2026
91ec3cb
add modellauncher and git version
divine7022 Mar 5, 2026
bec7fc7
add missing anchor site event file path to config
divine7022 Mar 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions 000-config.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Default configuration for CCMMF Phase 4 workflows
# Configuration for uncertainty quantification pipeline
default:
flags:
production: false
Expand All @@ -8,8 +8,27 @@ default:
raw_data_dir: "data_raw"
cache_dir: "cache"
pecan_outdir: "/projectnb2/dietzelab/ccmmf/modelout/ccmmf_phase_2b_mixed_pfts_20250701"
design_points: "/projectnb2/dietzelab/ccmmf/data/design_points.csv"
settings_xml: "data_raw/settings_sa.xml"
pecan_xml_template: "data_raw/template.xml"
sites:
design_points_file: "data_raw/sa_design_points.csv"
n_sample: 10
sensitivity:
sigma_levels: [-2, -1, 1, 2]
projection:
ca_albers_crs: 3310
ca_albers_crs: 3310
# Management uncertainty (023)
events_baseline_url: "https://raw.githubusercontent.com/ccmmf/scenarios/main/data/events_baseline.json"
# Anchor site management events (17 sites)
anchor_events_json: "/projectnb/dietzelab/ccmmf/management/event_files/anchors_events_pecanFormat.json"
start_year: 2016
Comment thread
divine7022 marked this conversation as resolved.
end_year: 2023
site_id: "1000025731"
# Crop identity -> N rate mapping (021 + 023)
crop_lookup:
landiq_parquet: "/projectnb2/dietzelab/ccmmf/LandIQ-harmonized-v3/crops_all_years.parq"
pft_table_csv: "/projectnb2/dietzelab/ccmmf/cadwr-landuse/data/CARB_PFTs_table.csv"
crosswalk_csv: "/projectnb2/dietzelab/ccmmf/management/fertilization/crop_type_crosswalk.csv"
landiq_year: 2023
landiq_season: 2

163 changes: 163 additions & 0 deletions R/crop_lookup.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Map design-point sites to crop-specific N and compost ranges.
#
# Bridges LandIQ v3 parquet + CARB PFT table + crop crosswalk +
# PEcAn.data.land lookup functions to produce a per-site tibble
# consumed by 023_generate_management_events.R.

#' Resolve per-site crop identity with N and compost ranges
#'
#' Joins the design points to LandIQ crop records, the CARB PFT table and the
#' crop crosswalk, then looks up a crop-specific N rate range for every site.
#' Sites without a usable crop-specific rate receive the median range of the
#' matched sites sharing their `pft`, or the median over all matched sites
#' when no site with the same (non-missing) `pft` was matched. A single
#' compost range, taken from `compost_material`, is attached to every row.
#'
#' @param design_points_csv Path to design_points.csv. Must contain the
#'   columns `UniqueID`, `site_id` and `pft`.
#' @param landiq_parquet Path to LandIQ crops_all_years.parq
#' @param pft_table_csv Path to CARB_PFTs_table.csv
#' @param crosswalk_csv Path to crop_type_crosswalk.csv
#' @param year LandIQ survey year (default 2023, most recent).
#'   NB crop identity assumed constant - simplification for rotations.
#'   TODO use per-year LandIQ when rotation data is available.
#' @param season LandIQ season (default 2 = summer crop).
#' @param compost_material Compost type for look_up_ca_compost_amendment().
#'   Default "Cow manure" - most common amendment in CA row crop ag.
#'   TODO make per crop when crop specific compost data is available.
#' @return Tibble with one row per design-point row and the columns
#'   `site_id`, `UniqueID` (zero-padded to 7 characters), `pft`, `crop_name`,
#'   `pft_group`, `min_n_g_m2`, `max_n_g_m2`, `lookup_source`
#'   (`"crop_specific"`, `"pft_fallback"` or `"global_fallback"`),
#'   `compost_c_min_g_m2`, `compost_c_max_g_m2`, `compost_cn_min`,
#'   `compost_cn_max` and `compost_material`.
get_site_crop_info <- function(design_points_csv,
                               landiq_parquet,
                               pft_table_csv,
                               crosswalk_csv,
                               year = 2023L,
                               season = 2L,
                               compost_material = "Cow manure") {

  # arrow is an optional dependency: check it before doing any other work so
  # a missing install is reported with an actionable message.
  if (!requireNamespace("arrow", quietly = TRUE)) {
    PEcAn.logger::logger.severe(
      "Package 'arrow' is required to read LandIQ parquet files. ",
      "Install with: install.packages('arrow')"
    )
  }

  dp <- readr::read_csv(design_points_csv, show_col_types = FALSE)
  missing_cols <- setdiff(c("UniqueID", "site_id", "pft"), names(dp))
  if (length(missing_cols) > 0) {
    PEcAn.logger::logger.severe(
      "Design points file is missing required column(s): ",
      paste(missing_cols, collapse = ", ")
    )
  }
  # NOTE(review): assumes LandIQ UniqueID is stored as a 7-character,
  # zero-padded string - confirm against the parquet schema.
  dp$uid_padded <- sprintf("%07d", as.integer(dp$UniqueID))

  crops <- arrow::read_parquet(landiq_parquet) |>
    dplyr::filter(.data$year == .env$year, .data$season == .env$season) |>
    dplyr::select("UniqueID", "CLASS", "SUBCLASS")

  # keep a single crop record per LandIQ parcel
  crops <- dplyr::distinct(crops, .data$UniqueID, .keep_all = TRUE)

  pft_tbl <- readr::read_csv(pft_table_csv, show_col_types = FALSE) |>
    dplyr::select("crop_type", "crop_code", "crop_desc", "pft_group") |>
    dplyr::mutate(crop_code = as.character(.data$crop_code))

  crosswalk <- readr::read_csv(crosswalk_csv, show_col_types = FALSE)

  # multiple = "first" keeps exactly one row per design point even if a
  # lookup table carries duplicate keys; otherwise sites would be silently
  # duplicated in the output.
  matched <- dplyr::left_join(
    dp, crops,
    by = c("uid_padded" = "UniqueID"),
    multiple = "first"
  )
  matched <- dplyr::left_join(
    matched, pft_tbl,
    by = c("CLASS" = "crop_type", "SUBCLASS" = "crop_code"),
    multiple = "first"
  )
  matched <- dplyr::left_join(
    matched, crosswalk,
    by = c("crop_desc" = "landiq"),
    suffix = c("", "_xwalk"),
    multiple = "first"
  )
  # preferred lookup name: crosswalk uc_anr, then frep, then the raw LandIQ
  # crop description
  matched$lookup_name <- dplyr::coalesce(
    matched$uc_anr, matched$frep, matched$crop_desc
  )

  # compost ranges from look_up_ca_compost_amendment()
  compost_dat <- PEcAn.data.land::look_up_ca_compost_amendment(compost_material)
  if (nrow(compost_dat) == 0) {
    PEcAn.logger::logger.severe(
      "No compost data for '", compost_material, "'"
    )
  }
  # select widest C:N range row for the material; which.max() returns
  # integer(0) when every range is NA, so fall back to the first row
  widest <- which.max(compost_dat$cn_max - compost_dat$cn_min)
  if (length(widest) == 0) {
    widest <- 1L
  }
  compost_row <- compost_dat[widest, ]

  # N rate per site
  lookup_site <- function(site_id, uid, pft_orig, crop_name, pft_grp) {
    empty_row <- tibble::tibble(
      site_id = site_id, UniqueID = uid, pft = pft_orig,
      crop_name = crop_name,
      pft_group = as.character(pft_grp),
      min_n_g_m2 = NA_real_, max_n_g_m2 = NA_real_,
      lookup_source = "unmatched"
    )
    if (is.na(crop_name)) {
      return(empty_row)
    }

    # warnings from the lookup are deliberately silenced; unmatched crops
    # are handled through the fallback below
    rate <- suppressWarnings(
      PEcAn.data.land::look_up_ca_n_rate(crop_name, unit = "g_m2")
    )
    if (nrow(rate) == 0) {
      empty_row$lookup_source <- "no_rate"
      return(empty_row)
    }

    tibble::tibble(
      site_id = site_id, UniqueID = uid, pft = pft_orig,
      crop_name = crop_name, pft_group = rate$pft_group[1],
      min_n_g_m2 = rate$min_n[1], max_n_g_m2 = rate$max_n[1],
      lookup_source = "crop_specific"
    )
  }
  result <- purrr::pmap(
    list(
      site_id = matched$site_id,
      uid = matched$uid_padded,
      pft_orig = matched$pft,
      crop_name = matched$lookup_name,
      pft_grp = matched$pft_group
    ),
    lookup_site
  ) |>
    dplyr::bind_rows()

  # fallback: pft-level medians, then global median
  needs_fallback <- is.na(result$min_n_g_m2)
  if (any(needs_fallback)) {
    pft_medians <- result |>
      dplyr::filter(!is.na(.data$min_n_g_m2)) |>
      dplyr::summarize(
        med_min = median(.data$min_n_g_m2),
        med_max = median(.data$max_n_g_m2, na.rm = TRUE),
        .by = "pft"
      )
    global_min <- median(result$min_n_g_m2, na.rm = TRUE)
    global_max <- median(result$max_n_g_m2, na.rm = TRUE)
    if (is.na(global_min)) {
      PEcAn.logger::logger.warn(
        "No site has a crop-specific N rate; fallback N rates are NA"
      )
    }

    rows <- which(needs_fallback)
    # incomparables = NA: a site with a missing pft must never match a PFT
    # group and always takes the global fallback
    hit <- match(result$pft[rows], pft_medians$pft, incomparables = NA)
    has_pft <- !is.na(hit)

    result$min_n_g_m2[rows] <- ifelse(
      has_pft, pft_medians$med_min[hit], global_min
    )
    result$max_n_g_m2[rows] <- ifelse(
      has_pft, pft_medians$med_max[hit], global_max
    )
    result$lookup_source[rows] <- ifelse(
      has_pft, "pft_fallback", "global_fallback"
    )

    PEcAn.logger::logger.warn(
      sum(needs_fallback), " of ", nrow(result),
      " sites used fallback N rates"
    )
  }

  # attach compost ranges
  result$compost_c_min_g_m2 <- compost_row$total_c_min_g_m2
  result$compost_c_max_g_m2 <- compost_row$total_c_max_g_m2
  result$compost_cn_min <- compost_row$cn_min
  result$compost_cn_max <- compost_row$cn_max
  result$compost_material <- compost_material

  PEcAn.logger::logger.info(
    "Crop lookup: ", sum(result$lookup_source == "crop_specific"),
    " crop-specific, ",
    sum(result$lookup_source %in% c("pft_fallback", "global_fallback")),
    " fallback"
  )
  PEcAn.logger::logger.info(
    "Compost: ", compost_material,
    " (C: ", round(compost_row$total_c_min_g_m2, 1),
    "-", round(compost_row$total_c_max_g_m2, 1), " g/m2",
    ", C:N: ", compost_row$cn_min, "-", compost_row$cn_max, ")"
  )

  result
}
Loading