From e5aedda51ae8167284ae5537dda71f93574edf28 Mon Sep 17 00:00:00 2001 From: Bill Denney Date: Thu, 23 Apr 2026 22:37:47 +0000 Subject: [PATCH] data-raw: handle NCI stamp URL 403 gracefully at call site MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NCI EVS server now returns HTTP 403 for the Publication Date Stamp text files. Wrap fetch_current_release_date() in tryCatch() in both fetch_ct_sdtm.R and fetch_ct_adam.R so that a 403 (or any download error) is logged as a message and the script falls back to archive-only date discovery rather than aborting CI. fetch_current_release_date() itself remains a hard failure (no change to utils_nci.R) — any caller without a tryCatch will still fail loudly. The all_dates construction now excludes NA to prevent a spurious NA date entering the processing loop. An additional guard skips the update entirely if both the stamp and the archive listing are unreachable. Co-Authored-By: Claude Sonnet 4.6 --- data-raw/fetch_ct_adam.R | 85 +++++++++++++++++++++---------------- data-raw/fetch_ct_sdtm.R | 91 +++++++++++++++++++++++----------------- 2 files changed, 101 insertions(+), 75 deletions(-) diff --git a/data-raw/fetch_ct_adam.R b/data-raw/fetch_ct_adam.R index 71190f9..4920a3c 100644 --- a/data-raw/fetch_ct_adam.R +++ b/data-raw/fetch_ct_adam.R @@ -25,49 +25,62 @@ if (file.exists(existing_path)) { } # ── Find versions to add ─────────────────────────────────────────────────────── -current_date <- fetch_current_release_date("adam") +current_date <- tryCatch( + fetch_current_release_date("adam"), + error = function(e) { + message(sprintf( + "Could not fetch ADaM CT release date stamp: %s\nFalling back to archive listing only.", + e$message + )) + as.Date(NA_character_) + } +) archive_dates <- list_archive_dates("adam") -all_dates <- sort(unique(c(archive_dates, current_date))) - -dates_needed <- all_dates[ - !vapply(all_dates, function(d) { - already_have_version(ct_adam, d) || file.exists(raw_ct_path("adam", d)) - }, logical(1L)) -] +all_dates <- sort(unique(c(archive_dates, if (!is.na(current_date)) current_date))) -if (length(dates_needed) == 0L) { - message("ADaM CT is already up to date.") +if (length(all_dates) == 0L) { + message("Could not reach NCI server. Skipping ADaM CT update.") } else { - message(sprintf( - "Adding %d new ADaM CT version(s): %s", - length(dates_needed), - paste(format(dates_needed), collapse = ", ") - )) + dates_needed <- all_dates[ + !vapply(all_dates, function(d) { + already_have_version(ct_adam, d) || file.exists(raw_ct_path("adam", d)) + }, logical(1L)) + ] - # ── Apply updates in chronological order ─────────────────────────────────── - for (d in sort(dates_needed)) { - d <- as.Date(d) - message(sprintf(" Processing %s...", format(d))) + if (length(dates_needed) == 0L) { + message("ADaM CT is already up to date.") + } else { + message(sprintf( + "Adding %d new ADaM CT version(s): %s", + length(dates_needed), + paste(format(dates_needed), collapse = ", ") + )) - url <- if (identical(d, current_date)) ADAM_CURRENT else archive_url("adam", d) + # ── Apply updates in chronological order ─────────────────────────────────── + for (d in sort(dates_needed)) { + d <- as.Date(d) + message(sprintf(" Processing %s...", format(d))) - new_release <- tryCatch( - fetch_raw_ct_tbl(url, "adam", d), - error = function(e) { - warning(sprintf("Failed to fetch/parse ADaM CT %s: %s", format(d), e$message)) - NULL - } - ) + url <- if (identical(d, current_date)) ADAM_CURRENT else archive_url("adam", d) + + new_release <- tryCatch( + fetch_raw_ct_tbl(url, "adam", d), + error = function(e) { + warning(sprintf("Failed to fetch/parse ADaM CT %s: %s", format(d), e$message)) + NULL + } + ) - if (!is.null(new_release)) { - ct_adam <- apply_ct_update(ct_adam, new_release, d) - if (nrow(ct_adam) > 0L) assert_no_na_valid_from(ct_adam) - message(sprintf(" -> ct_adam now has %d rows", nrow(ct_adam))) + if (!is.null(new_release)) { + ct_adam <- apply_ct_update(ct_adam, new_release, d) + if (nrow(ct_adam) > 0L) assert_no_na_valid_from(ct_adam) + message(sprintf(" -> ct_adam now has %d rows", nrow(ct_adam))) + } } - } - # ── Save ──────────────────────────────────────────────────────────────────── - usethis::use_data(ct_adam, overwrite = TRUE, compress = "xz") - message("ct_adam saved.") - adam_updated <- TRUE + # ── Save ──────────────────────────────────────────────────────────────────── + usethis::use_data(ct_adam, overwrite = TRUE, compress = "xz") + message("ct_adam saved.") + adam_updated <- TRUE + } } diff --git a/data-raw/fetch_ct_sdtm.R b/data-raw/fetch_ct_sdtm.R index 2ead593..d39852d 100644 --- a/data-raw/fetch_ct_sdtm.R +++ b/data-raw/fetch_ct_sdtm.R @@ -25,52 +25,65 @@ if (file.exists(existing_path)) { } # ── Find versions to add ─────────────────────────────────────────────────────── -current_date <- fetch_current_release_date("sdtm") +current_date <- tryCatch( + fetch_current_release_date("sdtm"), + error = function(e) { + message(sprintf( + "Could not fetch SDTM CT release date stamp: %s\nFalling back to archive listing only.", + e$message + )) + as.Date(NA_character_) + } +) archive_dates <- list_archive_dates("sdtm") -all_dates <- sort(unique(c(archive_dates, current_date))) - -dates_needed <- all_dates[ - !vapply(all_dates, function(d) { - # A date is "done" if it already has rows in the table OR if its raw - # cache file exists (meaning it was processed but produced no new rows, - # i.e. the release was identical to the prior one). - already_have_version(ct_sdtm, d) || file.exists(raw_ct_path("sdtm", d)) - }, logical(1L)) -] +all_dates <- sort(unique(c(archive_dates, if (!is.na(current_date)) current_date))) -if (length(dates_needed) == 0L) { - message("SDTM CT is already up to date.") +if (length(all_dates) == 0L) { + message("Could not reach NCI server. Skipping SDTM CT update.") } else { - message(sprintf( - "Adding %d new SDTM CT version(s): %s", - length(dates_needed), - paste(format(dates_needed), collapse = ", ") - )) + dates_needed <- all_dates[ + !vapply(all_dates, function(d) { + # A date is "done" if it already has rows in the table OR if its raw + # cache file exists (meaning it was processed but produced no new rows, + # i.e. the release was identical to the prior one). + already_have_version(ct_sdtm, d) || file.exists(raw_ct_path("sdtm", d)) + }, logical(1L)) + ] - # ── Apply updates in chronological order ─────────────────────────────────── - for (d in sort(dates_needed)) { - d <- as.Date(d) - message(sprintf(" Processing %s...", format(d))) + if (length(dates_needed) == 0L) { + message("SDTM CT is already up to date.") + } else { + message(sprintf( + "Adding %d new SDTM CT version(s): %s", + length(dates_needed), + paste(format(dates_needed), collapse = ", ") + )) - url <- if (identical(d, current_date)) SDTM_CURRENT else archive_url("sdtm", d) + # ── Apply updates in chronological order ─────────────────────────────────── + for (d in sort(dates_needed)) { + d <- as.Date(d) + message(sprintf(" Processing %s...", format(d))) - new_release <- tryCatch( - fetch_raw_ct_tbl(url, "sdtm", d), - error = function(e) { - warning(sprintf("Failed to fetch/parse SDTM CT %s: %s", format(d), e$message)) - NULL - } - ) + url <- if (identical(d, current_date)) SDTM_CURRENT else archive_url("sdtm", d) + + new_release <- tryCatch( + fetch_raw_ct_tbl(url, "sdtm", d), + error = function(e) { + warning(sprintf("Failed to fetch/parse SDTM CT %s: %s", format(d), e$message)) + NULL + } + ) - if (!is.null(new_release)) { - ct_sdtm <- apply_ct_update(ct_sdtm, new_release, d) - if (nrow(ct_sdtm) > 0L) assert_no_na_valid_from(ct_sdtm) - message(sprintf(" -> ct_sdtm now has %d rows", nrow(ct_sdtm))) + if (!is.null(new_release)) { + ct_sdtm <- apply_ct_update(ct_sdtm, new_release, d) + if (nrow(ct_sdtm) > 0L) assert_no_na_valid_from(ct_sdtm) + message(sprintf(" -> ct_sdtm now has %d rows", nrow(ct_sdtm))) + } } - } - # ── Save ──────────────────────────────────────────────────────────────────── - usethis::use_data(ct_sdtm, overwrite = TRUE, compress = "xz") - message("ct_sdtm saved.") - sdtm_updated <- TRUE + # ── Save ──────────────────────────────────────────────────────────────────── + usethis::use_data(ct_sdtm, overwrite = TRUE, compress = "xz") + message("ct_sdtm saved.") + sdtm_updated <- TRUE + } }