From 6a17a667771f6bb7ab1db4638f19fb76e6e42e63 Mon Sep 17 00:00:00 2001 From: Martin Hinz Date: Wed, 29 May 2024 11:57:34 +0200 Subject: [PATCH 1/3] changed some functions to speed up parsing --- DESCRIPTION | 7 +-- R/xronos-api.R | 121 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 103 insertions(+), 25 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index afaebf4..f9558fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,16 +27,17 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.2 +RoxygenNote: 7.3.1 Imports: - jsonlite, + rjson, httr, rlang, purrr, utils, cli, dplyr, - countrycode + countrycode, + tibble Suggests: testthat (>= 3.0.0), covr, diff --git a/R/xronos-api.R b/R/xronos-api.R index 2dd6e0e..8535476 100644 --- a/R/xronos-api.R +++ b/R/xronos-api.R @@ -88,29 +88,105 @@ xronos_query <- function(filter, values) { #' @noRd xronos_parse <- function(response) { content <- httr::content(response, as = "text") - result <- jsonlite::parse_json(content) - - # Flatten nested attributes - result <- purrr::map_depth(result, 2, - ~purrr::map_at(., c("measurement", "periods"), - ~list(unlist(., use.names = FALSE)))) - result <- purrr::map_depth(result, 2, - ~purrr::map_at(., c("measurement", "typochronological_units"), - ~list(unlist(., use.names = FALSE)))) - result <- purrr::map_depth(result, 2, - ~purrr::map_at(., c("measurement", "ecochronological_units"), - ~list(unlist(., use.names = FALSE)))) - - # Normalise & rectangle - result <- purrr::map(result, normalise_empty) - result <- purrr::map_dfr(result, "measurement") - - result + result <- rjson::fromJSON(content) + + print("Processing samples...") + + # Initialize a lightweight progress bar + pb <- utils::txtProgressBar(min = 0, max = length(result), style = 3) + + measurements <- lapply(seq_along(result), function(i) { + utils::setTxtProgressBar(pb, i) + process_measurement(result[[i]]$measurement) + }) + + # Combine list of named lists into a data frame and then convert to tibble + result_df <- dplyr::bind_rows(measurements) + + result_df <- lapply(result_df, normalise_empty) + + columns_are <- c(id = NA_integer_, + labnr = NA_character_, + bp = NA_integer_, + std = NA_integer_, + cal_bp = NA_integer_, + cal_std = NA_integer_, + delta_c13 = NA_real_, + source_database = NA_character_, + lab_name = NA_character_, + material = NA_character_, + species = NA_character_, + feature = NA_character_, + feature_type = NA_character_, + site = NA_character_, + country = NA_character_, + lat = NA_character_, + lng = NA_character_, + site_type = NA_character_, + periods = NA, + typochronological_units = NA, + ecochronological_units = NA, + reference = NA + ) + + return(dplyr::as_tibble(result_df) |> + tibble::add_column(!!!columns_are[setdiff(names(columns_are), + names(result_df))]) |> + dplyr::select(names(columns_are)) + ) } # Helpers ----------------------------------------------------------------- +#' XRONOS web address +#' +#' Helper function to process each measurement +#' +#' @param measurement An individual measurement from the response as JSON +#' +#' @return +#' A row of parsed JSON for an individual measurement. +#' @keywords internal +#' @noRd +process_measurement <- function(measurement) { + + periods <- unlist( + lapply( + measurement$periods, + function(x) x$periode + ), + use.names = FALSE) + + typochronological_units <- unlist( + lapply( + measurement$typochronological_units, + function(x) x$typochronological_unit + ), + use.names = FALSE + ) + + ecochronological_units <- unlist( + lapply( + measurement$ecochronological_units, + function(x) x$ecochronological_unit + ), + use.names = FALSE + ) + + reference <- lapply( + measurement$reference, + function(x) list(reference = x$reference) + ) + + measurement$periods <- list(periods) + measurement$typochronological_units <- list(typochronological_units) + measurement$ecochronological_units <- list(ecochronological_units) + measurement$reference <- reference + + return(measurement) +} + #' XRONOS web address #' #' Returns the current base URL of XRONOS, or the address of the API version @@ -175,9 +251,10 @@ xronos_assert_valid_filter <- function(x) { #' @noRd normalise_empty <- function(x) { if (is.list(x)) { - if (length(x) == 0) NA - else purrr::map(x, normalise_empty) + lapply(x, normalise_empty) + } else if (is.null(x)) { + NA + } else { + x } - else if (is.null(x)) NA - else x } From 50f2c9eafb5b905aabbc8b54f42b1d850b938084 Mon Sep 17 00:00:00 2001 From: Martin Hinz Date: Mon, 10 Jun 2024 15:12:51 +0200 Subject: [PATCH 2/3] removed progress bar and hard coded list of attributes, solves #8 --- DESCRIPTION | 5 ++--- R/xronos-api.R | 36 +----------------------------------- 2 files changed, 3 insertions(+), 38 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f9558fb..b06493a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: xronos Type: Package Title: Client for the 'XRONOS' Chronological Database -Version: 0.1.1.9000 +Version: 0.1.1.9001 Authors@R: c( person(given = "Martin", family = "Hinz", role = "aut", @@ -36,8 +36,7 @@ Imports: utils, cli, dplyr, - countrycode, - tibble + countrycode Suggests: testthat (>= 3.0.0), covr, diff --git a/R/xronos-api.R b/R/xronos-api.R index 8535476..ced508f 100644 --- a/R/xronos-api.R +++ b/R/xronos-api.R @@ -90,13 +90,7 @@ xronos_parse <- function(response) { content <- httr::content(response, as = "text") result <- rjson::fromJSON(content) - print("Processing samples...") - - # Initialize a lightweight progress bar - pb <- utils::txtProgressBar(min = 0, max = length(result), style = 3) - measurements <- lapply(seq_along(result), function(i) { - utils::setTxtProgressBar(pb, i) process_measurement(result[[i]]$measurement) }) @@ -105,35 +99,7 @@ xronos_parse <- function(response) { result_df <- lapply(result_df, normalise_empty) - columns_are <- c(id = NA_integer_, - labnr = NA_character_, - bp = NA_integer_, - std = NA_integer_, - cal_bp = NA_integer_, - cal_std = NA_integer_, - delta_c13 = NA_real_, - source_database = NA_character_, - lab_name = NA_character_, - material = NA_character_, - species = NA_character_, - feature = NA_character_, - feature_type = NA_character_, - site = NA_character_, - country = NA_character_, - lat = NA_character_, - lng = NA_character_, - site_type = NA_character_, - periods = NA, - typochronological_units = NA, - ecochronological_units = NA, - reference = NA - ) - - return(dplyr::as_tibble(result_df) |> - tibble::add_column(!!!columns_are[setdiff(names(columns_are), - names(result_df))]) |> - dplyr::select(names(columns_are)) - ) + return(dplyr::as_tibble(result_df)) } From eb377fba089d12225ce3e456dac85cd5ee10485a Mon Sep 17 00:00:00 2001 From: Joe Roe Date: Wed, 5 Feb 2025 11:34:16 +0100 Subject: [PATCH 3/3] Fix versions --- DESCRIPTION | 4 ++-- xronos.Rproj | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b06493a..7d218a1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: xronos Type: Package Title: Client for the 'XRONOS' Chronological Database -Version: 0.1.1.9001 +Version: 0.1.1.9000 Authors@R: c( person(given = "Martin", family = "Hinz", role = "aut", @@ -27,7 +27,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: rjson, httr, diff --git a/xronos.Rproj b/xronos.Rproj index 270314b..5131c92 100644 --- a/xronos.Rproj +++ b/xronos.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: dde9acba-558b-41ef-ac0d-d54b8e2d9578 RestoreWorkspace: Default SaveWorkspace: Default