|
| 1 | +#' Get dataset's geography : codes and labels |
| 2 | +#' |
| 3 | +#' Retrieves the list of geo dimensions and all their possible values |
| 4 | +#' (codes and human-readable labels) for a given dataset. |
| 5 | +#' |
| 6 | +#' For other dimensions, refer to get_range() |
| 7 | +#' |
| 8 | +#' @param ds_name dataset name |
| 9 | +#' @param base_url_melodi API Melodi URL - default production URL |
| 10 | +#' @param lang french or english labels - default french ("fr") |
| 11 | +#' |
| 12 | +#' @return A data frame with columns `GEO_REF`, `GEO_OBJECT`, `GEO`, `GEO_LABEL` |
| 13 | +#' @export |
| 14 | +#' |
| 15 | +#' @examples |
| 16 | +#' get_range_geo("DS_POPULATIONS_REFERENCE") |
| 17 | +#' get_range("DS_TICM_PRATIQUES") |
| 18 | +get_range_geo <- function( |
| 19 | + ds_name, |
| 20 | + base_url_melodi = "https://api.insee.fr/melodi", |
| 21 | + lang = "fr" |
| 22 | +) { |
| 23 | + # check parameters |
| 24 | + if (!lang %in% c("fr", "en")) { |
| 25 | + stop("lang must be : fr or en") |
| 26 | + } |
| 27 | + url <- paste0(base_url_melodi, "/range/", ds_name) |
| 28 | + |
| 29 | + message("Request dataset range : ", url) |
| 30 | + |
| 31 | + dataset <- httr2::request(url) |> |
| 32 | + httr2::req_perform() |> |
| 33 | + httr2::resp_body_json(simplifyVector = FALSE) |
| 34 | + |
| 35 | + range <- dataset[["range"]] |
| 36 | + |
| 37 | + # Keep GEO only |
| 38 | + range <- Filter(function(x) { |
| 39 | + x[["concept"]][["code"]] == "GEO" |
| 40 | + }, range) |
| 41 | + |
| 42 | + if (length(range) == 0) { |
| 43 | + stop("Error: 'GEO' dimension is not present in the dataset.") |
| 44 | + } |
| 45 | + |
| 46 | + # for null cases (English GEO labels...) |
| 47 | + safe_extract <- function(x) { |
| 48 | + if (is.null(x)) NA else x |
| 49 | + } |
| 50 | + |
| 51 | + codebook_list <- list() |
| 52 | + |
| 53 | + for (i in seq_along(range)) { |
| 54 | + # concepts returned by the API are effectively dimensions of the dataset |
| 55 | + dimension <- range[[i]][["concept"]][["code"]] |> safe_extract() |
| 56 | + dimension_label <- range[[i]][["concept"]][["label"]][[lang]] |> safe_extract() |
| 57 | + |
| 58 | + values <- range[[i]][["values"]] |> safe_extract() |
| 59 | + |
| 60 | + for (j in seq_along(values)) { |
| 61 | + value <- values[[j]][["code"]] |> safe_extract() |
| 62 | + value_label <- values[[j]][["label"]][[lang]] |> safe_extract() |
| 63 | + value_id <- values[[j]][["id"]] |> safe_extract() |
| 64 | + |
| 65 | + # Créer la liste sans geo_object si dimension_geo est FALSE |
| 66 | + codebook_list[[length(codebook_list) + 1]] <- list( |
| 67 | + dimension = dimension, |
| 68 | + dimension_label = dimension_label, |
| 69 | + value = value, |
| 70 | + value_label = value_label, |
| 71 | + value_id = value_id) |
| 72 | + } |
| 73 | + } |
| 74 | + |
| 75 | + codebook_df <- do.call(rbind, lapply(codebook_list, as.data.frame)) |
| 76 | + rownames(codebook_df) <- NULL |
| 77 | + |
| 78 | + codebook_df <- codebook_df |> |
| 79 | + dplyr::arrange(dimension, value) |> |
| 80 | + tidyr::separate(value_id, into = c("GEO_REF", "GEO_OBJECT", "GEO"), sep = "-") |> |
| 81 | + dplyr::mutate(GEO_LABEL = value_label) |> |
| 82 | + dplyr::select(GEO_REF, GEO_OBJECT, GEO, GEO_LABEL) |> |
| 83 | + dplyr::arrange(GEO_OBJECT, GEO) |
| 84 | + |
| 85 | + return(codebook_df) |
| 86 | +} |
0 commit comments