|
| 1 | +# Functions to import, process, and summarise patient waiting time data. |
| 2 | + |
| 3 | +library(readr) |
| 4 | +library(dplyr) |
| 5 | +library(lubridate) |
| 6 | + |
| 7 | + |
#' Read the raw patient CSV and validate its column layout.
#'
#' The file is read with `readr::read_csv()`. The column names of the result
#' must be identical (same names, same order) to the required set below;
#' otherwise an error listing both the found and the expected columns is
#' raised.
#'
#' @param path Character string giving path to the CSV file containing the
#'   patient data.
#'
#' @return A data frame containing the raw patient-level data.
#'
#' @export
import_patient_data <- function(path) {
  # Required layout of the raw file: names and order must both match.
  required_cols <- c(
    "PATIENT_ID",
    "ARRIVAL_DATE", "ARRIVAL_TIME",
    "SERVICE_DATE", "SERVICE_TIME"
  )

  raw <- readr::read_csv(path, show_col_types = FALSE)
  found_cols <- colnames(raw)

  # Happy path: layout matches, hand the data straight back.
  if (identical(found_cols, required_cols)) {
    return(raw)
  }

  stop(
    sprintf(
      "Unexpected columns: %s (expected %s)",
      paste(found_cols, collapse = ", "),
      paste(required_cols, collapse = ", ")
    )
  )
}
| 40 | + |
| 41 | + |
#' Derive arrival/service timestamps and the wait between them.
#'
#' Each date column is pasted together with its time column (the time is
#' coerced to integer and zero-padded to four digits) and the result is
#' parsed with `lubridate::ymd_hm()`. The wait is the service timestamp
#' minus the arrival timestamp, expressed in minutes.
#'
#' An error is raised if any arrival or service timestamp fails to parse.
#'
#' @param df Data frame with patient-level data containing `ARRIVAL_DATE`,
#'   `ARRIVAL_TIME`, `SERVICE_DATE`, and `SERVICE_TIME` columns.
#'
#' @return A copy of the input data frame with additional columns:
#'   `arrival_datetime`, `service_datetime`, and `waittime`.
#'
#' @export
calculate_wait_times <- function(df) {
  # Glue a date and a zero-padded four-digit time into one parsed datetime.
  to_datetime <- function(date, time) {
    stamp <- paste(as.character(date), sprintf("%04d", as.integer(time)))
    lubridate::ymd_hm(stamp)
  }

  with_datetimes <- dplyr::mutate(
    df,
    arrival_datetime = to_datetime(ARRIVAL_DATE, ARRIVAL_TIME),
    service_datetime = to_datetime(SERVICE_DATE, SERVICE_TIME)
  )

  # A failed parse shows up as NA; refuse to continue if any row has one.
  unparsed <- is.na(with_datetimes$arrival_datetime) |
    is.na(with_datetimes$service_datetime)
  if (any(unparsed)) {
    stop(
      "Failed to parse arrival or service datetimes; ",
      "check for missing or invalid dates/times."
    )
  }

  dplyr::mutate(
    with_datetimes,
    waittime = as.numeric(
      difftime(service_datetime, arrival_datetime, units = "mins")
    )
  )
}
| 84 | + |
| 85 | + |
#' Calculate mean, standard deviation and 95% confidence interval (CI).
#'
#' Missing values are dropped before anything is computed. The CI is the
#' two-sided t-interval `mean +/- qt(0.975, n - 1) * sd / sqrt(n)`, which is
#' appropriate for small samples and converges to the normal interval as the
#' sample size increases.
#'
#' The interval is computed directly rather than through `stats::t.test()`:
#' `t.test()` stops with "data are essentially constant" when the standard
#' error is tiny relative to the mean, whereas the closed form simply yields
#' a very narrow interval.
#'
#' @param data Numeric vector of data to use in the calculation.
#'
#' @return A named list with elements `mean`, `std_dev`, `ci_lower` and
#'   `ci_upper`. Each value is a length-one numeric, or `NA` if it can't be
#'   computed (the mean of an input with no non-missing values is `NaN`).
#'   With fewer than two non-missing values both CI bounds are `NA`; when
#'   the standard deviation is zero or missing both bounds equal the mean.
#'
#' @export
summary_stats <- function(data) {
  observed <- data[!is.na(data)]
  n_complete <- length(observed)

  centre <- mean(observed)
  spread <- stats::sd(observed)

  if (n_complete < 2L) {
    # A single observation (or none) carries no information about spread.
    ci_lower <- NA_real_
    ci_upper <- NA_real_
  } else if (is.na(spread) || spread == 0) {
    # CI collapses to the mean when there is no (measurable) variation.
    ci_lower <- centre
    ci_upper <- centre
  } else {
    half_width <- stats::qt(0.975, df = n_complete - 1L) *
      spread / sqrt(n_complete)
    ci_lower <- centre - half_width
    ci_upper <- centre + half_width
  }

  list(
    mean = centre,
    std_dev = spread,
    ci_lower = ci_lower,
    ci_upper = ci_upper
  )
}
0 commit comments