Skip to content

Commit 770e462

Browse files
authored
Merge pull request #10 from pythonhealthdatascience/dev
Dev
2 parents 3276219 + 60728b6 commit 770e462

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+4838
-284
lines changed

.github/workflows/r_tests.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Workflow: restore the renv environment and run the testthat suite for the
# example R package in examples/r_package.
name: r_tests
run-name: Run R tests

# Runs on pushes to main, and can also be started manually.
on:
  push:
    branches: [main]
  workflow_dispatch:

jobs:
  tests:
    runs-on: ubuntu-latest

    env:
      RENV_CONFIG_PAK_ENABLED: true

    steps:
      - uses: actions/checkout@v4

      - name: Set up R
        uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true
          r-version: 4.4.1

      - name: Restore renv from root
        run: |
          Rscript -e 'renv::restore(project = ".")'

      - name: Run testthat tests
        # load_all() and test_dir() must share one R process: each Rscript
        # call starts a fresh session, so a package loaded in an earlier call
        # is no longer available when the tests run.
        run: |
          Rscript -e 'renv::activate()'
          Rscript -e 'devtools::load_all("examples/r_package"); testthat::test_dir("examples/r_package/tests/testthat")'

CONTRIBUTING.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,21 @@ This file is for contributors. It describes how the `hdruk_tests` site is set-up
44

55
## Example code
66

7-
The example code is contained in `examples/python_package/`.
7+
The example code is contained in `examples`.
88

9-
We want to be able to show individual functions without imports in the quarto website, so we have a script `tools/extract_snippets.py` which extracts each function without imports into individual `.py` files within `pages/code/`.
9+
We want to be able to show individual functions without imports in the quarto website, so we have a script `tools/extract_snippets.py` which extracts each function without imports into individual `.py` files within `pages/code/`. This is run each time the site is built via Quarto's `pre-render` hook.
1010

11-
This is run each time the site is built via Quarto's `pre-render` hook.
11+
Example commands for the python package:
12+
13+
* `pip install -e examples/python_package`
14+
* `pytest examples/python_package`
15+
16+
Example commands for the R package (first open an R console by running `R`; exit it with `quit()`):
17+
18+
* `devtools::document("examples/r_package")`
19+
* `devtools::check("examples/r_package")`
20+
* `withr::with_dir("examples/r_package", {usethis::use_mit_license()})`
21+
* `devtools::test("examples/r_package")`
1222

1323
## Rendering the quarto site
1424

DESCRIPTION

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
Title: hdruk_tests
22
Imports:
3+
covr
4+
devtools
35
knitr
6+
patrick
47
reticulate
58
rmarkdown
69
testthat

_quarto.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ project:
22
type: website
33
pre-render:
44
- python tools/extract_snippets.py
5+
- python tools/extract_rsnippets.py
56

67
website:
78
title: "Testing in Research Workflows"

examples/r_package/.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
^LICENSE\.md$

examples/r_package/DESCRIPTION

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Package: waitingtimes
2+
Title: waitingtimes
3+
Version: 0.0.0.9000
4+
Authors@R:
5+
person("Amy", "Heather", , "a.heather2@exeter.ac.uk", role = c("aut", "cre"))
6+
Description: Writing tests for a simple waiting times analysis.
7+
License: MIT + file LICENSE
8+
Encoding: UTF-8
9+
Roxygen: list(markdown = TRUE)
10+
RoxygenNote: 7.3.3
11+
Imports:
12+
devtools,
13+
dplyr,
14+
lubridate,
15+
readr,
16+
roxygen2,
17+
usethis
18+
Suggests:
19+
testthat (>= 3.0.0)
20+
Config/testthat/edition: 3

examples/r_package/LICENSE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
YEAR: 2026
2+
COPYRIGHT HOLDER: Amy Heather and Tom Monks

examples/r_package/LICENSE.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# MIT License
2+
3+
Copyright (c) 2026 Amy Heather and Tom Monks
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

examples/r_package/NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(calculate_wait_times)
4+
export(import_patient_data)
5+
export(summary_stats)
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# Functions to import, process, and summarise patient waiting time data.
2+
3+
library(readr)
4+
library(dplyr)
5+
library(lubridate)
6+
7+
8+
#' Read the raw patient CSV and validate its column layout.
#'
#' The file is read with `readr::read_csv()`. An error is raised unless the
#' column names are identical, in both name and order, to `PATIENT_ID`,
#' `ARRIVAL_DATE`, `ARRIVAL_TIME`, `SERVICE_DATE`, `SERVICE_TIME`.
#'
#' @param path Character string giving path to the CSV file containing the
#'   patient data.
#'
#' @return A data frame containing the raw patient-level data, as read.
#'
#' @export
import_patient_data <- function(path) {
  # Column names the raw file must have, in this exact order
  required_cols <- c(
    "PATIENT_ID",
    "ARRIVAL_DATE", "ARRIVAL_TIME",
    "SERVICE_DATE", "SERVICE_TIME"
  )

  raw <- readr::read_csv(path, show_col_types = FALSE)
  found_cols <- colnames(raw)

  # Happy path: layout matches exactly, so hand the data straight back
  if (identical(found_cols, required_cols)) {
    return(raw)
  }

  msg <- sprintf(
    "Unexpected columns: %s (expected %s)",
    paste(found_cols, collapse = ", "),
    paste(required_cols, collapse = ", ")
  )
  stop(msg)
}
40+
41+
42+
#' Derive arrival/service datetimes and the wait between them in minutes.
#'
#' Each date column is pasted together with its matching time column (coerced
#' to integer and zero-padded to four digits) and parsed with
#' `lubridate::ymd_hm()`. An error is raised if any arrival or service
#' datetime comes back as `NA`.
#'
#' @param df Data frame with patient-level data containing `ARRIVAL_DATE`,
#'   `ARRIVAL_TIME`, `SERVICE_DATE`, and `SERVICE_TIME` columns.
#'
#' @return The input data frame with three additional columns:
#'   `arrival_datetime`, `service_datetime`, and `waittime` (service minus
#'   arrival, as a number of minutes).
#'
#' @export
calculate_wait_times <- function(df) {
  # Join a date column and a time column into a single parsed datetime
  combine_date_time <- function(date_col, time_col) {
    padded_time <- sprintf("%04d", as.integer(time_col))
    lubridate::ymd_hm(paste(as.character(date_col), padded_time))
  }

  with_datetimes <- dplyr::mutate(
    df,
    arrival_datetime = combine_date_time(ARRIVAL_DATE, ARRIVAL_TIME),
    service_datetime = combine_date_time(SERVICE_DATE, SERVICE_TIME)
  )

  # Any NA here means at least one date/time pair could not be parsed
  parse_failed <- is.na(with_datetimes$arrival_datetime) |
    is.na(with_datetimes$service_datetime)
  if (any(parse_failed)) {
    stop(
      "Failed to parse arrival or service datetimes; ",
      "check for missing or invalid dates/times."
    )
  }

  dplyr::mutate(
    with_datetimes,
    waittime = as.numeric(
      difftime(service_datetime, arrival_datetime, units = "mins")
    )
  )
}
84+
85+
86+
#' Calculate mean, standard deviation and 95% confidence interval (CI).
#'
#' CI is calculated using the t-distribution, which is appropriate for
#' small samples and converges to the normal distribution as the sample
#' size increases. It is computed directly as
#' `mean +/- qt(0.975, n - 1) * std_dev / sqrt(n)`, where `n` is the number
#' of non-missing values. This is the same interval `stats::t.test()`
#' reports, but it never errors on constant or near-constant data: the
#' interval simply collapses onto the mean.
#'
#' Missing values are dropped before any statistic is computed.
#'
#' @param data Numeric vector of data to use in the calculation.
#'
#' @return A named list with elements `mean`, `std_dev`, `ci_lower` and
#'   `ci_upper`. Each value is a numeric, or `NA` if it can't be computed:
#'   `std_dev` and both CI bounds are `NA` with fewer than two non-missing
#'   values, and `mean` is `NaN` when there are none.
#'
#' @export
summary_stats <- function(data) {
  values <- data[!is.na(data)]
  n_complete <- length(values)

  avg <- mean(values)
  std_dev <- stats::sd(values)

  if (n_complete < 2L) {
    # A CI needs at least two observations to estimate the spread
    ci <- c(NA_real_, NA_real_)
  } else {
    # Half-width is exactly 0 when std_dev is 0, so the CI equals the mean
    half_width <- stats::qt(0.975, df = n_complete - 1L) *
      std_dev / sqrt(n_complete)
    ci <- avg + c(-1, 1) * half_width
  }

  list(
    mean = avg,
    std_dev = std_dev,
    ci_lower = ci[1L],
    ci_upper = ci[2L]
  )
}

0 commit comments

Comments
 (0)