|
| 1 | +library(syntheticadam) |
| 2 | +library(digest) |
| 3 | + |
# Place a copy of the workflow configuration file in the root folder
file.copy("inst/config_workflow.yml", "./")

# Templates where we do not compare checksums
ignore_checksum <- c("ad_adlb_param_tab.R")

# Current working directory at the time the script starts
current_script_directory <- getwd()

# Resolve the workflow folders from config_workflow.yml
config <- yaml::read_yaml("./config_workflow.yml")
adam_config <- config[["paths"]][["adam"]]
esub_path <- adam_config[["esub"]]
adam_path <- adam_config[["data"]]

# The produced ("actual") parquet files are read from the ADaM data folder;
# the reference ("expected") folder is the same path with "adams" replaced by
# "adams_expected"
adam_path_parquet_actual <- adam_config[["data"]]
adam_path_parquet_expected <- gsub("adams", "adams_expected", adam_config[["data"]])
templates_dir <- file.path("inst", "templates")

# Create every folder up front; a list (not c()) keeps each entry intact so
# dir.create() receives exactly the value read from the configuration
for (workflow_dir in list(esub_path, adam_path, adam_path_parquet_actual, adam_path_parquet_expected)) {
  dir.create(workflow_dir, showWarnings = FALSE, recursive = TRUE)
}
| 24 | + |
###################################################
# 1. Retrieve syntheticadam data (expected data)
###################################################

# Each dataset listed by the syntheticadam package is written as
# <dataset name>.parquet in the expected-data folder
datasets <- data(package = "syntheticadam")
dataset_names <- datasets$results[, "Item"]
syntheticadam_env <- as.environment("package:syntheticadam")
for (item in dataset_names) {
  arrow::write_parquet(
    get(item, envir = syntheticadam_env),
    file.path(adam_path_parquet_expected, sprintf("%s.parquet", item))
  )
}
| 37 | + |
########################################################
# 2. Run snakemake workflow, and retrieve failed rules
########################################################

# Remove the .snakemake folder left by a previous run and make sure the
# folder holding the per-template logs exists (no warning when it already
# does, like the other dir.create() calls of this script)
logs_dir <- "inst/templates/logs/"
unlink(".snakemake", recursive = TRUE)
dir.create(logs_dir, showWarnings = FALSE, recursive = TRUE)

# system2() expects the executable as a single string and its arguments as a
# separate character vector. With stdout = TRUE and stderr = TRUE both streams
# are captured into `output`; a non-zero exit code is attached to it as the
# "status" attribute, which is checked further down.
output <- system2(
  "snakemake",
  args = c("-F", "-j8", "--snakefile", "./inst/Snakefile", "all"),
  stdout = TRUE,
  stderr = TRUE
)
cat(paste(output, collapse = "\n"))
| 49 | + |
# Loop over the log files in the logs dir and sort them into failures and
# warnings
log_files <- list.files(logs_dir, pattern = ".*\\.log$", full.names = TRUE)

# Text R writes when a script aborts
failure_marker <- "Execution halted"
# R writes "Warning message:" for a single warning, "Warning messages:" for
# several, and "There were N warnings ..." / "There were 50 or more warnings"
# when there are too many to list; all of these must count as a warning
warning_pattern <- "Warning messages?:|There were [0-9]+( or more)? warnings"

failed_logs_paths <- c()
warnings_logs_paths <- c()
for (log_file in log_files) {
  log_lines <- readLines(log_file)
  # both markers fit on one line, so the log is searched line by line
  if (any(grepl(failure_marker, log_lines, fixed = TRUE))) {
    failed_logs_paths <- c(failed_logs_paths, log_file)
  }
  if (any(grepl(warning_pattern, log_lines))) {
    warnings_logs_paths <- c(warnings_logs_paths, log_file)
  }
}
| 67 | + |
| 68 | + |
# Abort when at least one template log reports a failure: print every failing
# log in full, then quit with a non-zero status
if (length(failed_logs_paths) > 0) {
  # exit code 1 will end up with error in the gitlab-ci pipelines
  print("Some templates failed")
  for (log_path in failed_logs_paths) {
    # Strip only the trailing ".log" extension. An unanchored ".log" pattern
    # treats the dot as "any character" and would also eat text inside the
    # name (e.g. "catalog.log" would become "cat").
    template_name <- sub("\\.log$", "", basename(log_path))
    cat(sprintf("\nTemplate %s failed\n", template_name))
    failed_rule_log <- readLines(log_path)
    cat(paste(failed_rule_log, collapse = "\n"))
  }
  q(status = 1)
}

# check also other potential errors in the snakemake cmd: system2() sets the
# "status" attribute on the captured output when the command could not be run
# or returned a non-zero exit code
if (!is.null(attr(output, "status"))) {
  print("Snakemake cmd failed")
  cat(paste(output, collapse = "\n"))
  q(status = 1)
}
| 86 | + |
#############################################################
# 3. Compare syntheticadam data with actual templates data
#############################################################

# list produced datasets and compare them to syntheticadam data.
# list.files() takes a regular expression, not a glob: anchor on the literal
# ".parquet" extension so only parquet files are picked up.
actual_paths <- list.files(adam_path_parquet_actual, pattern = "\\.parquet$")
diff_templates <- c()
for (parquet_path in actual_paths) {
  # the expected file is looked up under the same file name as the actual one
  data_actual <- arrow::read_parquet(file.path(adam_path_parquet_actual, parquet_path))
  data_expected <- arrow::read_parquet(file.path(adam_path_parquet_expected, parquet_path))
  output_diff <- diffdf::diffdf(compare = data_actual, base = data_expected)
  if (diffdf::diffdf_has_issues(output_diff)) {
    # keep the file name together with its diff so both can be reported later
    diff_templates <- c(diff_templates, list(list(parquet_path = parquet_path, diff = output_diff)))
  }
}
| 103 | + |
| 104 | + |
# Differences with syntheticadam are only reported here: nothing in this
# section changes the exit status of the script.
# NOTE(review): the previous comment mentioned exit code 125 (shown as a
# warning by gitlab-ci) - confirm whether q(status = 125) is intended.
if (length(diff_templates) > 0) {
  print("Some templates have differences with syntheticadam package :")
  for (d in diff_templates) {
    print(sprintf("Differences detected for data %s", d$parquet_path))
    print(d$diff)
  }
}

# Warnings found in the template logs are listed but do not fail the script
if (length(warnings_logs_paths) > 0) {
  print("Some templates ends-up with warnings")
  for (log_path in warnings_logs_paths) {
    # Strip only the trailing ".log" extension; an unanchored ".log" pattern
    # would also remove matching text inside the template name
    template_name <- sub("\\.log$", "", basename(log_path))
    cat(sprintf("\nWarning detected on Template log %s\n", template_name))
    # warn_rule_log <- readLines(log_path)
    # cat(paste(warn_rule_log, collapse='\n'))
  }
}
0 commit comments