-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStep1a_env_setup.Rmd
More file actions
101 lines (81 loc) · 3.26 KB
/
Step1a_env_setup.Rmd
File metadata and controls
101 lines (81 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
---
title: "Step1a_environment_setup"
subtitle: "Templating of folders and pipeline parameters."
author: "T. Divoll"
date: "2023-09-12"
output:
html_notebook:
df_print: paged
toc: true
params:
#don't change these
datetime: !r (format(Sys.time(), '%Y%m%dT%H:%M:%SZ'))
user: !r Sys.getenv('LOGNAME')
data_path: "/oscar/data/tkartzin/projects"
#do change params below this line as needed
sample_sheet: "sample_sheet.xlsx"
project_code: "test"
---
### Environment Setup
```{r include=FALSE}
# Bootstrap pacman if it is not installed. requireNamespace() only probes for
# the package: unlike require(), it does not attach it and does not emit a
# warning when the package is missing. pacman is always called with `pacman::`
# below, so attaching it is unnecessary.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# p_load() installs any missing package and then attaches all of them.
pacman::p_load(
  knitr, here, tidyr, dplyr, readxl, stringr, spgs, filesstrings,
  lubridate, fs, rlang, rmarkdown
)

# Declare this notebook's location relative to the project root so that every
# later here() call resolves against the same root.
here::i_am("./Step1a_env_setup.Rmd")
```
```{r setup, include=FALSE}
# Set global chunk parameters here.
# Caching is deliberately off: the chunks below exist for their filesystem
# side effects (creating the timestamped work directory, copying templates and
# raw data) and depend on params$datetime, which is not part of a chunk's
# cache key. With cache = TRUE a re-knit would skip those chunks and reuse a
# stale `work_dir` instead of building a new run folder.
knitr::opts_chunk$set(echo = TRUE, cache = FALSE)

# Export every notebook parameter as an environment variable of the same name.
do.call("Sys.setenv", params)
```
#### Set up standard path variables
**Note:** When using the provided test data for training, set the `project_code` param in the YAML to "test" and point the `sample_sheet` param at the provided `sample_sheet_test.xlsx`!
```{r}
# Working directory for this run: a folder under the project root named after
# the `datetime` parameter.
work_dir <- here(params$datetime)
dir.create(work_dir)

# Load the sample sheet with every column read as text; types are fixed up in
# the cleanup step that follows.
sample_sheet <- read_xlsx(
  path = here(params$sample_sheet),
  col_types = "text"
)
```
#### Cleanup on data formats
```{r}
# Parse every column whose name contains "date" with lubridate::ymd().
sample_sheet <- sample_sheet %>%
  mutate(across(contains("date"), ~ ymd(.)))

# Sample identifiers must stay character.
sample_sheet$SampleName <- as.character(sample_sheet$SampleName)
sample_sheet$SampleID <- as.character(sample_sheet$SampleID)

# Validate SampleType by exact membership. A substring regex would accept
# values such as "samples" or "control_x", and an NA would make the if()
# condition itself NA and abort with an unrelated error. `%in%` never returns
# NA, so missing types are reported as wrong like any other invalid value.
valid_types <- c("control", "sample")
if (!all(sample_sheet$SampleType %in% valid_types)) {
  stop("Error: Some types are wrong; They should be either `control` or `sample` in lowercase.")
} else {
  message("All types look correctly formatted!")
}
```
The following dates are pulled from the sample sheet and can be used to filter out dates in case of contamination. The unique extraction, PCR, and sequencing dates are stored as vectors that we can then index into later.
```{r}
# Distinct values of each date column and of the source folder column, kept in
# order of first appearance in the sample sheet.
extraction_date <- sample_sheet$`Ext date` %>% unique()
pcr_date <- sample_sheet$`PCR date` %>% unique()
sequencing_dates <- sample_sheet$`Seq date` %>% unique()
folders <- sample_sheet$Folder %>% unique()
```
Next create a folder for results for each sequencing date in the sample sheet.
```{r}
# Build the results tree for every sequencing date:
#   <work_dir>/<seq date>/results/{id_data,bad_controls_path}
# Index into the vector (rather than iterating over its values) so each
# element keeps its class when it is turned into a path component.
for (run_idx in seq_along(sequencing_dates)) {
  run_date <- sequencing_dates[run_idx]
  dir.create(here(work_dir, run_date))
  dir.create(here(work_dir, run_date, "results"))
  dir.create(here(work_dir, run_date, "results/id_data"))
  dir.create(here(work_dir, run_date, "results/bad_controls_path"))
}

# Find the date-named run folders in the work directory and copy the template
# source tree into each of them.
seq_date_dirs <- list.files(
  here(work_dir),
  pattern = "^\\d{4}-\\d{2}-\\d{2}",
  full.names = TRUE
)
template_src <- here("template/src")
for (run_dir in seq_date_dirs) {
  dir_copy(template_src, run_dir, overwrite = TRUE)
}
```
#### Copy over data and sample sheet to each run folder
```{r}
# Root folder holding this project's raw sequencing data.
samples_path <- file.path(params$data_path, params$project_code)

# One row per distinct (Folder, sequencing date) pair. The folder name and its
# date are read from the SAME row below. Pairing a separately computed vector
# of folders with grouped output by position is unsafe: grouped summaries are
# ordered by the Folder key, not by first appearance in the sheet, so the two
# can disagree and send raw data to the wrong run directory.
seq_date_names <- sample_sheet %>%
  distinct(Folder, value = `Seq date`) %>%
  as.data.frame()
seq_date_names$value <- ymd(seq_date_names$value)

# Copy each source folder's raw data into `renamed_data` under the run
# directory of its sequencing date.
for (m in seq_len(nrow(seq_date_names))) {
  from <- file.path(samples_path, seq_date_names$Folder[m])
  to <- here(work_dir, seq_date_names$value[m])
  dir_copy(
    file.path(from, "raw_data"),
    file.path(to, "renamed_data"),
    overwrite = TRUE
  )
}

# Keep a copy of the sample sheet alongside the run folders.
file.copy(from = here(params$sample_sheet), to = here(work_dir))
```