-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparams.config
More file actions
78 lines (65 loc) · 4.53 KB
/
params.config
File metadata and controls
78 lines (65 loc) · 4.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*
* Default parameter values
* Can be overridden by command line (--param value) or params file (-params-file file.yml)
*/
params {
//# Input/Output paths
input = null // Path to input genotype data (required). A path ending in '.bed' or '.pgen' is tagged as that format in the genetic cache key; anything else is tagged 'vcf'.
covarfile = null // Path to covariates file (required)
phenofile = null // Path to phenotype file (required)
//# Variable names in covariate/phenotype files
pheno_name = 'y' // Phenotype column name(s), space-separated for multiple
covar_numeric = 'SEX age_at_baseline PC1 PC2' // Numeric covariate names, space-separated. PCs calculated later will be added.
covar_categorical = '' // Categorical covariate names, space-separated (mutually exclusive with covar_numeric)
covar_interact = '' // Numeric covariate for SNP interaction testing (survival analysis only, must be in covar_numeric list)
study_arm_col = 'study_arm' // Required column defining study groups, which are analyzed separately. If there is only one arm (e.g. case-only study), set all samples to the same value.
time_col = 'study_days' // Time column for longitudinal/survival analysis.
// For survival analysis: if tstart and tend columns exist in the phenotype file, they are used directly.
// Otherwise, tstart is set to 0 and tend is set to the value of time_col.
//# Analysis model selection
longitudinal_flag = false // Longitudinal analysis (GALLOP)
survival_flag = false // Survival analysis (Cox PH)
linear_flag = true // Cross-sectional analysis (GLM)
//# Processing parameters
chunk_flag = true // Enable chunking of variants
chunk_size = 30000 // Number of variants per chunk for GWAS analysis
// Note: the SPLIT_VCF process uses 3x this value (90000 with the default of 30000) for initial splitting
// to optimize parallelization; chunks are then processed at this size for GWAS.
//# Genetic QC parameters
r2thres = -9 // R2 threshold for imputation quality (-9 = disabled)
minor_allele_freq = '0.05' // Minor allele frequency threshold (part of the genetic cache key)
minor_allele_ct = '20' // Minor allele count threshold
kinship = '0.177' // Kinship threshold for relatedness filtering (part of the genetic cache key)
ancestry = 'EUR' // Target ancestry (EUR, AFR, EAS, SAS, AMR) (part of the genetic cache key)
assembly = 'hg19' // Genome assembly (hg19 or hg38) (part of the genetic cache key)
//# Workflow mode
skip_pop_split = false // Skip population splitting: use when data is already ancestry-specific.
// Enables efficient LD pruning per chromosome before merge. When true, '_skip' is appended to the genetic cache key.
//# Analysis name identifier
analysis_name = 'TEST' // Identifier for this analysis run
genetic_data_id = null // Optional: identifier for the genetic data cache. When set it is used verbatim as genetic_cache_key; when null the key is computed.
// Use the same ID across analyses to reuse genetic QC outputs.
//# Storage configuration
//# Output options
mh_plot = true // Generate Manhattan plots
// Storage root - can be a local path or a cloud bucket.
// Resolution order (applied by the assignments after this block):
// 1) this parameter if set, 2) STORE_ROOT environment variable, 3) the launch directory.
STORE_ROOT = null
// Project name: this parameter if set, else the PROJECT_NAME environment variable, else 'unnamed_project'.
PROJECT_NAME = null
// Reference genomes directory: this parameter if set, else the REFERENCE_DIR environment variable,
// else "<STORE_ROOT>/References".
reference_dir = null
// Derived paths (computed automatically by the assignments after this block)
project_dir = null // Computed: "<STORE_ROOT>/<PROJECT_NAME>"
genetic_cache_key = null // Computed: cache key for genetic data processing
}
// Compute derived paths.
// Resolution order for each value: the parameter itself if it is already set,
// then the matching environment variable, then a built-in default.
// An unset or empty environment variable is falsy for `?:` and falls through to the default.
params.STORE_ROOT = params.STORE_ROOT ?: System.getenv('STORE_ROOT') ?: "${launchDir}"
params.PROJECT_NAME = params.PROJECT_NAME ?: System.getenv('PROJECT_NAME') ?: 'unnamed_project'
params.reference_dir = params.reference_dir ?: System.getenv('REFERENCE_DIR') ?: "${params.STORE_ROOT}/References"
params.project_dir = "${params.STORE_ROOT}/${params.PROJECT_NAME}"
// Compute genetic cache key: built from the input format and QC params, not phenotype/analysis mode,
// so genetic QC outputs can be reused across different analyses.
// An explicit genetic_data_id is used verbatim; otherwise the key is
//   <format>_<ancestry>_<assembly>_maf<minor_allele_freq>_kin<kinship>[_skip]
// where <format> is 'bed' or 'pgen' when params.input ends with that extension and 'vcf' otherwise
// (a null input also yields 'vcf'), and '_skip' is appended when skip_pop_split is true.
// The expressions are written inline, without top-level `def` variables, so this section
// consists only of assignments; Nextflow's strict config syntax does not accept
// variable declarations at the top level of a config file.
// NOTE(review): minor_allele_ct and r2thres are not part of the key - confirm that changing
// them cannot cause stale genetic QC outputs to be reused.
params.genetic_cache_key = params.genetic_data_id ?: [
    (params.input =~ /\.bed$/) ? 'bed' : ((params.input =~ /\.pgen$/) ? 'pgen' : 'vcf'),
    params.ancestry,
    params.assembly,
    "maf${params.minor_allele_freq}",
    "kin${params.kinship}"
].join('_') + (params.skip_pop_split ? '_skip' : '')