// speechmetryflow/nextflow.config (main branch) — https://github.com/lingualab/speechmetryflow

// Pipeline metadata: name, contributors, home page, description, the
// Nextflow version constraint and the pipeline's own version string.
manifest {
    name = 'lingualab/speechmetryflow'
    // One map per contributor.
    contributors = [
        [
            name: 'Christophe Bedetti',
            affiliation: 'Université de Montréal',
            email: '', // intentionally left empty here
            github: '@cbedetti',
            contribution: ['author'], // List of contribution types ('author', 'maintainer' or 'contributor')
            orcid: '0000-0002-1443-8922',
        ],
    ]
    homePage = 'https://github.com/lingualab/speechmetryflow'
    description = 'Automated nextflow-based workflow designed to extract both audio and text metrics from speech at scale.'
    // Constraint on the Nextflow runtime version: 24.10.4 or newer.
    nextflowVersion = '>=24.10.4'
    // Version of this pipeline.
    version = 'v0.2.5'
}

// Default pipeline parameters. Values set to null have no default in this file.
params {
    // Pipeline options
    // Input locations are unset by default; the extensions default to ".wav" / ".txt".
    input = null
    audio_folder    = null
    text_folder     = null
    audio_extension = ".wav"
    text_extension  = ".txt"

    // Pipeline output
    // output_dir is the root used by the process-level publishDir path in this file.
    output_dir = 'results'
    // Map carrying a participant_id of 'Statistics' — presumably used in place of a
    // per-participant `meta` for population-level outputs; verify against the workflow.
    population_dir = [participant_id: 'Statistics']

    // Whisper options
    whisper_model = 'large-v3'  // Listed choices: tiny, base, small, medium, large — NOTE(review): the default 'large-v3' is not in this list; confirm accepted names
    whisper_model_dir = null    // no model directory configured by default

    // Uhm-O-Meter options
    uhmometer {
        preprocessing          = "None"              // choices: "None", "Band pass (300..3300 Hz)", "Reduce noise"
        silence_threshold      = -25.0               // unit: dB
        minimum_dip_near_peak  = 2.0                 // unit: dB
        minimum_pause_duration = 0.3                 // unit: s
        detect_filled_pauses   = "yes"                // choices: "yes", "no"
        language               = "English"           // choices: "English", "Dutch"
        filled_pause_threshold =  1.00               // cut-off higher/lower
        data                   = "Praat Info window" // choices: "TextGrid(s) only", "Praat Info window", "Save as text file", "Table"
        data_collection_type   = "OverWriteData"     // choices: "OverWriteData", "AppendData"
        keep_objects           = "yes"               // when_processing_files, choices: "yes", "no"
        // Disabled option, kept for reference:
        // output                 = "population_uhmometer_metrics.csv"
    }

    // opensmile options
    opensmile {
        // Feature set names to process.
        feature_sets = [
            "GeMAPSv01a",
            "GeMAPSv01b",
            "eGeMAPSv01a",
            "eGeMAPSv01b",
            "eGeMAPSv02",
        ]
        // Base name for the opensmile output — presumably a prefix completed per
        // feature set; verify against the process that consumes it.
        output_base = "population_opensmile_metrics_"
    }

    // Reports options
    // Local date-time formatted as yyyyMMdd-HHmmss, computed when this config is
    // evaluated; used in the report, DAG and timeline file names in this file.
    report_timestamp = java.time.LocalDateTime.now().format(java.time.format.DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss"))
}

// Defaults applied to all processes.
process {
    // Published files are copied to
    //   <params.output_dir>/<meta.participant_id>/<first ':'-separated segment of task.process>
    // The path is a closure, so it is resolved per task. It reads `meta.participant_id`,
    // which assumes every process has a `meta` with that key in scope — TODO confirm.
    publishDir = [
        path: { "${params.output_dir}/${meta.participant_id}/${task.process.split(':')[0]}" },
        mode: 'copy'
    ]
    // Failed tasks are ignored instead of stopping the run.
    errorStrategy = 'ignore'
}

// Execution report: always produced, written under log/ with the
// run timestamp from params.report_timestamp in its file name.
report {
    enabled = true
    file = "log/report-${params.report_timestamp}.html"
}

// Workflow DAG rendering: always produced, in verbose mode, written under
// log/ with the run timestamp from params.report_timestamp in its file name.
dag {
    enabled = true
    file = "log/dag-${params.report_timestamp}.html"
    verbose = true
}

// Execution timeline: always produced, written under log/ with the
// run timestamp from params.report_timestamp in its file name.
timeline {
    enabled = true
    file = "log/timeline-${params.report_timestamp}.html"
}

// Named configuration profiles.
profiles {

    // Site-specific profile: sets the container directory, the CPUs per
    // process and the work directory, then pulls in the Apptainer settings.
    unf_elm {
        params.container_dir    = "/data/brambati/local/containers/speechmetryflow"
        process.cpus            = 6
        // NOTE(review): $USER is an implicit environment-variable reference; confirm it
        // resolves under the config parser in use (the strict syntax may require an
        // explicit lookup instead).
        workDir                 = "/scratch/$USER/speechmetryflow/work"
        includeConfig "configs/apptainer.config"
    }

}