diff --git a/compass-nd/10_connectome_slurm_array_missing.bash b/compass-nd/10_connectome_slurm_array_missing.bash new file mode 100644 index 0000000..87b7985 --- /dev/null +++ b/compass-nd/10_connectome_slurm_array_missing.bash @@ -0,0 +1,45 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --cpus-per-task=1 + +GIGA_CONNECTOME_VERSION=0.4.1 +GIGA_CONNECTOME=/home/${USER}/projects/rrg-pbellec/containers/giga_connectome-${GIGA_CONNECTOME_VERSION}.simg +FMRIPREP_DIR=/lustre04/scratch/${USER}/compass-nd_fmriprep-20.2.7lts/bids_release_7/fmriprep-20.2.7lts +CONNECTOME_OUTPUT=/lustre04/scratch/${USER}/compass-nd_connectome-${GIGA_CONNECTOME_VERSION} +WORKINGDIR="${CONNECTOME_OUTPUT}/working_directory" + +module load apptainer + +mkdir -p $WORKINGDIR + +PARTICIPANT_LABEL=$(sed -n "${SLURM_ARRAY_TASK_ID}p" ${participant_labels}) +PARTICIPANT_OUTPUT="${CONNECTOME_OUTPUT}/${PARTICIPANT_LABEL}" + +echo "${FMRIPREP_DIR}" +if [ -d "${FMRIPREP_DIR}" ]; then + mkdir -p ${WORKINGDIR} + mkdir -p ${SLURM_TMPDIR} + mkdir -p ${PARTICIPANT_OUTPUT} + echo "Running ${PARTICIPANT_LABEL} connectomes" + echo "=========${STRATEGY}=========" + echo "${ATLAS}" + apptainer run \ --bind ${FMRIPREP_DIR}:/data/input \ --bind ${SLURM_TMPDIR}:/data/output \ --bind ${WORKINGDIR}:/data/working \ ${GIGA_CONNECTOME} \ -w /data/working \ --atlas ${ATLAS} \ --denoise-strategy ${STRATEGY} \ ${INTRANETWORK_FLAG} \ /data/input \ /data/output \ participant \ --participant_label ${PARTICIPANT_LABEL} + exitcode=$? 
# catch exit code + if [ $exitcode -eq 0 ] ; then rsync -rltv --info=progress2 ${SLURM_TMPDIR}/*.h5 ${PARTICIPANT_OUTPUT} ; fi +else + echo "no preprocessed data for ${DATASET}" +fi \ No newline at end of file diff --git a/compass-nd/10_submit_connectome_array_missing.sh b/compass-nd/10_submit_connectome_array_missing.sh new file mode 100644 index 0000000..930ece2 --- /dev/null +++ b/compass-nd/10_submit_connectome_array_missing.sh @@ -0,0 +1,38 @@ +#!/bin/bash +#SBATCH --account=def-pbellec + +DATASET="compass-nd" +CONNECTOME_OUTPUT=/lustre04/scratch/${USER}/compass-nd_connectome-0.4.1 + +STRATEGIES=("simple" "simple+gsr" "scrubbing.2" "scrubbing.2+gsr" "scrubbing.5" "scrubbing.5+gsr" "acompcor50") +ATLASES=("DiFuMo" "Schaefer20187Networks" "MIST") + +for strategy in "${STRATEGIES[@]}"; do + for atlas in "${ATLASES[@]}"; do + mem=8G + time="00:15:00" + INTRANETWORK_FLAG="--calculate-intranetwork-average-correlation" + + if [ "${atlas}" == "DiFuMo" ]; then + time="00:25:00" + mem=12G + INTRANETWORK_FLAG="" + fi + + # Generate the participant_labels file based on the missing file information + missing_file="missing_atlas-${atlas}_desc-${strategy}.h5.txt" + participant_labels="/lustre03/project/rrg-pbellec/nclarke/giga_preprocess2/compass-nd/${missing_file}" + echo "participant_labels: ${participant_labels}" + + # Determine the array size based on the number of directories missing + array_size=$(wc -l < "$participant_labels") + echo "ARRAY_SIZE: ${array_size}" + + echo "Submitting ${atlas} ${strategy}" + sbatch \ --time=${time} --mem-per-cpu=${mem} --array=1-${array_size}\ --job-name=${DATASET}_${atlas}_${strategy} \ --export=DATASET=${DATASET},ATLAS=${atlas},STRATEGY=${strategy},participant_labels=${participant_labels}\ ./10_connectome_slurm_array_missing.bash + done +done diff --git a/compass-nd/11_archive_connectome.sh b/compass-nd/11_archive_connectome.sh new file mode 100644 index 0000000..c9cc0e2 --- /dev/null +++ b/compass-nd/11_archive_connectome.sh @@ 
-0,0 +1,18 @@ +#!/bin/bash +#SBATCH --account=rrg-pbellec +#SBATCH --job-name=conn_archive +#SBATCH --output=/lustre04/scratch/nclarke/logs/compass-nd_conn_archive.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/compass-nd_conn_archive.err +#SBATCH --time=10:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem-per-cpu=8G + + +CONNECTOME_PATH="/lustre04/scratch/${USER}/compass-nd_connectome-0.4.1" +DATASET_NAME=`basename $CONNECTOME_PATH` + +ARCHIVE_PATH="/lustre03/nearline/6035398/giga_preprocessing_2/compass-nd_fmriprep-20.2.7lts" + +cd ${CONNECTOME_PATH} +echo $PWD +tar -zvcf ${ARCHIVE_PATH}/${DATASET_NAME}.tar.gz . diff --git a/compass-nd/1_create_dataset_description.py b/compass-nd/1_create_dataset_description.py new file mode 100644 index 0000000..327848e --- /dev/null +++ b/compass-nd/1_create_dataset_description.py @@ -0,0 +1,16 @@ +import json + +# Path to directory where dataset_description.json will be created +path = "/home/nclarke/scratch/compass-nd/bids_release_7/" + +# JSON file content +json_content = {"Name": "COMPASS-ND", "BIDSVersion": "1.9.0"} + +# Serialize JSON +json_object = json.dumps(json_content, indent=2) + +# Write to dataset_description.json +with open(path + "dataset_description.json", "w") as file: + file.write(json_object) + +print(f"dataset_description.json created at '{path}'") diff --git a/compass-nd/2_create_task-rest_bold.py b/compass-nd/2_create_task-rest_bold.py new file mode 100644 index 0000000..072f170 --- /dev/null +++ b/compass-nd/2_create_task-rest_bold.py @@ -0,0 +1,16 @@ +import json + +# Path to directory where task-rest_bold.json will be created +path = "/home/nclarke/scratch/compass-nd/bids_release_7/" + +# JSON file content +json_content = {"TaskName": "rest"} + +# Serialize JSON +json_object = json.dumps(json_content, indent=2) + +# Write to task-rest_bold.json +with open(path + "task-rest_bold.json", "w") as file: + file.write(json_object) + +print(f"task-rest_bold.json created at '{path}'") diff --git 
a/compass-nd/3_create_bidsignore.sh b/compass-nd/3_create_bidsignore.sh new file mode 100644 index 0000000..d456ea6 --- /dev/null +++ b/compass-nd/3_create_bidsignore.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +cd /home/nclarke/scratch/compass-nd/bids_release_7 + +cat > .bidsignore <<EOF +fmap/ +dwi/ +*FLAIR* +*PD* + +EOF + +echo 'Done!' diff --git a/compass-nd/4_generate_slurm_script.sh b/compass-nd/4_generate_slurm_script.sh new file mode 100644 index 0000000..ba56f1d --- /dev/null +++ b/compass-nd/4_generate_slurm_script.sh @@ -0,0 +1,36 @@ +#!/bin/bash +#SBATCH --mem-per-cpu=12288 +#SBATCH --time=02:00:00 + +CONTAINER_PATH="/lustre03/project/6003287/containers" +VERSION="20.2.7" +EMAIL=${SLACK_EMAIL_BOT} + +module load singularity/3.8 +echo "Create fmriprep-slurm scripts for COMPASS-ND" + +DATASET_PATH="/lustre04/scratch/${USER}/compass-nd/bids_release_7" +echo $DATASET_PATH +time=`date +%s` +OUTPUT_PATH="/lustre04/scratch/nclarke/compass-nd_fmriprep-${VERSION}lts" + +mkdir -p $OUTPUT_PATH + +# run BIDS validator on the dataset +# you only need this done once +singularity exec -B ${DATASET_PATH}:/DATA \ ${CONTAINER_PATH}/fmriprep-${VERSION}lts.sif bids-validator /DATA \ > ${OUTPUT_PATH}/bids_validator.log + +# running the script from the current directory, reference +# fmriprep_slurm_singularity_run.bash from one level up +bash ../scripts/fmriprep_slurm_singularity_run.bash \ ${OUTPUT_PATH} \ ${DATASET_PATH} \ fmriprep-${VERSION}lts \ --fmriprep-args=\"--ignore slicetiming fieldmaps\" \ --email=${EMAIL} \ --time=24:00:00 \ --mem-per-cpu=12288 \ --cpus=1 \ --container fmriprep-${VERSION}lts diff --git a/compass-nd/5_archive_fmriprep.sh b/compass-nd/5_archive_fmriprep.sh new file mode 100644 index 0000000..8248519 --- /dev/null +++ b/compass-nd/5_archive_fmriprep.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#SBATCH --account=rrg-pbellec +#SBATCH --job-name=fmriprep_archive +#SBATCH --output=/home/nclarke/scratch/logs/compass-nd_fmriprep_archive.out +#SBATCH 
--error=/home/nclarke/scratch/logs/compass-nd_fmriprep_archive.err +#SBATCH --time=72:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem-per-cpu=8G + + +FMRIPREP_PATH="/lustre04/scratch/${USER}/compass-nd_fmriprep-20.2.7lts" +DATASET_NAME=`basename $FMRIPREP_PATH` + +ARCHIVE_PATH="/lustre03/nearline/6035398/giga_preprocessing_2/${DATASET_NAME}" + +mkdir -p $ARCHIVE_PATH + +cd ${FMRIPREP_PATH} +echo $PWD +tar -zvcf ${ARCHIVE_PATH}/${DATASET_NAME}.tar.gz . diff --git a/compass-nd/6_submit_qc_participant.sh b/compass-nd/6_submit_qc_participant.sh new file mode 100644 index 0000000..7e7383d --- /dev/null +++ b/compass-nd/6_submit_qc_participant.sh @@ -0,0 +1,29 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=compass-nd_qc +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --time=00:10:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=5G +#SBATCH --array=1-784 + +module load apptainer + +GIGA_QC_VERSION=0.3.3 +FMRIPREP_DIR=/lustre04/scratch/${USER}/compass-nd_fmriprep-20.2.7lts/bids_release_7/fmriprep-20.2.7lts +GIGA_AUTO_QC_CONTAINER=/lustre03/project/rrg-pbellec/${USER}/giga_preprocess2/giga_auto_qc-${GIGA_QC_VERSION}.simg +QC_OUTPUT=/lustre04/scratch/${USER}/compass-nd_giga_auto_qc-${GIGA_QC_VERSION} +participant_labels=/home/${USER}/projects/rrg-pbellec/nclarke/giga_preprocess2/compass-nd/participant_labels.txt # One subject number per line + +mkdir -p $QC_OUTPUT + +PARTICIPANT_LABEL=$(sed -n "${SLURM_ARRAY_TASK_ID}p" ${participant_labels}) + +# Create a directory for participant +PARTICIPANT_OUTPUT="${QC_OUTPUT}/${PARTICIPANT_LABEL}" +mkdir -p $PARTICIPANT_OUTPUT + +echo "Running ${PARTICIPANT_LABEL} QC" + +apptainer run --cleanenv -B ${FMRIPREP_DIR}:/inputs -B ${PARTICIPANT_OUTPUT}:/outputs ${GIGA_AUTO_QC_CONTAINER} /inputs /outputs participant --participant_label ${PARTICIPANT_LABEL} diff --git a/compass-nd/7_archive_qc.sh b/compass-nd/7_archive_qc.sh new file mode 100644 index 
0000000..6c1bf41 --- /dev/null +++ b/compass-nd/7_archive_qc.sh @@ -0,0 +1,20 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=qc_archive +#SBATCH --output=/lustre04/scratch/nclarke/logs/compass-nd_qc_archive.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/compass-nd_qc_archive.err +#SBATCH --time=01:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem-per-cpu=8G + + +RAW_PATH="/lustre04/scratch/nclarke/compass-nd_giga_auto_qc-0.3.3" +DATASET_NAME=`basename $RAW_PATH` + +ARCHIVE_PATH="/lustre03/nearline/6035398/giga_preprocessing_2/compass-nd_fmriprep-20.2.7lts" + +mkdir -p $ARCHIVE_PATH + +cd ${RAW_PATH} +echo $PWD +tar -zvcf ${ARCHIVE_PATH}/${DATASET_NAME}.tar.gz . diff --git a/compass-nd/8_submit_connectome.sh b/compass-nd/8_submit_connectome.sh new file mode 100644 index 0000000..6e80d03 --- /dev/null +++ b/compass-nd/8_submit_connectome.sh @@ -0,0 +1,58 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --cpus-per-task=1 +#SBATCH --time=02:30:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=12G +#SBATCH --array=1-784 + +GIGA_CONNECTOME_VERSION=0.4.1 +GIGA_CONNECTOME=/home/${USER}/projects/rrg-pbellec/containers/giga_connectome-${GIGA_CONNECTOME_VERSION}.simg +FMRIPREP_DIR=/lustre04/scratch/${USER}/compass-nd_fmriprep-20.2.7lts/bids_release_7/fmriprep-20.2.7lts +CONNECTOME_OUTPUT=/lustre04/scratch/${USER}/compass-nd_connectome-${GIGA_CONNECTOME_VERSION} +WORKINGDIR=${CONNECTOME_OUTPUT}/working_directory +participant_labels=/home/${USER}/projects/rrg-pbellec/nclarke/giga_preprocess2/compass-nd/participant_labels.txt # One subject number per line + +module load apptainer + +PARTICIPANT_LABEL=$(sed -n "${SLURM_ARRAY_TASK_ID}p" ${participant_labels}) +PARTICIPANT_OUTPUT="${CONNECTOME_OUTPUT}/${PARTICIPANT_LABEL}" + +mkdir -p $WORKINGDIR + +# Create participant-specific directory +mkdir -p ${PARTICIPANT_OUTPUT} + +# Define strategies 
and atlases +STRATEGIES=("acompcor50" "simple" "simple+gsr" "scrubbing.2" "scrubbing.2+gsr" "scrubbing.5" "scrubbing.5+gsr") +ATLASES=("Schaefer20187Networks" "MIST" "DiFuMo") + +# Loop through each strategy and atlas +for STRATEGY in "${STRATEGIES[@]}"; do + for ATLAS in "${ATLASES[@]}"; do + # Set AFC flag based on atlas + if [[ "$ATLAS" == "DiFuMo" ]]; then + INTRANETWORK_FLAG="" + else + INTRANETWORK_FLAG="--calculate-intranetwork-average-correlation" + fi + echo "Running ${PARTICIPANT_LABEL} with ${ATLAS} ${STRATEGY}" + apptainer run \ + --bind ${FMRIPREP_DIR}:/data/input \ + --bind ${SLURM_TMPDIR}:/data/output \ + --bind ${WORKINGDIR}:/data/working \ + ${GIGA_CONNECTOME} \ + -w /data/working \ + --atlas ${ATLAS} \ + --denoise-strategy ${STRATEGY} \ + ${INTRANETWORK_FLAG} \ + /data/input \ + /data/output \ + participant \ + --participant_label ${PARTICIPANT_LABEL} + exitcode=$? # catch exit code + if [ $exitcode -eq 0 ] ; then rsync -rltv --info=progress2 ${SLURM_TMPDIR}/*.h5 ${PARTICIPANT_OUTPUT} ; fi + done +done diff --git a/compass-nd/9_check_participant_missing_h5.py b/compass-nd/9_check_participant_missing_h5.py new file mode 100644 index 0000000..f06f255 --- /dev/null +++ b/compass-nd/9_check_participant_missing_h5.py @@ -0,0 +1,55 @@ +import os + +# Directory containing sub-directories +main_directory = "/home/nclarke/scratch/compass-nd_connectome-0.4.1" + +# List of file endings to check for +file_endings_to_check = [ + "atlas-DiFuMo_desc-acompcor50.h5", + "atlas-MIST_desc-scrubbing.5.h5", + "atlas-DiFuMo_desc-scrubbing.2+gsr.h5", + "atlas-MIST_desc-simple+gsr.h5", + "atlas-DiFuMo_desc-scrubbing.2.h5", + "atlas-MIST_desc-simple.h5", + "atlas-DiFuMo_desc-scrubbing.5+gsr.h5", + "atlas-Schaefer20187Networks_desc-acompcor50.h5", + "atlas-DiFuMo_desc-scrubbing.5.h5", + "atlas-Schaefer20187Networks_desc-scrubbing.2+gsr.h5", + "atlas-DiFuMo_desc-simple+gsr.h5", + "atlas-Schaefer20187Networks_desc-scrubbing.2.h5", + "atlas-DiFuMo_desc-simple.h5", + 
"atlas-Schaefer20187Networks_desc-scrubbing.5+gsr.h5", + "atlas-MIST_desc-acompcor50.h5", + "atlas-Schaefer20187Networks_desc-scrubbing.5.h5", + "atlas-MIST_desc-scrubbing.2+gsr.h5", + "atlas-Schaefer20187Networks_desc-simple+gsr.h5", + "atlas-MIST_desc-scrubbing.2.h5", "atlas-MIST_desc-scrubbing.5+gsr.h5", "atlas-Schaefer20187Networks_desc-simple.h5", +] + + +# Directories to exclude +excluded_directories = {"working_directory"} + + +# Create a dictionary to store missing directories for each file ending +missing_directories_dict = {} + +# Loop over sub-directories and check for files +for directory in os.listdir(main_directory): + if directory not in excluded_directories: + sub_directory = os.path.join(main_directory, directory) + if os.path.isdir(sub_directory): + for ending in file_endings_to_check: + if not any(f.endswith(ending) for f in os.listdir(sub_directory)): + if ending not in missing_directories_dict: + missing_directories_dict[ending] = [] + missing_directories_dict[ending].append(directory) + +# Create a separate file for each file ending listing the missing directories +for ending, missing_directories in missing_directories_dict.items(): + output_file = f"missing_{ending}.txt" + with open(output_file, "w") as f: + for directory in missing_directories: + f.write(directory + "\n") + print(f"Missing file: {ending}") + print(f"Number of directories missing it: {len(missing_directories)}") diff --git a/compass-nd/README.md b/compass-nd/README.md new file mode 100644 index 0000000..3540b9e --- /dev/null +++ b/compass-nd/README.md @@ -0,0 +1,33 @@ +# COMPASS-ND-fmriprep-slurm +Scripts for preprocessing the Comprehensive Assessment of Neurodegeneration and Dementia (COMPASS-ND) Study with fMRIPrep 20.2.7lts. +## Dependency +- [fmriprep-slurm](https://simexp-documentation.readthedocs.io/en/latest/giga_preprocessing/preprocessing.html) +- Python +## Retrieving data +Data was downloaded from [here](https://ccna.loris.ca/) after submitting a data access request and receiving approval. 
+ +## BIDS validation fixes +- `1_create_dataset_description.py` + - creates `dataset_description.json` at the root directory with minimal information + - BIDS version used is unknown, so specified latest version (same as adni, see [discussion](https://neurostars.org/t/what-bids-version-to-use-for-legacy-dataset/25619)). +- `2_create_task-rest_bold.py` + - creates `task-rest_bold.json` at the root directory, detailing bold task +- `3_create_bidsignore.sh` + - creates `.bidsignore` at the root and adds relevant lines + +## Run fMRIPrep +- `4_generate_slurm_script.sh` +- `5_archive_fmriprep.sh` + +## Run QC +- `6_submit_qc_participant.sh` + - Run once first with one participant only and `--reindex-bids` flag. +- `7_archive_qc.sh` + +## Generate connectomes +- `8_submit_connectome.sh` loops through each participant and generates connectomes for each atlas and strategy pair. + - Run once first with one participant only and `--reindex-bids` flag. + - Do `ls -d sub-* | grep -v '\.html$' | sed 's/sub-//' > /home/${USER}/participant_labels.txt`. +- `9_check_participant_missing_h5.py` checks which subjects failed in the previous step. +- `10_connectome_slurm_array_missing.bash` and `10_submit_connectome_array_missing.sh` submit any failed atlas/strategy pairs one by one. Use the portal to check whether jobs timed out or ran out of memory and adjust accordingly; if they still fail after that, it will be due to no frames left after scrubbing. 
+- `11_archive_connectome.sh` \ No newline at end of file diff --git a/compass-nd/submit_qc_scrub5.sh b/compass-nd/submit_qc_scrub5.sh new file mode 100644 index 0000000..5deb892 --- /dev/null +++ b/compass-nd/submit_qc_scrub5.sh @@ -0,0 +1,29 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=qc +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A_%a.out +#SBATCH --time=00:15:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=4G +#SBATCH --array=1-784 + +module load apptainer + +FMRIPREP_DIR=/lustre04/scratch/${USER}/compass-nd_fmriprep-20.2.7lts/bids_release_7/fmriprep-20.2.7lts +GIGA_AUTO_QC_CONTAINER=/lustre03/project/rrg-pbellec/${USER}/giga_preprocess2/giga_auto_qc-0.3.3.simg +QC_OUTPUT=/lustre04/scratch/${USER}/compass-nd_giga_auto_qc-0.3.3_scrub.5 +QC_PARAMS=/lustre03/project/rrg-pbellec/${USER}/giga_preprocess2/qc_params_scrub5.json +participant_labels=/lustre03/project/rrg-pbellec/${USER}/giga_preprocess2/compass-nd/participant_labels.txt # One subject number per line + +mkdir -p $QC_OUTPUT + +PARTICIPANT_LABEL=$(sed -n "${SLURM_ARRAY_TASK_ID}p" ${participant_labels}) + +# Create a directory for participant +PARTICIPANT_OUTPUT="${QC_OUTPUT}/${PARTICIPANT_LABEL}" +mkdir -p $PARTICIPANT_OUTPUT + +echo "Running ${PARTICIPANT_LABEL} QC" + +apptainer run --cleanenv -B ${QC_PARAMS} -B ${FMRIPREP_DIR}:/inputs -B ${PARTICIPANT_OUTPUT}:/outputs ${GIGA_AUTO_QC_CONTAINER} /inputs /outputs --quality_control_parameters ${QC_PARAMS} participant --participant_label ${PARTICIPANT_LABEL} diff --git a/prisme/ColeAnticevic12.json b/prisme/ColeAnticevic12.json new file mode 100644 index 0000000..44d7844 --- /dev/null +++ b/prisme/ColeAnticevic12.json @@ -0,0 +1,11 @@ +{ + "name": "coleanticevic", + "parameters": { + "atlas": "coleanticevic", + "template": "MNI152NLin2009cAsym", + "resolution": "1", + "suffix": "dseg" + }, + "desc":"12parcels", + "templateflow_dir": null +} diff --git a/prisme/README.md 
b/prisme/README.md new file mode 100644 index 0000000..1787e3a --- /dev/null +++ b/prisme/README.md @@ -0,0 +1,21 @@ +# PRISME QC and connectome generation + +For more info on connectome workflow see the giga-connectome [docs](https://giga-connectome.readthedocs.io/en/latest/usage.html). + +1. Run QC (see [repo](https://github.com/SIMEXP/giga_auto_qc)) +- `submit_qc.sh` + +2. Organise atlases according to TemplateFlow convention +- `ColeAnticevic12.json` & `brainnetome.json`: config files +- `copy_rename_CABNP.py` & `copy_rename_BA.py`: copy atlas files and rename. + +3. Submit connectome generation scripts on Beluga +- `submit_connectome_CABNP.sh` & `submit_connectome_BA.sh` + +4. Generate a .tsv connectome per participant from the .h5 file +- `h5_to_tsv_prisme.py`: uses [this repo](https://github.com/SIMEXP/rs-autoregression-prediction). + +5. Copy data to Elm `/data/orban/data/prisme-connectomes` + +6. Archive on Beluga nearline - TO DO + diff --git a/prisme/brainnetome.json b/prisme/brainnetome.json new file mode 100644 index 0000000..704c969 --- /dev/null +++ b/prisme/brainnetome.json @@ -0,0 +1,11 @@ +{ + "name": "brainnetome", + "parameters": { + "atlas": "brainnetome", + "template": "MNI152NLin2009cAsym", + "resolution": "02", + "suffix": "dseg" + }, + "desc":"246parcels", + "templateflow_dir": null +} diff --git a/prisme/copy_rename_BA.py b/prisme/copy_rename_BA.py new file mode 100644 index 0000000..10c28c3 --- /dev/null +++ b/prisme/copy_rename_BA.py @@ -0,0 +1,29 @@ +import json +import shutil +from pathlib import Path + +root_p = Path("/home/neuromod/prisme/BA") +json_file_p = "brainnetome.json" +nifti_file_p = "BN_Atlas_246_2mm.nii.gz" +output_dir = root_p +output_dir.mkdir(parents=True, exist_ok=True) + +with open(json_file_p, "r") as f: + config = json.load(f) + +# Extract parameters +template = config["parameters"]["template"] +resolution = config["parameters"]["resolution"] +atlas = config["parameters"]["atlas"] +suffix = 
config["parameters"]["suffix"] +desc = config["desc"] + +# Construct new filename +new_filename = ( + f"tpl-{template}_res-{resolution}_atlas-{atlas}_desc-{desc}_{suffix}.nii.gz" +) + +# Copy and rename the atlas +output_file_path = output_dir / new_filename +shutil.copy(nifti_file_p, output_file_path) +print(f"Copied and renamed file to: {output_file_path}") diff --git a/prisme/copy_rename_CABNP.py b/prisme/copy_rename_CABNP.py new file mode 100644 index 0000000..87a6103 --- /dev/null +++ b/prisme/copy_rename_CABNP.py @@ -0,0 +1,30 @@ +import json +import shutil +from pathlib import Path + +root_p = Path("/home/neuromod/prisme/CAB-NP") +json_file_p = "ColeAnticevic12.json" +nifti_file_p = "CAB-NP_volumetric_liberal_reordered.nii.gz" +output_dir = root_p +output_dir.mkdir(parents=True, exist_ok=True) + +with open(json_file_p, "r") as f: + config = json.load(f) + +# Extract parameters +template = config["parameters"]["template"] +resolution = config["parameters"]["resolution"] +atlas = config["parameters"]["atlas"] +suffix = config["parameters"]["suffix"] +desc = config["desc"] + +# Construct new filename +new_filename = ( + f"tpl-{template}_res-{resolution}_atlas-{atlas}_desc-{desc}_{suffix}.nii.gz" +) +output_file_p = output_dir / new_filename + +# Copy (no need to compress) +shutil.copy(nifti_file_p, output_file_p) + +print(f"Renamed file to: {output_file_p}") diff --git a/prisme/h5_to_tsv_prisme.py b/prisme/h5_to_tsv_prisme.py new file mode 100644 index 0000000..0bb82ac --- /dev/null +++ b/prisme/h5_to_tsv_prisme.py @@ -0,0 +1,78 @@ +import sys +import pandas as pd + +sys.path.append("/home/neuromod/rs-autoregression-prediction/src") +from data.load_data import load_data, load_h5_data_path +from pathlib import Path + + +def label_connectome(connectome_df, labels_p, atlas): + if atlas == "CAB-NP": + labels_df = pd.read_csv(labels_p, delimiter=";") + labels_df.sort_values(by=["index"], inplace=True) + region_labels = labels_df["name"].tolist() + elif atlas == 
"BA": + labels_df = pd.read_csv(labels_p) + labels_df.sort_values(by=["Label"], inplace=True) + region_labels = labels_df["region"].tolist() + else: + raise ValueError(f"Unknown atlas: {atlas}") + + connectome_df.columns = region_labels + connectome_df.index = region_labels + return connectome_df + + +def save_connectome_to_tsv(connectome, path, output_p): + filename = f"{Path(path).stem}.tsv" + filepath = output_p / filename + output_p.mkdir(parents=True, exist_ok=True) + connectome.to_csv(filepath, sep="\t") + print(f"Saved: {filepath}") + + +def process_connectomes(atlas, root_p): + if atlas == "CAB-NP": + h5_file = ( + root_p + / "prisme-connectomes/CAB-NP/prisme_connectome_CABNP-0.4.1_20250110/atlas-coleanticevic_desc-simple+gsr.h5" + ) + labels_p = root_p / "prisme/CAB-NP/CAB-NP_labels_reordered.csv" + output_p = ( + root_p + / "prisme-connectomes/CAB-NP/tsv_files_atlas-coleanticevic_desc-simple+gsr_20250110" + ) + elif atlas == "BA": + h5_file = ( + root_p + / "prisme-connectomes/BA/prisme_connectome-0.4.1_BA_20241218/atlas-brainnetome_desc-simple+gsr.h5" + ) + labels_p = root_p / "prisme/BA/subregion_func_network_Yeo_updated.csv" + output_p = ( + root_p + / "prisme-connectomes/BA/tsv_files_atlas-BA_desc-simple+gsr_20241218" + ) + else: + raise ValueError(f"Unknown atlas: {atlas}") + + # Load data + dset_paths = load_h5_data_path( + path=h5_file, data_filter="connectome", shuffle=False + ) + data = load_data( + path=h5_file, h5dset_path=dset_paths, standardize=False, dtype="data" + ) + + # Process each connectome + for connectome, path in zip(data, dset_paths): + connectome_df = pd.DataFrame(connectome) + connectome_df = label_connectome(connectome_df, labels_p, atlas) + save_connectome_to_tsv(connectome_df, path, output_p) + + print(f"Connectomes processed and saved to {output_p}") + + +if __name__ == "__main__": + root_p = Path("/home/neuromod") + for atlas in ["CAB-NP"]: # ["BA", "CAB-NP"] + process_connectomes(atlas, root_p) diff --git 
a/prisme/submit_connectome_BA.sh b/prisme/submit_connectome_BA.sh new file mode 100644 index 0000000..f15500e --- /dev/null +++ b/prisme/submit_connectome_BA.sh @@ -0,0 +1,49 @@ +#!/bin/bash +#SBATCH --account=rrg-pbellec +#SBATCH --job-name=prisme_BA_conn +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A.err +#SBATCH --cpus-per-task=1 +#SBATCH --time=22:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=36G + +GIGA_CONNECTOME_VERSION=0.4.1 +GIGA_CONNECTOME=/lustre03/project/6003287/containers/giga_connectome-${GIGA_CONNECTOME_VERSION}.simg +FMRIPREP_DIR=/lustre03/project/6003287/${USER}/prisme.fmriprep +CONNECTOME_OUTPUT=/lustre04/scratch/${USER}/prisme_connectome-${GIGA_CONNECTOME_VERSION}_BA_20241218 +WORKINGDIR=${CONNECTOME_OUTPUT}/working_directory +ATLAS_CONFIG=/lustre03/project/6003287/${USER}/giga_preprocess2/prisme/brainnetome.json +ATLAS_PATH=/home/${USER}/.cache/templateflow +export APPTAINERENV_TEMPLATEFLOW_HOME=/templateflow + +module load apptainer + +mkdir -p ${WORKINGDIR} + +echo "${FMRIPREP_DIR}" +if [ -d "${FMRIPREP_DIR}" ]; then + mkdir -p ${WORKINGDIR} + mkdir -p ${SLURM_TMPDIR} + mkdir -p ${CONNECTOME_OUTPUT}/ + echo "=========${STRATEGY}=========" + apptainer run \ + --bind ${FMRIPREP_DIR}:/data/input \ + --bind ${SLURM_TMPDIR}:/data/output \ + --bind ${WORKINGDIR}:/data/working \ + --bind ${ATLAS_CONFIG}:/data/brainnetome.json \ + --bind ${ATLAS_PATH}:/templateflow \ + -B /lustre03:/lustre03 \ + ${GIGA_CONNECTOME} \ + --reindex-bids \ + -w /data/working \ + --atlas /data/brainnetome.json \ + --denoise-strategy simple+gsr \ + /data/input \ + /data/output \ + group + exitcode=$? 
# catch exit code + if [ $exitcode -eq 0 ] ; then rsync -rltv --info=progress2 ${SLURM_TMPDIR}/*.h5 ${CONNECTOME_OUTPUT} ; fi +else + echo "no preprocessed data for ${FMRIPREP_DIR}" +fi diff --git a/prisme/submit_connectome_CABNP.sh b/prisme/submit_connectome_CABNP.sh new file mode 100644 index 0000000..ea66fee --- /dev/null +++ b/prisme/submit_connectome_CABNP.sh @@ -0,0 +1,49 @@ +#!/bin/bash +#SBATCH --account=rrg-pbellec +#SBATCH --job-name=prisme_CABNP_conn +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A.err +#SBATCH --cpus-per-task=1 +#SBATCH --time=22:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=34G + +GIGA_CONNECTOME_VERSION=0.4.1 +GIGA_CONNECTOME=/lustre03/project/6003287/containers/giga_connectome-${GIGA_CONNECTOME_VERSION}.simg +FMRIPREP_DIR=/lustre03/project/6003287/${USER}/prisme.fmriprep +CONNECTOME_OUTPUT=/lustre04/scratch/${USER}/prisme_connectome_CABNP-${GIGA_CONNECTOME_VERSION}_20250110 +WORKINGDIR=${CONNECTOME_OUTPUT}/working_directory +ATLAS_CONFIG=/lustre03/project/6003287/${USER}/giga_preprocess2/prisme/ColeAnticevic12.json +ATLAS_PATH=/home/${USER}/.cache/templateflow +export APPTAINERENV_TEMPLATEFLOW_HOME=/templateflow + +module load apptainer + +mkdir -p ${WORKINGDIR} + +echo "${FMRIPREP_DIR}" +if [ -d "${FMRIPREP_DIR}" ]; then + mkdir -p ${WORKINGDIR} + mkdir -p ${SLURM_TMPDIR} + mkdir -p ${CONNECTOME_OUTPUT}/ + echo "=========${STRATEGY}=========" + apptainer run \ + --bind ${FMRIPREP_DIR}:/data/input \ + --bind ${SLURM_TMPDIR}:/data/output \ + --bind ${WORKINGDIR}:/data/working \ + --bind ${ATLAS_CONFIG}:/data/ColeAnticevic12.json \ + --bind ${ATLAS_PATH}:/templateflow \ + -B /lustre03:/lustre03 \ + ${GIGA_CONNECTOME} \ + --reindex-bids \ + -w /data/working \ + --atlas /data/ColeAnticevic12.json \ + --denoise-strategy simple+gsr \ + /data/input \ + /data/output \ + group + exitcode=$? 
# catch exit code + if [ $exitcode -eq 0 ] ; then rsync -rltv --info=progress2 ${SLURM_TMPDIR}/*.h5 ${CONNECTOME_OUTPUT} ; fi +else + echo "no preprocessed data for ${FMRIPREP_DIR}" +fi diff --git a/prisme/submit_qc.sh b/prisme/submit_qc.sh new file mode 100644 index 0000000..3ee7a4f --- /dev/null +++ b/prisme/submit_qc.sh @@ -0,0 +1,21 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=prisme_qc +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A.out +#SBATCH --time=10:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=6G + +module load apptainer + +GIGA_QC_VERSION=0.3.4 +FMRIPREP_DIR=/lustre03/project/6003287/${USER}/prisme.fmriprep +GIGA_AUTO_QC_CONTAINER=/lustre03/project/6003287/containers/giga_auto_qc_unstable_update.simg +QC_OUTPUT=/lustre04/scratch/${USER}/prisme_giga_auto_qc-${GIGA_QC_VERSION}_20241029 + +mkdir -p $QC_OUTPUT + +echo "Running QC" + +apptainer run --cleanenv -B ${FMRIPREP_DIR}:/inputs -B ${QC_OUTPUT}:/outputs -B /lustre03:/lustre03 ${GIGA_AUTO_QC_CONTAINER} --reindex-bids /inputs /outputs group diff --git a/prisme/submit_qc_scrub5.sh b/prisme/submit_qc_scrub5.sh new file mode 100644 index 0000000..31acd8b --- /dev/null +++ b/prisme/submit_qc_scrub5.sh @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH --account=def-pbellec +#SBATCH --job-name=prisme_qc +#SBATCH --output=/lustre04/scratch/nclarke/logs/%x_%A.out +#SBATCH --error=/lustre04/scratch/nclarke/logs/%x_%A.out +#SBATCH --time=10:00:00 +#SBATCH --cpus-per-task=1 +#SBATCH --mem=6G + +module load apptainer + +GIGA_QC_VERSION=0.3.4 +FMRIPREP_DIR=/lustre04/scratch/${USER}/prisme.fmriprep +GIGA_AUTO_QC_CONTAINER=/lustre03/project/6003287/containers/giga_auto_qc_unstable_update.simg +QC_OUTPUT=/lustre04/scratch/${USER}/prisme_giga_auto_qc-${GIGA_QC_VERSION}_scrub5 +QC_PARAMS=/lustre03/project/rrg-pbellec/${USER}/giga_preprocess2/qc_params_scrub5.json + +mkdir -p $QC_OUTPUT + +echo "Running QC" + +apptainer run --cleanenv -B 
${QC_PARAMS}:/tmp/qc_params_scrub5.json -B ${FMRIPREP_DIR}:/inputs -B ${QC_OUTPUT}:/outputs -B /lustre03:/lustre03 ${GIGA_AUTO_QC_CONTAINER} --reindex-bids /inputs /outputs --quality_control_parameters ${QC_PARAMS} group