From eca8a44a42dfefe13243a21701171361175dc962 Mon Sep 17 00:00:00 2001 From: Jim Pollaro Date: Thu, 19 Dec 2024 12:54:55 -0600 Subject: [PATCH] documentation --- docs/Makefile | 20 ++ docs/make.bat | 35 +++ docs/source/bibliography.rst | 3 + docs/source/conf.py | 51 ++++ docs/source/edge_level_tests.rst | 78 ++++++ docs/source/getting_started.rst | 51 ++++ docs/source/index.rst | 28 ++ docs/source/methodology.rst | 79 ++++++ docs/source/network_atlases.rst | 40 +++ docs/source/overview.rst | 162 +++++++++++ docs/source/preface.rst | 19 ++ docs/source/refs.bib | 450 +++++++++++++++++++++++++++++++ docs/source/setup.rst | 38 +++ 13 files changed, 1054 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/bibliography.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/edge_level_tests.rst create mode 100644 docs/source/getting_started.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/methodology.rst create mode 100644 docs/source/network_atlases.rst create mode 100644 docs/source/overview.rst create mode 100644 docs/source/preface.rst create mode 100644 docs/source/refs.bib create mode 100644 docs/source/setup.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..747ffb7b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/bibliography.rst b/docs/source/bibliography.rst new file mode 100644 index 00000000..1096f096 --- /dev/null +++ b/docs/source/bibliography.rst @@ -0,0 +1,3 @@ +.. rubric:: References + +.. bibliography:: \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..cb9e299a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,51 @@ +import os +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "/") + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Network Level Analysis Toolbox' +copyright = '2024, Muriah Wheelock' +author = 'Muriah Wheelock' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx.ext.intersphinx', + 'sphinxcontrib.matlab', + 'sphinx_rtd_theme', + 'sphinxcontrib.bibtex', + 'sphinx.ext.autodoc' +] +this_dir = os.path.dirname(os.path.abspath(__file__)) +matlab_src_dir = os.path.abspath(os.path.join(this_dir, '../../+nla')) + +templates_path = ['_templates'] +exclude_patterns = [] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' +html_static_path = ['_static'] + + +# -- Options for Intersphinx ------------------------------------------------------ +# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html#module-sphinx.ext.intersphinx + +intersphinx_mapping = {} + +# -- Options for bibtex ---------------------------- +# https://sphinxcontrib-bibtex.readthedocs.io/en/latest/quickstart.html#installation + +bibtex_bibfiles = ['refs.bib'] +bibtex_default_style = 'plain' +bibtex_reference_style = 'super' \ No newline at end of file diff --git a/docs/source/edge_level_tests.rst b/docs/source/edge_level_tests.rst new file mode 100644 index 00000000..257e125a --- /dev/null +++ b/docs/source/edge_level_tests.rst @@ -0,0 +1,78 @@ +Edge-level Statistical Tests +========================================== + 
+Methods +------------------------- + +The non-permuted method calculates the correlation of each Region Of Interest (ROI) to all other +ROIs via the given test. These results are stored as a correlation coefficient, ``coeff``, a p-value, ``prob``, +and a thresholded p-value, ``prob_sig``. The permuted method is identical except the variables have a ``_perm`` suffix. + +Common Inputs +-------------------------- + +:P: Edge-level p-value threshold +:Network Atlas: :doc:`/network_atlases` +:Functional Connectivity: Initial correlation matrix of size N\ :sub:`ROIs`\ x N\ :sub:`ROIs`\ x N\ :sub:`scans`\. + r-values or Fisher z-transformed r-values. +:Behavior: MATLAB table (``.mat``) or tab-separated text file (``.txt``) + + ============== =================== ================ + Variable Name Next Variable Name More Variable... + ============== =================== ================ + 1 5 1.5 + 0 1 3 + 1 7 2.6 + ... ... ... + ============== =================== ================ + + Each column header is a name of a variable. + Each column contains N\ :sub:`scans`\ entries. + After loading this file, the table should display in the GUI. + The user may mark one column as 'Behavior' for the score of interest. + Other columns may be marked as 'Covariates' which are partialed out prior to running statistics. + (Note: Network Level Analysis cannot handle missing values for behavior or covariates. If there are ``NaNs`` or missing values, do not select those columns.) + +Provided Tests +-------------------------------- + +* **Pearson's r** + + * MATLAB `corr <https://www.mathworks.com/help/stats/corr.html>`_ function with ``type``, ``Pearson`` +* **Spearman's** :math:`\rho`\ + + * MATLAB `corr <https://www.mathworks.com/help/stats/corr.html>`_ function with ``type``, ``Spearman`` +* **Spearman's** :math:`\rho`\ **estimator** + + * Faster approximation of the Spearman's rho function at the cost of a slightly less accurate result. + * Based on developer testing, rho values may differ by :math:`10^{-4}` and p-values by :math:`10^{-5}`. 
+ * This error is passed on to the network-level tests, and can cause p-values to differ by :math:`10^{-4}` + * These differences were found with 10,000 permutations. Fewer permutations result in higher error in a less evenly distributed fashion. + * This is recommended for exploratory research, with the exact Spearman's rho function used for publications +* **Kendall's** :math:`\tau`\ **-b** + + * Implements Kendall's :math:`\tau`\ -b using C code in a MATLAB MEX file (``+mex/+src/kendallTauB.c``) + * Faster implementation than standard MATLAB code providing identical :math:`\tau`\ and p-values. + * Run-time difference from *O*\ (*n*\ :sup:`2`) to *O*\ (*n* log *n*) + * This is done with a red-black tree. +* **Welch's t-test** + + * Implements an optimized Welch's t-test comparing the functional connectivity of two groups. + * Extra inputs compared to other edge level tests + :Group name(s): Names associated with each group. (For example, 'Male' and 'Female') + :Group val(s): Behavioral value associated with each group. If 'Female' is denoted as '0', and 'Male' as '1', set the vals to the numerical values. + +* **Pre-calculated data loader** + + * Allows loading of observed and permuted edge-level data the user has pre-calculated outside the NLA. + * Four ``.mat`` files needed as inputs + * p-values should be thresholded + :Observed p: ``.mat`` file containing N\ :sub:`ROI_pairs`\ x 1 matrix of logical values, the observed, thresholded edge-level p-values. + N\ :sub:`ROI_pairs`\ are the lower triangle values of a N\ :sub:`ROIs`\ x N\ :sub:`ROIs`\ matrix. + :Observed coeff: ``.mat`` file containing N\ :sub:`ROI_pairs`\ x 1 matrix of observed edge-level coefficients. + :Permuted p: ``.mat`` file containing N\ :sub:`ROI_pairs`\ x N\ :sub:`permutations`\ of logical values. Observed, thresholded, permuted p-values. + :Permuted coeff: ``.mat`` file containing N\ :sub:`ROI_pairs`\ x N\ :sub:`permutations`\ of permuted edge-level coefficients. 
+ +Creating additional edge-level tests +----------------------------------------------- + diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst new file mode 100644 index 00000000..19c7bf7a --- /dev/null +++ b/docs/source/getting_started.rst @@ -0,0 +1,51 @@ +Getting Started +================================================ + +Running with example data +-------------------------------------------------- + +First, open the NLA software (as described in :doc:`setup`). Select 'Pearson's r' as the edge-level +test from the edge-level test dropdown. + +Click 'Select' to choose a network atlas, navigating to the ``support_files`` folder within your +'NetworkLevelAnalysis' installation and selecting ``Wheelock_2020_CerebralCortex_15nets_288ROI_on_MNI.mat``. +This file is used to parcellate the data. + +Then, select the functional connectivity, located in the ``examples/fc_and_behavior`` folder under the name +``sample_func_conn.mat``. Click 'Yes' to Fisher z-transform the data. Take a moment to visualize the functional +connectivity (FC) average by clicking 'View'. Note that the FC appears to match the parcellation (effects +generally line up with network boundaries) - this can be a useful diagnostic tool if you are having issues +with parcellations not matching data. + +Finally, load the behavior ``sample_behavior.mat`` from the ``examples/fc_and_behavior`` folder (the 'file type' drop-down +will need to be changed from 'Text' to 'MATLAB table' in the file browser). Set the behavioral variable to 'Flanker_AgeAdj' by +clicking on that column in the table and then the 'Set Behavior' button. + +Having finished our edge-level inputs, we now move over to the network-level panel on the right side. Select all the tests by clicking +the top one, and then shift+clicking the bottom one. + +.. _running_network_tests: + +Run the tests using the 'Run' button on the bottom-right. 
The number of permutations can be changed with the input field +to the left of the 'Run' button. After pushing the 'Run' button, a result window will open. The edge-level test will be run +and the results can be visualized by pressing 'View' in the upper-left of the result window. To run the network-level tests, +push the 'Run' button in the results window. This will take longer; a progress window will show up displaying the progress. +To visualize the results, expand the lists in the reloaded (automatically) panel, and highlight a test. Press the 'View figures' +button. Other visualization options, such as chord plots and convergence maps, can also be shown. The results can be saved using the +'File' menu in the top-left. These results can be loaded into MATLAB or opened in the NLA main window also using the 'File' menu on that window. + +Running with example pre-calculated data +---------------------------------------------------------- + +Similarly to the previous example, open the NLA window and load the ``Wheelock_2020_CerebralCortex_15nets_288ROI_on_MNI.mat`` parcellation. This +time, select the 'Precalculated data' edge-level test. Load the four input matrices in the ``examples/precalculated`` folder. + +* Observed coefficients: ``SIM_obs_coeff.mat`` +* Observed, thresholded p-values: ``SIM_obs_p.mat`` +* Permuted coefficients: ``SIM_perm_coeff.mat`` +* Permuted, thresholded p-values: ``SIM_perm_p.mat`` + +Set the lower and upper coefficient bounds to the range of the coefficients. For this case, the range is [-2, 2]. These bounds can be checked +with the 'View' button for the edge-level results. In the bottom right corner, set the ``perm_count`` to the desired number of +permutations. The example data provided has a maximum of 600 permutations. Run the tests using the procedure described in the +:ref:`previous section <running_network_tests>`. 
\ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..4ee1a48e --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,28 @@ +.. NetworkLevelAnalysis documentation master file, created by + sphinx-quickstart on Mon Nov 18 13:00:09 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to NetworkLevelAnalysis's documentation! +================================================ + +.. toctree:: + :maxdepth: 2 + :caption: Table of Contents: + + preface + overview + methodology + setup + getting_started + network_atlases + edge_level_tests + bibliography + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/methodology.rst b/docs/source/methodology.rst new file mode 100644 index 00000000..89823ee6 --- /dev/null +++ b/docs/source/methodology.rst @@ -0,0 +1,79 @@ +Methodology +================================ + +Brain Network Map Selection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +NLA requires the user to specify the network map that will be used to depict the known architecture of the +human connectome, which is crucial given that the network map selection affects both statistical +significance testing and interpretation :cite:p:`BellecP`. The current pipeline uses network maps that are generated with +Infomap, due to its greater congruence with networks derived from task-activation and seed-based +connectivity studies than alternative modularity algorithms :cite:p:`PowerJ,RosvallM`. Network maps can be generated using +one's preferred algorithm or one of several published ROI and corresponding network map options that +will be included in the NLA toolbox :cite:p:`GordonE,PowerJ,ThomasY,GlasserM,ShenX,CraddockR`. 
The use of standardized ROI and network maps creates a +common, reproducible framework for testing brain-behavior associations across connectome research. + +General Linear Model / Edge-wise Statistical Model Selection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +NLA also requires the user to specify the desired statistical model for testing associations between +behavioral data and edge-wise - or ROI-pair connectivity - connectome data. The analysis pipeline within +the NLA toolbox offers both parametric and non-parametric correlation. + +Connectivity Matrices +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Other software packages are used to create the connectivity matrices that are provided as input into the +NLA toolbox. One useful option for mapping functional connectivity matrices is CONN - MATLAB-based +software with the ability to compute, display, and analyze functional connectivity in fMRI. + +The NLA Method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First, connectome-wide associations are calculated between ROI-pair connectivity and behavioral data, +resulting in a set of standardized regression coefficients that specify the brain-behavior association at +each ROI-pair of the connectome matrix. Next, network level analysis - consisting of transformation of the +edge-wise test statistics and enrichment statistic calculation :cite:p:`AckermanM` - is done to determine which networks are +strongly associated with the behavior of interest. + +Both p-value and test-statistic binarization are offered in the current NLA pipeline :cite:p:`EggebrechtA,WheelockM:2018`. Prior research has +supported the incorporation of a proportional edge density threshold, given that uneven edge density +thresholds have been shown to unfairly bias results :cite:p:`vandenHeuvelM`. +For enrichment statistic calculation, NLA offers a number of statistical tests. 
Prior research has relied on +chi-square and Fisher's Exact test, as well as a Kolmogorov-Smirnov (KS) test and non-parametric tests +based on ranks, which compare the distribution of test values within a region to other regions :cite:p:`WheelockM:2018,RudolphM,MoothaV,ZahnJ`. In +addition, KS alternatives such as averaging or minmax have also shown promise in connectome +applications :cite:p:`ChenJ,NewtonM,YaariG,EfronB`. + +NLA then conducts data-driven permutation testing to establish significance. In the NLA toolbox, network +level significance is determined by comparing each measured enrichment statistic to permuted +enrichment p-values which are calculated by randomly shuffling behavior vector labels and computing +the enrichment statistic many times to produce a null distribution for each network. The FPR is controlled +at the network level using Bonferroni correction. Therefore, NLA is able to retain edge-wise correlations +within each network module, but network communities are used to reduce the number of comparisons +and control the FPR at the network level. After significance is determined, the pipeline allows users to +create publication quality images to visualize network level findings both in connectome format and on +the surface of the brain. + +**Note**: While the behavior vector labels are shuffled to conduct permutations in the enrichment pipeline, +functional connectivity data are not shuffled in order to preserve the inherent covariant structure of the +data across permutations + +How Should the Test Statistic Threshold Be Chosen? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A nominal threshold is used for the thresholding and binarization step of the edge-level tests. The +nominal threshold is uncorrected and is typically set at 0.05 or 0.01 in the edge-level prob_max field. 
In +contrast, a network-level corrected threshold using the Bonferroni method is used in the net-level +statistics, where the nominal threshold is divided by the number of tests being done to correct for +multiple comparisons. + +How Should the Networks Be Chosen? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There are many canonical ROI sets and there are many network definitions. Some of these network +definitions include ROI that are not consistently assigned to any network. These ROI are typically removed +prior to network level analysis, as is the case in the ``Seitzman_15nets_288ROI_on_TT`` and the +``Gordon_12nets_286parcels_on_MNI`` network atlases included in this version of the toolbox. Network +atlases that are not included in this package may also be used, but they must first be formatted into the +correct structure. \ No newline at end of file diff --git a/docs/source/network_atlases.rst b/docs/source/network_atlases.rst new file mode 100644 index 00000000..bb7c2fde --- /dev/null +++ b/docs/source/network_atlases.rst @@ -0,0 +1,40 @@ +Network Atlases +================================== +.. mat:module:: . + +Overview +------------------------------------ + +A network atlas is a data file describing networks of the brain, each containing a number of related +regions of interest. It also contains metadata such as network colors and names, ROI spatial coordinates +(with associated mesh/space), and optionally, a surface parcellation. + +.. mat:autoclass:: NetworkAtlas + + .. mat:automethod:: numNets + + .. mat:automethod:: numNetPairs + + .. mat:automethod:: numROIs + + .. mat:automethod:: numROIPairs + +Provided Network Atlases +-------------------------------- + +A number of network atlases are provided with the NLA software package in the ``support_files`` directory. +Only NLA-specific details will be provided about them; if you wish to go into more depth on a particular atlas +you should follow the link provided in its ``source`` field. 
+ +* ``Gordon_13nets_333parcels_on_MNI`` + * Surface space. + * Consists of 333 parcels and corresponding 13 networks :cite:p:`GordonE`. Contains both the MNI centroids and surface parcels on a ``MNI_32k`` mesh. +* ``Gordon_12nets_286parcels_on_MNI`` + * Surface space + * Same as ``Gordon_13nets_333parcels_on_MNI`` with 'None' network and its ROIs removed :cite:p:`GordonE`. +* ``Seitzman_17nets_300ROI_on_TT`` + * Volume space + * 300 ROIs in 17 networks :cite:p:`SeitzmanB`. Contains TT centroids +* ``Seitzman_15nets_288ROI_on_TT`` + * Volume space + * Same as ``Seitzman_17nets_300ROI_on_TT`` with 12 ROI and 2 networks removed due to inconsistent placement in a network :cite:p:`SeitzmanB`. diff --git a/docs/source/overview.rst b/docs/source/overview.rst new file mode 100644 index 00000000..9a4779fa --- /dev/null +++ b/docs/source/overview.rst @@ -0,0 +1,162 @@ +Network Level Analysis Overview +==================================== + +The connectome and network structure +------------------------------------------- + +The term connectome essentially describes any network description of whole brain connectivity, from the +microscale of single neurons and synapses up to the macroscale of entire brain regions and pathways +:cite:p:`SpornsO`. Connectomics is an ever-advancing field, and large-scale scientific endeavors such as the NIH's Human +Connectome Project have made significant progress in mapping, analyzing, and understanding the +human connectome. Contemporary connectome research views the brain as an extensive, complex +network of non-adjacent, yet functionally and structurally connected brain regions :cite:p:`GordonE,PowerJ`. The connectome +can be utilized to assess whole-brain associations between behavior and spatially distinct neural +networks. 
+ +MRI has traditionally been viewed as the gold standard for mapping the connectome and has been used +to demonstrate consistencies between the spatial topology of task-based activation studies and the brain +networks derived from task-free functional connectivity :cite:p:`PowerJ,GrattonC`. Contemporary cluster correction approaches +do not utilize the spatial topology of brain networks when estimating cluster size significance :cite:p:`FormanS,FristonK,VieiraS`. +Therefore, there is an urgent need for standardized tools that address the robust hierarchical network +structure of the brain and the limitations of contemporary neuroimaging analysis approaches by utilizing +this biologically informed network structure to increase reproducibility and biological interpretation of +neuroscience results + +Why use this toolbox? +---------------------------------------- + +The NLA toolbox is designed to address the multiple comparisons problem that occurs within +connectome research, wherein studies use hundreds of regions of interest (ROI) to create connectomes +with thousands of potential connections, yet they lack the tools to establish statistical significance when +analyzing associations between connectome and behavior. For example, previous research failed to find +any significant differences in brain connectivity that passed a connectome-wise false discovery rate (FDR) +correction between individuals with a neurological disorder and healthy controls - a finding which +contradicts the recognized role of the brain in neurological functioning :cite:p:`GreeneD`. Other studies have found +connectome-behavior associations that pass the FDR correction, but lack the statistical tools necessary to +definitively establish these observations :cite:p:`ShirerW`. NLA, therefore, serves as a valuable tool for the statistical +quantification of network-level associations with behavior. 
The toolbox relies on cross-disciplinary +biostatistical approaches to evaluate brain-behavior relationships within the connectome and allows for +control of FDR at the network level. In this way, NLA diverges from most contemporary tools with a focus +on single connection associations, in that it is not dependent on edgewise false positive rate (FPR) or +spatially contiguous brain regions. By organizing connectivity-behavior associations according to an a- +priori model of underlying neurobiology (i.e., networks), NLA leverages the structure of the human +connectome and provides a framework for rational interpretation and replication of findings across +research methodologies. Finally, the integration of connectome analysis and visualization techniques +within a single, extensible MATLAB-based pipeline makes NLA an expedient tool for statistical testing and +production of publication quality images all in one package. + +Introduction to NLA and enrichment +--------------------------------------------- + +Network Level Analysis uses enrichment to evaluate whether pairs of networks demonstrate significant +clustering of strong brain-behavior correlations. Enrichment applies common statistical tests to measure +the clustering of associations within a given network pair and reduces the number of comparisons to +those performed at the network level :cite:p:`SubramanianA`. Network level statistics such as the Chi-Square test, +Hypergeometric test, and Kolmogorov-Smirnov test have been used in numerous network-level +investigations including joint attention and motor function in infants and toddlers, maternal +inflammation during gestation, motor and attention development in very preterm children, sex +differences during fetal brain development, and autism in adults :cite:p:`EggebrechtA,WheelockM:2018,WheelockM:2019,RudolphM,WheelockM:2021,MaronKatz,MarrusN,FeczkoE`. 
+ +Edge-level Statistic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First, correlations are calculated between behavioral scores and Fisher z-transformed functional +connectivity correlation measures for each pair of ROI. For behavioral scores that are normally +distributed, Pearson r correlations are used to calculate the associations. Non-parametric Spearman rank +correlations are used to assess the relationship between functional connectivity and behavioral scores +that are not normally distributed. Other tests of correlation such as Kendall's tau and 2-sample Welch's t +can also be used. Network pairs are then tested for enrichment of strong correlation values, defined as +only those values that remain after being nominally thresholded. An uncorrected p-threshold (e.g., 0.05 or +0.01) is applied and the remaining correlations are binarized. + +Network Level Statistics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +After the edge-level statistic matrix has been calculated, it is given as input to a variety of network-level +tests. First, it is input directly to the tests, and the resulting statistic is called the "non-permuted network +level statistic" (for every given network-level test). Then, permuted edge-level statistics are calculated via +the same method as described previously, but with the behavioral scores permuted across subjects. The +network-level test is performed on this as well, and the significance of permuted network-level statistics +is ranked against the non-permuted, to calculate the permuted experiment-wide p-value (an empirical p- +value produced from this ranking). 
Additionally, "single-sample within-net-pair" statistics are calculated +for each test, which, rather than comparing a given network to the connectome over a number of +permutations (as in the permuted network-level test), performs a single-sample test on the network +alone, which is then ranked against permutations of said network similarly to the permuted network-level +test. +A number of statistical tests are utilized at the network level. The 1-degree-of-freedom :math:`\chi^{2}` test is used to +compare the observed number of strong (thresholded and binarized) brain-behavior correlations within +one pair of functional networks to the number of strong brain-behavior correlations that would be +expected if strong correlations were uniformly distributed across all possible network pairs. A large +resulting test statistic can indicate that the number of strong correlations within a specific network pair is +enriched. The hypergeometric test aims to assess the likelihood of observing a given number of strong +correlations within a pair of networks, given (1) the total number of strong correlations observed over the +entire connectome and (2) the total number of possible hits for that network pair (i.e., the total number of +ROI-pairs within a given network pair). Other tests such as Kolmogorov-Smirnov, Wilcoxon rank-sum, and +Welch's t can be used, as well as Cohen's d to measure effect sizes. +As described, significance for all statistical tests is determined using permutation testing. Behavioral +labels are randomly permuted and correlated with the connectome data (typically 10k times) to create +null brain-behavior correlation matrices. Tests are calculated on these permuted brain-behavior +correlation matrices generating a null distribution of network level statistics. The measured (real) test +statistics are compared to this null distribution to establish network-level significance. 
+ +NLA Alternatives / Comparison to other analysis methods +---------------------------------------------------------------------- + +The NLA toolbox's use of a novel enrichment approach makes it a transformative tool in connectome- +wide association studies, given that all current enrichment analysis methods are built for use with +genome data and NLA is the first enrichment tool designed to analyze the connectome. Many alternative +methods for connectome analysis rely on spatial extent cluster correction in order to control voxel-wise +whole brain connectome FPR :cite:p:`ShehzadZ,SharmaA`. Despite mounting evidence that spatially non-contiguous brain regions +are strongly correlated and often co-activate to the same stimuli, cluster extent correction is often +regarded as the ideal thresholding approach in human connectome literature. By basing statistical +significance on contiguous voxels, however, cluster extent correction methods fail to account for this +covariance structure. Therefore, brain regions that are known to be highly correlated and part of the same +network - such as the anterior cingulate and posterior cingulate - may be thresholded separately, +resulting in one or both separate regions not meeting statistical thresholds :cite:p:`RaichleM`. NLA is distinguished from +the cluster extent correction methodology in that it groups highly correlated, non-contiguous brain +regions based on pre-defined network modules prior to estimating network-level significance. + +Network Based Statistic (NBS) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Given this deviation from the popular extent cluster correction thresholding method, the most +conceptually similar existing connectome analysis approach to NLA is the Network Based Statistic (NBS) +toolbox :cite:p:`ZaleskyA`. NBS was the first tool to control the edgewise FPR by leveraging graph-based estimates of +modularity. 
Still, several crucial differences exist between NLA and NBS: (a) the results from NBS focus on +edgewise significance as opposed to network-level significance, (b) NBS does not have a built-in +visualization functionality, and (c) NBS allows for different module sizes, number of network modules, +and configurations of edges assigned to network modules across various clinical populations, but draws +no conclusions regarding the biological relevance of identified networks. The NLA pipeline addresses this +issue by presenting a vast array of analysis and visualization options that utilize biologically informed +hierarchical organization models of the brain. + +Graph Theoretical Toolboxes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Graph Theoretical Toolboxes are another comparable approach to NLA, offering an analysis methodology +to quantify network characteristics such as integration, segregation, resilience, and relative contribution +of individual network nodes to overall information flow within the network :cite:p:`RubinovM`. Various other toolboxes +have been created to address network thresholding, graph metric calculation, and graph visualization - +such as GRETNA, GEPHI, and BrainNet Viewer. Additional methodologies aim to determine network +topology differences by leveraging generalized estimating equations and generalized linear and nonlinear +mixed models :cite:p:`BahramiM,GinestetC,SimpsonS`. Each of these tools has helped to advance the application of graph theory approaches +to connectome analysis. 
The NLA toolbox estimates statistical associations edgewise, rather than on +network topology features, thereby providing a crucial and complementary approach to the existing +collection of brain network analysis tools. + +Statistical Inference and the use of liberal primary thresholds +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +NLA establishes statistical significance in the weak sense similar to traditional voxelwise cluster-level +inference :cite:p:`NicholsT`. In voxelwise cluster correction, a liberal primary threshold is employed in addition to a +cluster-extent threshold (determined by e.g., random field theory or Monte Carlo simulations). The +resulting clusters are significant but inferences cannot be made about any particular sub-regions or +voxels within a cluster. Similarly, NLA employs a liberal primary threshold in order to calculate the +network-level statistic and significance is established with permutation testing, but claims cannot be +made about the significance of any given ROI-pair within the network. One could apply an FDR correction +within each network pair similar to the statistics outlined in the Network Based Statistics toolbox, though +this would still only control the false positive rate in the weak sense. The motivation of all of these +approaches (cluster-level inference, network-level enrichment, network-based statistic) is to control the +false positive rate when a massive number of tests are performed. Controlling the false positive rate in the +strong sense with several thousand functional connections (e.g., 30k) will often result in no single ROI-pair +surviving OR a few scattered ROI-pairs surviving with no clear biological pattern :cite:p:`GreeneD`. 
\ No newline at end of file diff --git a/docs/source/preface.rst b/docs/source/preface.rst new file mode 100644 index 00000000..081d6108 --- /dev/null +++ b/docs/source/preface.rst @@ -0,0 +1,19 @@ +Preface +============== + +This is the reference manual for the Network Level Analysis (NLA) Toolbox. NLA is an extensible +MATLAB-based software package for the analysis of behavioral associations with brain connectivity data. NLA +utilizes a model-based statistical approach known variously as 'pathway analysis', 'over-representation +analysis', or 'enrichment analysis', which was first used to describe behavioral or clinical associations in +genome-wide association studies :cite:p:`RivalsI,KhatriP,BackesC,SubramanianA`. + +Enrichment is a model-based data reduction approach to elucidate statistically significant network +features. The suite developed here includes data-driven permutation-based false-positive-rate +procedures that manage multiple comparisons corrections for one or two independent groups. + +Hardware and Software Requirements +------------------------------------------ +NLA has been tested on MATLAB 2020b on Ubuntu 20.04. Current release of the GUI is not supported for +Windows. NLA requires the Parallel Computing and Statistics and Machine Learning Toolboxes. Best +performance will be achieved on a server setup with multiple cores to support parallel processing +(particularly for the permutation testing portion of the toolbox). \ No newline at end of file diff --git a/docs/source/refs.bib b/docs/source/refs.bib new file mode 100644 index 00000000..0f4e0d1c --- /dev/null +++ b/docs/source/refs.bib @@ -0,0 +1,450 @@ +@Article{RivalsI, + title = {Enrichment or depletion of a GO category within a class +of genes: which test?}, + author = {Rivals I. Personnaz L. Taing L. 
& Potier M.-C.}, + journal = {Bioinformatics}, + year = {2007}, + volume = {23}, + pages = {401-407}, +} + +@Article{KhatriP, + title = {Ten Years of Pathway Analysis: Current Approaches and Outstanding +Challenges.}, + author = {Khatri P. Sirota M. & Butte A. J.}, + journal = {PLoS Comput Biol}, + year = {2012}, + volume = {8}, + pages = {e1002375} +} + +@Article{BackesC, + title = {Systemic permutation testing in GWAS pathway analysis: identification of genetic networks +in dilated cardiomyopathy and ulcerative colitis.}, + author = {Backes C.}, + journal = {BMC Genomics}, + year = {2014}, + volume = {15}, + pages = {622} +} + +@Article{SubramanianA, + title = {Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide +expression profiles.}, + author = {Subramanian A.}, + journal = {Proc Natl Acad Sci}, + year = {2005}, + volume = {102}, + pages = {15545-15550} +} + +@Article{SpornsO, + title = {The Human Connectome: A Structural Description of the Human Brain}, + author = {Sporns O. Tononi G. 
K{\"o}tter R.}, + journal = {PLoS Comput Biol}, + year = {2005}, + volume = {1}, + pages = {42} +} + +@Article{GordonE, + title = {Generation and Evaluation of a Cortical Area Parcellation from Resting-State +Correlations.}, + author = {Gordon E.M.}, + journal = {Cereb Cortex}, + year = {2016}, + volume = {26}, + pages = {288-303} +} + +@Article{PowerJ, + title = {Functional Network Organization of the Human Brain.}, + author = {Power J.D.}, + journal = {Neuron}, + year = {2011}, + volume = {72}, + pages = {665-678} +} + +@Article{GrattonC, + title = {Functional Brain Networks Are Dominated by Stable Group and Individual Factors, +Not Cognitive or Daily Variation}, + author = {Gratton C.}, + journal = {Neuron}, + year = {2018}, + volume = {98}, + pages = {439-452} +} + +@Article{FormanS, + title = {Improved Assessment of Significant Activation in Functional Magnetic Resonance Imaging +(fMRI): Use of Cluster-Size Threshold.}, + author = {Forman S.D.}, + journal = {Magn Reson Med}, + year = {1995}, + volume = {33}, + pages = {636-647} +} + +@Article{FristonK, + title = {Assessing the significance of focal activations using their spatial extent: Assessing Focal +Activations by Spatial Extent}, + author = {Friston K.J. Worsley K.J. Frackowiak R.S.J. Mazziotta J.C. & Evans A.C.}, + journal = {Hum Brain Mapp}, + year = {1994}, + volume = {1}, + pages = {210-220} +} + +@Article{VieiraS, + title = {Using deep learning to investigate the neuroimaging correlates of psychiatric and neurological +disorders: Methods and applications.}, + author = {Vieira S. Pinaya W.H.L. 
& Mechelli A.}, + journal = {Neurosci Biobehav Rev}, + year = {2017}, + volume = {74}, + pages = {58-75} +} + +@Article{GreeneD, + title = {Multivariate pattern classification of pediatric Tourette syndrome using functional connectivity +MRI.}, + author = {Greene D.J.}, + journal = {Dev Sci}, + year = {2016}, + volume = {19}, + pages = {581-598} +} + +@Article{ShirerW, + title = {Decoding Subject-Driven Cognitive States with Whole-Brain Connectivity Patterns.}, + author = {Shirer W.R. Ryali S. Rykhlevskaia E. Menon V. & Greicius M.D.}, + journal = {Cereb Cortex}, + year = {2012}, + volume = {22}, + pages = {158-165} +} + +@Article{EggebrechtA, + title = {Joint Attention and Brain Functional Connectivity in Infants and Toddlers.}, + author = {Eggebrecht A.T.}, + journal = {Cereb Cortex}, + year = {2017}, + volume = {27}, + pages = {1709-1720} +} + +@Article{WheelockM:2018, + title = {Altered functional network connectivity relates to motor development in children born +very preterm.}, + author = {Wheelock M.D.}, + journal = {NeuroImage}, + year = {2018}, + volume = {183}, + pages = {574-583} +} + +@Article{WheelockM:2019, + title = {Sex differences in functional connectivity during fetal brain development.}, + author = {Wheelock M.D.}, + journal = {Dev Cogn Neurosci}, + year = {2019}, + volume = {36}, + pages = {100632} +} + +@Article{WheelockM:2021, + title = {Altered brain-behavior relationships underlie attention impairment in very preterm children.}, + author = {Wheelock M.D.}, + journal = {Cereb Cortex}, + year = {2021}, + volume = {31}, + pages = {1383-1394} +} + +@Article{RudolphM, + title = {Maternal IL-6 during pregnancy can be estimated from newborn brain connectivity and +predicts future working memory in offspring.}, + author = {Rudolph M.D.}, + journal = {Nat Neurosci}, + year = {2018}, + volume = {21}, + pages = {765-772} +} + +@Article{MaronKatz, + title = {A large-scale perspective on stress-induced alterations in resting-state networks.}, + author = 
{Maron-Katz A. Vaisvaser S. Lin T. Hendler T. & Shamir R.A.}, + journal = {Sci Rep}, + year = {2016}, + volume = {6}, + pages = {21503} +} + +@Article{MarrusN, + title = {Walking, Gross Motor Development, and Brain Functional Connectivity in Infants and Toddlers.}, + author = {Marrus N.}, + journal = {Cereb Cortex}, + year = {2018}, + volume = {28}, + pages = {750-763} +} + +@Article{FeczkoE, + title = {Subtyping cognitive profiles in Autism Spectrum Disorder using a Functional Random Forest +algorithm.}, + author = {Feczko E.}, + journal = {NeuroImage}, + year = {2018}, + volume = {172}, + pages = {674-688} +} + +@Article{ThomasonM, + title = {Prenatal lead exposure impacts cross-hemispheric and long-range connectivity in the human +fetal brain.}, + author = {Thomason M.E.}, + journal = {NeuroImage}, + year = {2019}, + volume = {191}, + pages = {186-192} +} + +@Article{McKinnonC, + title = {Restricted and Repetitive Behavior and Brain Functional Connectivity in Infants at Risk +for Developing Autism Spectrum Disorder}, + author = {McKinnon C.J.}, + journal = {Biol Psychiatry Cogn Neurosci Neuroimaging}, + year = {2019}, + volume = {4}, + pages = {50-61} +} + +@Article{MarekS, + title = {Identifying reproducible individual differences in childhood functional brain networks: +An ABCD study}, + author = {Marek S.}, + journal = {Dev Cogn Neurosci}, + year = {2019}, + volume = {40}, + pages = {100706} +} + +@Article{ShehzadZ, + title = {A multivariate distance-based analytic framework for connectome-wide association studies}, + author = {Shehzad Z.}, + journal = {NeuroImage}, + year = {2014}, + volume = {93}, + pages = {74-94} +} + +@Article{SharmaA, + title = {Common Dimensional Reward Deficits Across Mood and Psychotic Disorders: A Connectome-Wide +Association Study.}, + author = {Sharma A.}, + journal = {Am J Psychiatry}, + year = {2017}, + volume = {174}, + pages = {657-666} +} + +@Article{RaichleM, + title = {The Brain's Default Mode Network}, + author = {Raichle 
M.E.}, + journal = {Annu Rev Neurosci}, + year = {2015}, + volume = {38}, + pages = {433-447} +} + +@Article{ZaleskyA, + title = {Network-based statistic: Identifying differences in brain networks}, + author = {Zalesky A. Fornito A. & Bullmore E.T.}, + journal = {NeuroImage}, + year = {2010}, + volume = {53}, + pages = {1197-1207} +} + +@Article{RubinovM, + title = {Complex network measures of brain connectivity: Uses and interpretations.}, + author = {Rubinov M. & Sporns O.}, + journal = {NeuroImage}, + year = {2010}, + volume = {52}, + pages = {1059-1069} +} + +@Article{BahramiM, + title = {A MATLAB toolbox for multivariate analysis of brain networks.}, + author = {Bahrami M. Laurienti P.J. & Simpson S.L.}, + journal = {Hum Brain Mapp}, + year = {2019}, + volume = {40}, + pages = {175-186} +} + +@Article{GinestetC, + title = {Statistical network analysis for functional MRI: summary networks and group comparisons.}, + author = {Ginestet C.E. Fournel A.P. & Simmons A.}, + journal = {Front Comput Neurosci}, + year = {2014}, + volume = {8} +} + +@Article{SimpsonS, + title = {A permutation testing framework to compare groups of brain networks.}, + author = {Simpson S.L. Lyday R.G. Hayasaka S. Marsh A.P. & Laurienti P.J.}, + journal = {Front Comput Neurosci}, + year = {2013}, + volume = {7} +} + +@Article{NicholsT, + title = {Controlling the familywise error rate in functional neuroimaging: a comparative review.}, + author = {Nichols T. & Hayasaka S.}, + journal = {Stat Methods Med Res}, + year = {2003}, + volume = {12}, + pages = {419-446} +} + +@Article{BellecP, + title = {Impact of the resolution of brain parcels on connectome-wide association studies in fMRI.}, + author = {Bellec P.}, + journal = {NeuroImage}, + year = {2015}, + volume = {123}, + pages = {212-228} +} + +@Article{RosvallM, + title = {Maps of random walks on complex networks reveal community structure.}, + author = {Rosvall M. 
& Bergstrom C.T.}, + journal = {Proc Natl Acad Sci}, + year = {2008}, + volume = {105}, + pages = {1118-1123} +} + +@Article{ThomasY, + title = {The organization of the human cerebral cortex.}, + author = {Thomas Yeo B.T.}, + journal = {J Neurophysiol}, + year = {2011}, + volume = {106}, + pages = {1125-1165} +} + +@Article{GlasserM, + title = {A multi-modal parcellation of human cerebral cortex.}, + author = {Glasser M.F.}, + journal = {Nature}, + year = {2016}, + volume = {536}, + pages = {171-178} +} + +@Article{ShenX, + title = {Groupwise whole-brain parcellation from resting-state fMRI data for network node identification.}, + author = {Shen X. Tokoglu F. Papademetris X. & Constable R.T.}, + journal = {NeuroImage}, + year = {2013}, + volume = {82}, + pages = {403-415} +} + +@Article{CraddockR, + title = {A whole brain fMRI atlas generated via spatially constrained spectral clustering.}, + author = {Craddock R.C. James G.A. Holtzheimer P.E. Hu X.P. & Mayberg H.S.}, + journal = {Hum Brain Mapp}, + year = {2012}, + volume = {33}, + pages = {1914-1928} +} + +@Article{AckermanM, + title = {A general modular framework for gene set enrichment analysis.}, + author = {Ackermann M. 
& Strimmer K.}, + journal = {BMC Bioinformatics}, + year = {2009}, + volume = {10}, + pages = {47} +} + +@Article{vandenHeuvelM, + title = {Proportional thresholding in resting-state fMRI functional connectivity networks and +consequences for patient-control connectome studies: Issues and recommendations.}, + author = {van den Heuvel M.P.}, + journal = {NeuroImage}, + year = {2017}, + volume = {152}, + pages = {437-449} +} + +@Article{MoothaV, + title = {PGC-1α-responsive genes involved in oxidative phosphorylation are coordinately +downregulated in human diabetes.}, + author = {Mootha V.K.}, + journal = {Nat Genet}, + year = {2003}, + volume = {34}, + pages = {267-273} +} + +@Article{ZahnJ, + title = {Transcriptional Profiling of Aging in Human Muscle Reveals a Common Aging Signature.}, + author = {Zahn J.M.}, + journal = {PLoS Genet}, + year = {2006}, + volume = {2}, + pages = {115} +} + +@Article{ChenJ, + title = {Significance analysis of groups of genes in expression profiling studies.}, + author = {Chen J.J. Lee T. Delongchamp R.R. Chen T. & Tsai C.-A.}, + journal = {Bioinformatics}, + year = {2007}, + volume = {23}, + pages = {2104-2112} +} + +@Article{NewtonM, + title = {Random-set methods identify distinct aspects of the enrichment signal in gene-set analysis.}, + author = {Newton M.A. Quintana F.A. Boon J. Sengupta S. & Ahlquist P.}, + journal = {Ann Appl Stat}, + year = {2007}, + volume = {1} +} + +@Article{YaariG, + title = {Quantitative set analysis for gene expression: a method to quantify gene set differential expression +including gene-gene correlations.}, + author = {Yaari G. Bolen C.R. Thakar J. & Kleinstein S.H.}, + journal = {Nucleic Acids Res}, + year = {2013}, + volume = {41}, + pages = {170-170} +} + +@Article{EfronB, + title = {On testing the significance of sets of genes.}, + author = {Efron B. 
& Tibshirani R.}, + journal = {Ann Appl Stat}, + year = {2007}, + volume = {1} +} + +@Article{SeitzmanB, + title = {A set of functionally-defined brain regions with improved representation of the subcortex and cerebellum.}, + author = {Seitzman B.A.}, + journal = {NeuroImage}, + year = {2020}, + volume = {206}, + pages = {116290} +} + +.. |oumlaut| unicode:: U+00F6 .. o umlaut +.. |alpha| unicode:: U+03B1 .. alpha \ No newline at end of file diff --git a/docs/source/setup.rst b/docs/source/setup.rst new file mode 100644 index 00000000..5aeda87f --- /dev/null +++ b/docs/source/setup.rst @@ -0,0 +1,38 @@ +Setup +==================== + +Add NLA Folders to MATLAB Path +------------------------------------- + +In order for any NLA functions to work, MATLAB must be able to find them on the path. To do this, in +the MATLAB file explorer, navigate to where you have downloaded or cloned the NetworkLevelAnalysis +folder to. Right click the folder, hover over 'Add to Path' in the context menu, and click the 'Selected +Folders and Subfolders' option. +**NOTE**: If you only add the base 'NetworkLevelAnalysis' folder to the path, the code will not work; you must +pick the 'Selected Folders and Subfolders' option. + +Running the GUI +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To open the GUI, navigate to the root directory of the NetworkLevelAnalysis package in MATLAB and run +the command ``NLA_GUI`` via the MATLAB command line. +**Note**: Running the GUI through an X11-based remote connection (e.g., MobaXterm or similar) can be very +laggy in some cases. It is strongly recommended to use the GUI through a more modern remote protocol +such as VNC instead. + +Running as a Pipeline Script +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To run NLA via a script instead, open the file main_pipeline.m (located in the root directory of the +NetworkLevelAnalysis package) in MATLAB, and proceed through the stages of the pipeline. 
There is also +a pipeline for precalculated data located in precalculated_pipeline.m. +**Note**: The pipeline scripts are more complex and easier to get wrong than the GUI, and should only be used +if you have a good reason to do so. + +Using Individual NLA Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To use NLA functions within your own code or scripts, add the ``NetworkLevelAnalysis`` folder to your +path. Most NLA functions are contained within the ``+nla`` namespace and its sub-namespaces. +Functions and packages can also be imported. ``import nla.TestPool`` imports ``TestPool``, allowing +the user to just type ``TestPool()`` to initialize it. \ No newline at end of file