athms · yarikoptic · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/README.md b/README.md
@@ -73,7 +73,7 @@ We use [WebDataset](https://github.com/webdataset/webdataset) to read data durin
 
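-**Upstream:** All upstream data contain three core entries for I) the parcelated BOLD data (`bold.pyd`), II) its repetition time (`t_r.pyd`), and III) a key (`__key__`) indicating the specific subject / task / run that this .tar file corresponds to. 
+**Upstream:** All upstream data contain three core entries for I) the parcellated BOLD data (`bold.pyd`), II) its repetition time (`t_r.pyd`), and III) a key (`__key__`) indicating the specific subject / task / run that this .tar file corresponds to. 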
 
-**Downstream:** Each .tar file of the two downstream datasets contains one sample for each trial of its experiment run. In addition to the three entries listed for the upstream data, each trial also contains information describing the associated mental state. In our downstream adapatation analyses, we utilize the `task_label.pyd` (MDTB data) and `label_across_tasks.pyd` (HCP data) entries to assign numerical labels to each mental state during training. Note that the MDTB data is indicated with its OpenNeuro identifier (`ds002105`) in the `data/downstream/` directory. 
+**Downstream:** Each .tar file of the two downstream datasets contains one sample for each trial of its experiment run. In addition to the three entries listed for the upstream data, each trial also contains information describing the associated mental state. In our downstream adaptation analyses, we utilize the `task_label.pyd` (MDTB data) and `label_across_tasks.pyd` (HCP data) entries to assign numerical labels to each mental state during training. Note that the MDTB data is indicated with its OpenNeuro identifier (`ds002105`) in the `data/downstream/` directory. 
 
 For details on the additional preprocessing applied to [fmriprep](https://fmriprep.org/en/stable/)'s derivatives for each dataset, see the scripts contained in `scripts/dataprep/`.
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -29,3 +29,10 @@ webdataset = "0.1.103"
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git*,*.lock'
+check-hidden = true
+# ignore-regex = ''
+# ignore-words-list = ''
diff --git a/scripts/analyses/fig5_downstream_performance.py b/scripts/analyses/fig5_downstream_performance.py
@@ -123,7 +123,7 @@ def fig_downstream_performance(
 
 def get_args() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(
-        description='figure 5 of the manuscript; downstream model adapatation performances'
+        description='figure 5 of the manuscript; downstream model adaptation performances'
     )
 
     parser.add_argument(

diff --git a/scripts/analyses/sfig4_5_downstream_learning_curves.py b/scripts/analyses/sfig4_5_downstream_learning_curves.py
@@ -199,7 +199,7 @@ def sfig_downstream_learning_curves(
 
 def get_argparse() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(
-        description='appendix figure 4-5 of the manuscript; downstream adapatation learning curves'
+        description='appendix figure 4-5 of the manuscript; downstream adaptation learning curves'
     )
 
     parser.add_argument(

diff --git a/scripts/analyses/sfig6_downstream_performance_replication.py b/scripts/analyses/sfig6_downstream_performance_replication.py
@@ -9,7 +9,7 @@
 
 
 def sfig_downstream_performance_replication(config: Dict=None) -> None:
-    """Script's main funtion; creates Appendix Figure 6 by wrapping
+    """Script's main function; creates Appendix Figure 6 by wrapping
-    fig_downstream_performance() from scripts/analyses/fig5_downstream-performance.py"""
+    fig_downstream_performance() from scripts/analyses/fig5_downstream_performance.py"""
 
     if config is None:

diff --git a/scripts/analyses/sfig7_downstream_learning_curves_replication.py b/scripts/analyses/sfig7_downstream_learning_curves_replication.py
@@ -9,7 +9,7 @@
 
 
 def sfig_downstream_learning_curves_replication(config: Dict=None) -> None:
-    """Script's main funtion; creates Appendix Figure 7 by wrapping
+    """Script's main function; creates Appendix Figure 7 by wrapping
-    fig_downstream_performance() from scripts/analyses/fig5_downstream-performance.py"""
+    fig_downstream_performance() from scripts/analyses/fig5_downstream_performance.py"""
 
     if config is None:

diff --git a/scripts/dataprep/downstream/mdtb_dataprep.py b/scripts/dataprep/downstream/mdtb_dataprep.py
@@ -287,7 +287,7 @@ def get_args() -> argparse.Namespace:
         metavar='DIR',
         default='../data/downstream/',
         type=str,
-        help='path where .tar files for fMRI runs wil be stored '
+        help='path where .tar files for fMRI runs will be stored '
              '(default: ../data/downstream)'
     )
     parser.add_argument(

diff --git a/scripts/dataprep/upstream/dataprep.py b/scripts/dataprep/upstream/dataprep.py
@@ -192,7 +192,7 @@ def get_args() -> argparse.ArgumentParser:
         default=-1,
         type=float,
         help='repetition time / TR of BOLD data (in seconds); '
-             'will be infered from data files, if not set (or set to -1).'
+             'will be inferred from data files, if not set (or set to -1).'
     )
     parser.add_argument(
         '--check-fmriprep-bug',

diff --git a/scripts/train.py b/scripts/train.py
@@ -394,7 +394,7 @@ def make_model(model_config: Dict=None):
 def get_config(args: argparse.Namespace=None) -> Dict:
     """
     Make config from command line arguments (as created by get_args()).
-    Performs additional formating of args required for calling train().
+    Performs additional formatting of args required for calling train().
     """
 
     if args is None:

diff --git a/src/batcher/make.py b/src/batcher/make.py
@@ -45,7 +45,7 @@ def make_batcher(
         run files.
     sample_random_seq: bool
         If True, the sequences are sampled randomly from
-        the data run files, given the spefied
+        the data run files, given the specified
         sequence length (seq_min and seq_max) and the
-        specified gap consecutive sequences (bert_seq_gap_min,
+        specified gap between consecutive sequences (bert_seq_gap_min,
         bert_seq_gap_max) for BERT-style training.

diff --git a/src/decoder/make.py b/src/decoder/make.py
@@ -68,7 +68,7 @@ def make_decoder(
         (as generated by src.embedder.prep_batch).
     decode(outputs: Dict):
         Make decoding prediction, given outputs generated by
-        caling forward().    
+        calling forward().
     switch_decoding_mode(is_decoding_mode: bool):
         Switch model to decoding mode (is_decoding_mode=True).
         Relevant for adaptation of pre-trained models
@@ -112,4 +112,4 @@ def make_decoder(
         return LinearBaseline(**kwargs)
 
     else:
-        raise ValueError(f'{architecture}-architecture unkown.')
+        raise ValueError(f'{architecture}-architecture unknown.')
diff --git a/tests/test_adapt.py b/tests/test_adapt.py
@@ -60,7 +60,7 @@ def test_adapt_decoding() -> None:
                     'training_style': pre_training_style,
                 }
             )
-        # adapt mdoel
+        # adapt model
         adapt_trainer = run_train_process(
             config={
                     **ADAPT_CONFIG,

diff --git a/tests/test_checkpoints.py b/tests/test_checkpoints.py
@@ -115,7 +115,7 @@ def test_checkpoint_forward_pass() -> None:
         model.eval()
         loaded_model.eval()
         batch_prepped = model.embedder.prep_batch(batch)
-        # test embdder forward pass
+        # test embedder forward pass
         inputs_embeds = model.embedder(batch=batch_prepped)
         inputs_embeds_loaded = loaded_model.embedder(batch=batch_prepped)
         assert torch.equal(