Commit 30e22d3

Squash merge from dev
1 parent e0a794e commit 30e22d3

137 files changed: +10239 -12909 lines


.github/workflows/black.yml

Lines changed: 0 additions & 10 deletions
This file was deleted.

.github/workflows/pre-commit.yml

Lines changed: 19 additions & 0 deletions
```yaml
name: Pre-commit Check

on:
  push:
    branches: [main, master]
  pull_request:

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.10'

      - name: Run pre-commit
        uses: pre-commit/action@v3.0.1
```
Lines changed: 69 additions & 0 deletions
```yaml
# This workflow will upload a Python Package to PyPI when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  release-build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Build release distributions
        run: |
          python -m pip install build
          python -m build

      - name: Upload distributions
        uses: actions/upload-artifact@v4
        with:
          name: release-dists
          path: dist/

  pypi-publish:
    runs-on: ubuntu-latest
    needs:
      - release-build
    permissions:
      # IMPORTANT: this permission is mandatory for trusted publishing
      id-token: write

    # Dedicated environments with protections for publishing are strongly recommended.
    # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules
    environment:
      name: pypi
      # OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status:
      url: https://pypi.org/p/chebai
      #
      # ALTERNATIVE: if your GitHub Release name is the PyPI project version string
      # ALTERNATIVE: exactly, uncomment the following line instead:
      # url: https://pypi.org/project/YOURPROJECT/${{ github.event.release.name }}

    steps:
      - name: Retrieve release distributions
        uses: actions/download-artifact@v4
        with:
          name: release-dists
          path: dist/

      - name: Publish release distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist/
```

.github/workflows/test.yml

Lines changed: 15 additions & 4 deletions
```diff
@@ -9,19 +9,30 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]

     steps:
       - uses: actions/checkout@v4
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           python -m pip install --upgrade pip setuptools wheel
           python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          python -m pip install -e .
-      - name: Display Python version
-        run: python -m unittest discover -s tests/unit
+          python -m pip install -e .[dev]
+
+      - name: Display Python & Installed Packages
+        run: |
+          python --version
+          pip freeze
+
+      - name: Run Unit Tests
+        run: python -m unittest discover -s tests/unit -v
+        env:
+          ACTIONS_STEP_DEBUG: true # Enable debug logs
+          ACTIONS_RUNNER_DEBUG: true # Additional debug logs from Github Actions itself
```
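The updated workflow discovers tests with `unittest`. As a hedged sketch (the class and test names here are invented, not taken from the repository), a minimal test module of the kind `python -m unittest discover -s tests/unit` picks up could look like this; discovery matches files named `test*.py` by default:

```python
import unittest


class TestExample(unittest.TestCase):
    # A trivial assertion standing in for a real unit test.
    def test_addition(self):
        self.assertEqual(1 + 1, 2)


# Run the case programmatically, which is roughly what the discover
# command does for each matching module.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestExample)
result = unittest.TextTestRunner(verbosity=0).run(suite)
print(result.wasSuccessful())
```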

.github/workflows/token_consistency.yaml

Lines changed: 0 additions & 6 deletions
```diff
@@ -13,21 +13,17 @@ on:
       - "chebai/preprocessing/bin/smiles_token/tokens.txt"
       - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt"
       - "chebai/preprocessing/bin/selfies/tokens.txt"
-      - "chebai/preprocessing/bin/protein_token/tokens.txt"
       - "chebai/preprocessing/bin/graph_properties/tokens.txt"
       - "chebai/preprocessing/bin/graph/tokens.txt"
       - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt"
-      - "chebai/preprocessing/bin/protein_token_3_gram/tokens.txt"
   pull_request:
     paths:
       - "chebai/preprocessing/bin/smiles_token/tokens.txt"
       - "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt"
       - "chebai/preprocessing/bin/selfies/tokens.txt"
-      - "chebai/preprocessing/bin/protein_token/tokens.txt"
       - "chebai/preprocessing/bin/graph_properties/tokens.txt"
       - "chebai/preprocessing/bin/graph/tokens.txt"
       - "chebai/preprocessing/bin/deepsmiles_token/tokens.txt"
-      - "chebai/preprocessing/bin/protein_token_3_gram/tokens.txt"

 jobs:
   check_tokens:
@@ -58,11 +54,9 @@ jobs:
           "chebai/preprocessing/bin/smiles_token/tokens.txt"
           "chebai/preprocessing/bin/smiles_token_unlabeled/tokens.txt"
           "chebai/preprocessing/bin/selfies/tokens.txt"
-          "chebai/preprocessing/bin/protein_token/tokens.txt"
           "chebai/preprocessing/bin/graph_properties/tokens.txt"
           "chebai/preprocessing/bin/graph/tokens.txt"
           "chebai/preprocessing/bin/deepsmiles_token/tokens.txt"
-          "chebai/preprocessing/bin/protein_token_3_gram/tokens.txt"
         )
         echo "TOKENS_FILES=${TOKENS_FILES[*]}" >> $GITHUB_ENV
```

.gitignore

Lines changed: 13 additions & 0 deletions
```diff
@@ -167,3 +167,16 @@ cython_debug/
 /logs
 /results_buffer
 electra_pretrained.ckpt
+
+build
+.virtual_documents
+.jupyter
+chebai.egg-info
+lightning_logs
+logs
+.isort.cfg
+/.vscode
+
+*.out
+*.err
+*.sh
```

.pre-commit-config.yaml

Lines changed: 16 additions & 22 deletions
```diff
@@ -1,25 +1,19 @@
 repos:
-  - repo: https://github.com/psf/black
-    rev: "24.2.0"
-    hooks:
-      - id: black
-      - id: black-jupyter # for formatting jupyter-notebook
+  # Use `pre-commit autoupdate` to update all the hooks.

-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
-        name: isort (python)
-        args: ["--profile=black"]
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version. https://docs.astral.sh/ruff/integrations/#pre-commit
+    rev: v0.14.11
+    hooks:
+      # Run the linter.
+      - id: ruff-check
+        args: [ --fix ]
+      # Run the formatter.
+      - id: ruff-format

-  - repo: https://github.com/asottile/seed-isort-config
-    rev: v2.2.0
-    hooks:
-      - id: seed-isort-config
-
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
-    hooks:
-      - id: check-yaml
-      - id: end-of-file-fixer
-      - id: trailing-whitespace
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
+    hooks:
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
```

README.md

Lines changed: 64 additions & 20 deletions
````diff
@@ -3,22 +3,9 @@
 ChEBai is a deep learning library designed for the integration of deep learning methods with chemical ontologies, particularly ChEBI.
 The library emphasizes the incorporation of the semantic qualities of the ontology into the learning process.

-## Note for developers
+## News

-If you have used ChEBai before PR #39, the file structure in which your ChEBI-data is saved has changed. This means that
-datasets will be freshly generated. The data however is the same. If you want to keep the old data (including the old
-splits), you can use a migration script. It copies the old data to the new location for a specific ChEBI class
-(including chebi version and other parameters). The script can be called by specifying the data module from a config
-```
-python chebai/preprocessing/migration/chebi_data_migration.py migrate --datamodule=[path-to-data-config]
-```
-or by specifying the class name (e.g. `ChEBIOver50`) and arguments separately
-```
-python chebai/preprocessing/migration/chebi_data_migration.py migrate --class_name=[data-class] [--chebi_version=[version]]
-```
-The new dataset will by default generate random data splits (with a given seed).
-To reuse a fixed data split, you have to provide the path of the csv file generated during the migration:
-`--data.init_args.splits_file_path=[path-to-processed_data]/splits.csv`
+Starting in version 1.1, we support regression tasks!

 ## Installation
@@ -33,9 +20,31 @@ git clone https://github.com/ChEB-AI/python-chebai.git

 ```
 cd python-chebai
-pip install .
+pip install -e .
 ```

+Some packages are not installed by default but can be added with the following extras:
+```
+pip install chebai[dev]
+```
+installs additional packages useful to people who want to contribute to the library.
+This includes `pre-commit`, which runs automatic formatting before each commit.
+To set up `pre-commit` for your workflow, run `pre-commit install`.
+For more details, see the [`pre-commit` documentation](https://pre-commit.com).
+
+```
+pip install chebai[plot]
+```
+installs additional packages useful for plotting and visualisation.
+```
+pip install chebai[wandb]
+```
+installs the [Weights & Biases](https://wandb.ai) integration for automated logging of training runs.
+```
+pip install chebai[all]
+```
+installs all optional dependencies.
+
 ## Usage

 The training and inference is abstracted using the Pytorch Lightning modules.
@@ -54,14 +63,19 @@ python -m chebai fit --trainer=configs/training/default_trainer.yml --model=conf
 ```
 A command with additional options may look like this:
 ```
-python3 -m chebai fit --trainer=configs/training/default_trainer.yml --model=configs/model/electra.yml --model.train_metrics=configs/metrics/micro-macro-f1.yml --model.test_metrics=configs/metrics/micro-macro-f1.yml --model.val_metrics=configs/metrics/micro-macro-f1.yml --model.pretrained_checkpoint=electra_pretrained.ckpt --model.load_prefix=generator. --data=configs/data/chebi50.yml --model.out_dim=1446 --model.criterion=configs/loss/bce.yml --data.init_args.batch_size=10 --trainer.logger.init_args.name=chebi50_bce_unweighted --data.init_args.num_workers=9 --model.pass_loss_kwargs=false --data.init_args.chebi_version=231 --data.init_args.data_limit=1000
+python3 -m chebai fit --trainer=configs/training/default_trainer.yml --model=configs/model/electra.yml --model.train_metrics=configs/metrics/micro-macro-f1.yml --model.test_metrics=configs/metrics/micro-macro-f1.yml --model.val_metrics=configs/metrics/micro-macro-f1.yml --model.pretrained_checkpoint=electra_pretrained.ckpt --model.load_prefix=generator. --data=configs/data/chebi/chebi50.yml --model.criterion=configs/loss/bce.yml --data.init_args.batch_size=10 --trainer.logger.init_args.name=chebi50_bce_unweighted --data.init_args.num_workers=9 --model.pass_loss_kwargs=false --data.init_args.chebi_version=231 --data.init_args.data_limit=1000
 ```

-### Fine-tuning for Toxicity prediction
+### Fine-tuning for classification tasks, e.g. Toxicity prediction
 ```
 python -m chebai fit --config=[path-to-your-tox21-config] --trainer.callbacks=configs/training/default_callbacks.yml --model.pretrained_checkpoint=[path-to-pretrained-model]
 ```

+### Fine-tuning for regression tasks, e.g. solubility prediction
+```
+python -m chebai fit --config=[path-to-your-esol-config] --trainer.callbacks=configs/training/solCur_callbacks.yml --model.pretrained_checkpoint=[path-to-pretrained-model]
+```
+
 ### Predicting classes given SMILES strings
 ```
 python3 -m chebai predict_from_file --model=[path-to-model-config] --checkpoint_path=[path-to-model] --input_path={path-to-file-containing-smiles] [--classes_path=[path-to-classes-file]] [--save_to=[path-to-output]]
@@ -72,8 +86,21 @@ The `classes_path` is the path to the dataset's `raw/classes.txt` file that cont

 ## Evaluation

-An example for evaluating a model trained on the ontology extension task is given in `tutorials/eval_model_basic.ipynb`.
-It takes in the finetuned model as input for performing the evaluation.
+You can evaluate a model trained on the ontology extension task in one of two ways:
+
+### 1. Using the Jupyter Notebook
+An example notebook is provided at `tutorials/eval_model_basic.ipynb`.
+- Load your finetuned model and run the evaluation cells to compute metrics on the test set.
+
+### 2. Using the Lightning CLI
+Alternatively, you can evaluate the model via the CLI:
+
+```bash
+python -m chebai test --trainer=configs/training/default_trainer.yml --trainer.devices=1 --trainer.num_nodes=1 --ckpt_path=[path-to-finetuned-model] --model=configs/model/electra.yml --model.test_metrics=configs/metrics/micro-macro-f1.yml --data=configs/data/chebi/chebi50.yml --data.init_args.batch_size=32 --data.init_args.num_workers=10 --data.init_args.chebi_version=[chebi-version] --model.pass_loss_kwargs=false --model.criterion=configs/loss/bce.yml --model.criterion.init_args.beta=0.99 --data.init_args.splits_file_path=[path-to-splits-file]
+```
+
+> **Note**: It is recommended to use `devices=1` and `num_nodes=1` during testing; multi-device settings use a `DistributedSampler`, which may replicate some samples to maintain equal batch sizes, so using a single device ensures that each sample or batch is evaluated exactly once.

 ## Cross-validation
 You can do inner k-fold cross-validation, i.e., train models on k train-validation splits that all use the same test
@@ -87,3 +114,20 @@ and the fold to be used in the current optimisation run as
 ```
 To train K models, you need to do K such calls, each with a different `fold_index`. On the first call with a given
 `inner_k_folds`, all folds will be created and stored in the data directory
+
+## Note for developers
+
+If you have used ChEBai before PR #39, the file structure in which your ChEBI-data is saved has changed. This means that
+datasets will be freshly generated. The data however is the same. If you want to keep the old data (including the old
+splits), you can use a migration script. It copies the old data to the new location for a specific ChEBI class
+(including chebi version and other parameters). The script can be called by specifying the data module from a config
+```
+python chebai/preprocessing/migration/chebi_data_migration.py migrate --datamodule=[path-to-data-config]
+```
+or by specifying the class name (e.g. `ChEBIOver50`) and arguments separately
+```
+python chebai/preprocessing/migration/chebi_data_migration.py migrate --class_name=[data-class] [--chebi_version=[version]]
+```
+The new dataset will by default generate random data splits (with a given seed).
+To reuse a fixed data split, you have to provide the path of the csv file generated during the migration:
+`--data.init_args.splits_file_path=[path-to-processed_data]/splits.csv`
````
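The README's note about `devices=1` can be made concrete with a small sketch (plain Python, no torch; the function name and the wrap-around padding scheme are an illustration of how a distributed sampler equalises shard sizes, not the library's actual implementation):

```python
# Sketch: to give every replica an equal-sized shard, the dataset is padded
# to a multiple of the number of devices by wrapping around to the start.
# The padded samples are evaluated twice, which skews test metrics.
def shard_indices(num_samples, num_replicas):
    total = ((num_samples + num_replicas - 1) // num_replicas) * num_replicas
    padded = [i % num_samples for i in range(total)]  # wrap around to pad
    return [padded[r::num_replicas] for r in range(num_replicas)]

shards = shard_indices(num_samples=10, num_replicas=4)
# 12 slots for 10 samples: samples 0 and 1 land on two replicas each.
print(shards)
```

With a single device no padding is needed, so each sample is evaluated exactly once.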

chebai/callbacks.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -80,7 +80,7 @@ def write_on_epoch_end(
         else:
             labels = [None for _ in idents]
         output = torch.sigmoid(p["output"]["logits"]).tolist()
-        for i, l, o in zip(idents, labels, output):
+        for i, l, o in zip(idents, labels, output):  # noqa: E741
             pred_list.append(dict(ident=i, labels=l, predictions=o))
         with open(os.path.join(self.output_dir, self.target_file), "wt") as fout:
             json.dump(pred_list, fout)
```
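For context, the loop touched by this change builds one JSON record per input. A self-contained sketch of the record format follows (the identifiers, labels, and logit values are invented for illustration, and a hand-written `sigmoid` stands in for `torch.sigmoid`):

```python
import json
import math


def sigmoid(x):
    # Scalar stand-in for torch.sigmoid.
    return 1.0 / (1.0 + math.exp(-x))


idents = ["CHEBI:1", "CHEBI:2"]          # hypothetical sample identifiers
labels = [[1, 0], [0, 1]]                # hypothetical ground-truth labels
logits = [[2.0, -1.0], [-0.5, 3.0]]      # hypothetical raw model outputs

# Mirror the callback: sigmoid the logits, then emit one dict per sample.
output = [[sigmoid(v) for v in row] for row in logits]
pred_list = [
    dict(ident=i, labels=lab, predictions=o)
    for i, lab, o in zip(idents, labels, output)
]
print(json.dumps(pred_list, indent=2))
```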

chebai/callbacks/epoch_metrics.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -62,7 +62,8 @@ def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None:
             labels (torch.Tensor): Ground truth labels.
         """
         tps = torch.sum(
-            torch.logical_and(preds > self.threshold, labels.to(torch.bool)), dim=0
+            torch.logical_and(preds > self.threshold, labels.to(torch.bool)),
+            dim=0,
         )
         self.true_positives += tps
         self.positive_predictions += torch.sum(preds > self.threshold, dim=0)
```
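The reformatted call above counts per-class true positives by summing over the batch dimension. A plain-Python sketch of the same counting logic (toy predictions and labels, no torch, threshold chosen arbitrarily at 0.5):

```python
threshold = 0.5
preds = [  # two samples, three classes: predicted probabilities
    [0.9, 0.2, 0.7],
    [0.6, 0.8, 0.1],
]
labels = [  # ground-truth multi-label targets
    [1, 0, 1],
    [0, 1, 1],
]

num_classes = len(preds[0])
tps = [0] * num_classes                   # true positives per class
positive_predictions = [0] * num_classes  # predicted positives per class
for row_p, row_l in zip(preds, labels):
    for c in range(num_classes):
        if row_p[c] > threshold:          # preds > self.threshold
            positive_predictions[c] += 1
            if row_l[c]:                  # logical_and with the label
                tps[c] += 1

print(tps, positive_predictions)
```

Summing the logical AND over `dim=0` in the torch version collapses the batch dimension exactly as the outer loop does here, leaving one count per class.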
