MultiQC · ewels · Dec 14, 2025 · Dec 16, 2025 · Dec 16, 2025 · Dec 16, 2025
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
@@ -0,0 +1,24 @@
+name: "Lint"
+
+on:
+  push:
+    branches: [main, master]  # pushes to either default-branch name
+  pull_request:  # no filters: runs for every pull request
+
+jobs:
+  pre-commit:
+    name: Pre-commit
+    runs-on: ubuntu-latest
+    timeout-minutes: 5  # cancel the job if it runs longer than this
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps the version a string
+
+      - name: Run pre-commit  # runs the repository's pre-commit hooks
+        uses: pre-commit/action@v3.0.1
diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml
@@ -1,43 +1,64 @@
 name: "Build - Linux"
-on: [push, pull_request]
+
+on:
+  push:
+    branches: [main, master]  # pushes to either default-branch name
+  pull_request:  # no filters: runs for every pull request
 
 jobs:
-  run_multiqc:
+  test:
     name: Linux - Python ${{ matrix.python-version }}
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false  # let the other Python versions finish when one fails
       matrix:
-        python-version: [3.6, 3.7, 3.8, 3.9]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]  # quoted so "3.10" is not read as the float 3.1
     timeout-minutes: 10
 
     steps:
-      # Check out MultiQC code
-      - uses: actions/checkout@v2
+      - name: Checkout code
+        uses: actions/checkout@v4
 
-      # Set up Python
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v1
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
-      # Update pip and install beautifulsoup4 for CI tests (CSP checking)
-      - name: Install dependencies for CI tests
+      - name: Install dependencies  # packaging tools first, then MultiQC from PyPI
         run: |
-          python -m pip install --upgrade pip setuptools beautifulsoup4 multiqc
+          python -m pip install --upgrade pip setuptools wheel
+          pip install multiqc
 
-      # Install MultiQC
       - name: Install MultiQC_SAV
         run: pip install .
 
-      # Run all of the tests!
-      - name: MiSeq
-        run: multiqc -m SAV test_data/MiSeq
+      - name: Test MiSeq  # report and log must exist, and the log must mention SAV
+        run: |
+          multiqc -m SAV test_data/MiSeq -o test_output/MiSeq --strict
+          [ -f test_output/MiSeq/multiqc_report.html ] || { echo "ERROR: Report not generated"; exit 1; }
+          [ -f test_output/MiSeq/multiqc_data/multiqc.log ] || { echo "ERROR: Log file missing"; exit 1; }
+          grep -q "SAV" test_output/MiSeq/multiqc_data/multiqc.log || { echo "ERROR: SAV module not found in log"; exit 1; }
 
-      - name: HiSeq
-        run: multiqc -m SAV test_data/HiSeq
+      - name: Test HiSeq  # report must exist and the log must mention SAV
+        run: |
+          multiqc -m SAV test_data/HiSeq -o test_output/HiSeq --strict
+          [ -f test_output/HiSeq/multiqc_report.html ] || { echo "ERROR: Report not generated"; exit 1; }
+          grep -q "SAV" test_output/HiSeq/multiqc_data/multiqc.log || { echo "ERROR: SAV module not found in log"; exit 1; }
 
-      - name: NextSeq
-        run: multiqc -m SAV test_data/NextSeq500
+      - name: Test NextSeq500  # report must exist and the log must mention SAV
+        run: |
+          multiqc -m SAV test_data/NextSeq500 -o test_output/NextSeq500 --strict
+          [ -f test_output/NextSeq500/multiqc_report.html ] || { echo "ERROR: Report not generated"; exit 1; }
+          grep -q "SAV" test_output/NextSeq500/multiqc_data/multiqc.log || { echo "ERROR: SAV module not found in log"; exit 1; }
 
-      - name: NovaSeq
-        run: multiqc -m SAV test_data/NovaSeq
+      - name: Test NextSeq2000  # report must exist and the log must mention SAV
+        run: |
+          multiqc -m SAV test_data/NextSeq2000 -o test_output/NextSeq2000 --strict
+          [ -f test_output/NextSeq2000/multiqc_report.html ] || { echo "ERROR: Report not generated"; exit 1; }
+          grep -q "SAV" test_output/NextSeq2000/multiqc_data/multiqc.log || { echo "ERROR: SAV module not found in log"; exit 1; }
+
+      - name: Test NovaSeq  # report must exist and the log must mention SAV
+        run: |
+          multiqc -m SAV test_data/NovaSeq -o test_output/NovaSeq --strict
+          [ -f test_output/NovaSeq/multiqc_report.html ] || { echo "ERROR: Report not generated"; exit 1; }
+          grep -q "SAV" test_output/NovaSeq/multiqc_data/multiqc.log || { echo "ERROR: SAV module not found in log"; exit 1; }
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
@@ -0,0 +1,31 @@
+name: "Publish to PyPI"
+
+on:
+  release:
+    types: [published]  # only when a GitHub release is published
+
+jobs:
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    environment: pypi  # GitHub environment this job runs under
+    permissions:
+      id-token: write  # lets the job request an OIDC token; presumably for PyPI trusted publishing, as no API token is passed below
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps the version a string
+
+      - name: Install build dependencies  # the PyPA "build" frontend used by the next step
+        run: pip install build
+
+      - name: Build package
+        run: python -m build
+
+      - name: Publish to PyPI  # no inputs given, so the action's defaults apply
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.gitignore b/.gitignore
@@ -139,4 +139,4 @@ data
 
 # MacOS cruft
 .DS_Store
-workspace.code*
+workspace.code*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,55 @@
+minimum_pre_commit_version: "2.9.2"
+
+repos:
+  # Meta hooks for verification
+  - repo: meta
+    hooks:
+      - id: identity  # prints the file names passed to it; useful when debugging file selection
+      - id: check-hooks-apply  # fails if a configured hook matches no file in the repository
+
+  # Standard pre-commit hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: "v5.0.0"
+    hooks:
+      - id: check-added-large-files
+        args: ["--maxkb=1000"]  # reject newly added files larger than 1000 kB
+      - id: check-merge-conflict
+      - id: check-yaml
+      - id: check-toml
+      - id: debug-statements  # flags leftover debugger imports and breakpoint() calls in Python
+      - id: end-of-file-fixer
+        exclude: ^(test_data|docs/example)/
+      - id: trailing-whitespace
+        exclude: ^(test_data|docs/example)/
+      - id: mixed-line-ending
+        args: ["--fix=lf"]  # rewrite line endings to LF
+        exclude: ^(test_data|docs/example)/
+
+  # Line ending normalization
+  - repo: https://github.com/Lucas-C/pre-commit-hooks
+    rev: "v1.5.5"
+    hooks:
+      - id: remove-crlf  # NOTE(review): appears to overlap with mixed-line-ending --fix=lf above; confirm both are needed
+        exclude: ^(test_data|docs/example)/
+
+  # Prettier for markdown, yaml, json formatting
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: "v3.1.0"
+    hooks:
+      - id: prettier
+        types_or: [markdown, yaml, json]  # only run on these file types
+        additional_dependencies:
+          - "prettier@3.1.0"  # npm package pinned to the same version as rev
+  # Ruff linting and formatting.
+  # The lint hook runs with --fix, so it is listed before the formatter:
+  # auto-fixes can leave code that still needs reformatting, and running
+  # ruff-format afterwards cleans that up within the same pre-commit run.
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: "v0.8.3"
+    hooks:
+      # Lint first: apply fixes and fail the hook if any file was changed
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix]
+      # Format second, once lint fixes are in place
+      - id: ruff-format
diff --git a/.prettierignore b/.prettierignore
@@ -0,0 +1,30 @@
+# Build artifacts
+dist/
+build/
+*.egg-info/
+
+# Python cache
+__pycache__/
+*.pyc
+
+# Test data
+test_data/
+
+# Virtual environments
+venv/
+.venv/
+env/
+
+# Coverage
+htmlcov/
+.coverage
+
+# IDE
+.idea/
+.vscode/
+
+# Example reports
+docs/example/
+
+# Lock files
+*.lock
diff --git a/.prettierrc.js b/.prettierrc.js
@@ -0,0 +1,10 @@
+module.exports = {
+  printWidth: 120, // wrap at 120 columns
+  tabWidth: 2, // two spaces per indentation level
+  useTabs: false, // indent with spaces, not tabs
+  semi: true, // end statements with semicolons
+  singleQuote: false, // prefer double quotes
+  trailingComma: "all", // trailing commas wherever the syntax allows them
+  bracketSpacing: true, // spaces inside object braces: { a: 1 }
+  proseWrap: "preserve", // leave markdown prose line breaks as written
+};
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,132 @@
+# CLAUDE.md
+
+This file provides guidance for Claude Code (claude.ai/code) when working with the MultiQC_SAV plugin.
+
+## Project Overview
+
+MultiQC_SAV is a plugin for [MultiQC](https://multiqc.info/) that parses InterOp files from Illumina sequencers and generates tables and graphs for quality control metrics. It leverages the [Illumina InterOp Python API](https://github.com/Illumina/interop) to read binary metric files.
+
+## Project Structure
+
+```
+MultiQC_SAV/
+├── multiqc_sav/                    # Main plugin package
+│   ├── __init__.py                 # Package identifier
+│   ├── multiqc_sav.py              # Plugin hook/config registration
+│   └── modules/
+│       └── sav.py                  # Main SAV module implementation
+├── test_data/                      # Test datasets for various sequencers
+│   ├── HiSeq/
+│   ├── MiSeq/
+│   ├── NextSeq500/
+│   ├── NextSeq2000/
+│   └── NovaSeq/
+├── docs/                           # Documentation and example reports
+├── .github/workflows/              # CI/CD workflows
+│   ├── linux.yaml                  # Build and test workflow
+│   └── lint.yaml                   # Linting workflow
+├── pyproject.toml                  # Project configuration and dependencies
+├── .pre-commit-config.yaml         # Pre-commit hooks configuration
+├── .prettierrc.js                  # Prettier formatter config
+└── .prettierignore                 # Prettier exclusions
+```
+
+## Development Setup
+
+```bash
+# Install in development mode with dev dependencies
+pip install -e ".[dev]"
+
+# Install pre-commit hooks
+pre-commit install
+```
+
+## Common Commands
+
+```bash
+# Run linting
+ruff check .
+ruff format --check .
+
+# Auto-fix linting issues
+ruff check --fix .
+ruff format .
+
+# Run prettier on markdown/yaml
+prettier --check "**/*.{md,yaml,yml,json}"
+
+# Run tests with test data
+multiqc -m SAV test_data/MiSeq
+multiqc -m SAV test_data/HiSeq
+multiqc -m SAV test_data/NextSeq500
+multiqc -m SAV test_data/NextSeq2000
+multiqc -m SAV test_data/NovaSeq
+
+# Run all pre-commit hooks
+pre-commit run --all-files
+```
+
+## Code Style Guidelines
+
+- **Line length**: 120 characters
+- **Python version**: 3.9+
+- **Formatting**: Ruff (format + lint)
+- **Type hints**: Required for all function signatures
+- **Docstrings**: Required for all public functions
+- **Imports**: Sorted by isort (via ruff)
+
+## MultiQC Module Architecture
+
+### Plugin Registration
+
+The plugin uses entry points in `pyproject.toml`:
+
+```toml
+[project.entry-points."multiqc.hooks.v1"]
+config_loaded = "multiqc_sav.multiqc_sav:update_config"
+
+[project.entry-points."multiqc.modules.v1"]
+SAV = "multiqc_sav.modules.sav:SAV"
+```
+
+### Hook System
+
+`multiqc_sav.py` contains the `update_config()` hook which:
+
+- Registers the SAV module in the module order
+- Sets module tags (DNA, RNA, BCL, Demultiplex)
+- Disables the built-in InterOp module to avoid duplicate data
+- Configures search patterns for RunInfo.xml and RunParameters.xml
+
+### Main Module
+
+`modules/sav.py` contains the `SAV` class which extends `BaseMultiqcModule`:
+
+1. **File Discovery**: Uses `find_log_files("SAV/xml")` to locate XML files
+2. **Run Info Parsing**: Extracts metadata from RunInfo.xml
+3. **Metrics Loading**: Uses InterOp API to read binary metric files
+4. **Data Processing**: Parses metrics into pandas DataFrames
+5. **Visualization**: Generates MultiQC plots (tables, bargraphs, heatmaps, linegraphs, scatter plots)
+
+## Supported Sequencers
+
+- MiSeq
+- HiSeq 3000/4000
+- NextSeq 500/2000
+- NovaSeq 6000
+
+## Key Dependencies
+
+- `interop>=1.1.23` - Illumina InterOp Python API for reading binary metrics
+- `multiqc>=1.10` - MultiQC framework
+- `pandas` - Data manipulation
+- `numpy` - Numerical operations
+
+## Testing
+
+Tests are run via GitHub Actions on Python 3.9-3.12. Each test runs MultiQC in `--strict` mode with the SAV module flag on one test dataset, then checks that the HTML report was generated and that the MultiQC log mentions SAV.
+
+## CI/CD
+
+- **Build workflow** (`linux.yaml`): Tests installation and module execution
+- **Lint workflow** (`lint.yaml`): Runs all pre-commit hooks, which include ruff lint, ruff format, prettier, and the standard file checks