From 34d7d26db13432968a72c1f21214021d8d261122 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Mon, 11 May 2026 07:26:54 -0400 Subject: [PATCH] chore: add project files --- .github/CODEOWNERS | 1 + .github/workflows/checks.yaml | 33 ++++++ .github/workflows/pr-priority-label.yaml | 23 ++++ .gitignore | 144 +++++++++++++++++++++++ .pre-commit-config.yaml | 28 +++++ LICENSE | 21 ++++ README.md | 28 +++++ pyproject.toml | 105 +++++++++++++++++ 8 files changed, 383 insertions(+) create mode 100644 .github/CODEOWNERS create mode 100644 .github/workflows/checks.yaml create mode 100644 .github/workflows/pr-priority-label.yaml create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 LICENSE create mode 100644 pyproject.toml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..0b3a8da --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @GenomicMedLab/mci-knowledge-pilot-analysis diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml new file mode 100644 index 0000000..916754a --- /dev/null +++ b/.github/workflows/checks.yaml @@ -0,0 +1,33 @@ +name: checks +on: [push, pull_request] +jobs: + lint: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: "3.13" + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + + - name: Install dependencies + run: uv sync --extra dev + + - name: Check style + run: uv run ruff check && uv run ruff format --check + + precommit_hooks: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - uses: j178/prek-action@v1 + with: + extra_args: '--all-files --skip "ruff-format" --skip "ruff-check"' diff --git a/.github/workflows/pr-priority-label.yaml b/.github/workflows/pr-priority-label.yaml new file mode 100644 index 0000000..0bef462 --- /dev/null +++ b/.github/workflows/pr-priority-label.yaml @@ -0,0 +1,23 @@ 
+name: Pull Request Has Priority Label +on: + pull_request: + types: [opened, labeled, unlabeled, synchronize] +jobs: + pr-priority-label: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + outputs: + status: ${{ steps.check-labels.outputs.status }} + steps: + - id: check-labels + uses: mheap/github-action-required-labels@v5 + with: + mode: exactly + count: 1 + labels: "^priority:.*" # regex (use_regex), not a glob + use_regex: true + add_comment: true + message: "PRs require a priority label. Please add one." + exit_type: failure diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8997605 --- /dev/null +++ b/.gitignore @@ -0,0 +1,144 @@ +# Ignore xlsx temp file +\~* + +# Ignore system files +.DS_Store + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# Package/executable/environment management +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +.python-version +Pipfile.lock +uv.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# allow pytest override with pytest.ini +pytest.ini + +# IDE materials +.idea/ +.vim/ +*.swp diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..6fcb5f2 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 # pre-commit-hooks version + hooks: + - id: detect-aws-credentials + args: [ --allow-missing-credentials ] + - repo: builtin + hooks: + - id: trailing-whitespace + - id: check-added-large-files + args: ['--maxkb=1500'] + - id: check-case-conflict + - id: end-of-file-fixer + - id: fix-byte-order-marker + - id: check-json + - id: check-toml + - id: check-yaml + - id: mixed-line-ending + args: [ --fix=lf ] + - id: 
check-merge-conflict + - id: detect-private-key + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.10 # ruff version + hooks: + - id: ruff-format + - id: ruff-check + args: [ --fix, --exit-non-zero-on-fix ] +minimum_prek_version: 0.2.23 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..202496e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Wagner Laboratory at the Institute for Genomic Medicine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index 985c436..79e0f57 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,33 @@ # Molecular Characterization Initiative (MCI) Knowledge Pilot +[![Actions status](https://github.com/genomicmedlab/mci_knowledge_pilot/actions/workflows/checks.yaml/badge.svg)](https://github.com/genomicmedlab/mci_knowledge_pilot/actions/workflows/checks.yaml) + This repository contains a pilot effort to transform semi-structured somatic cancer variant classification knowledge into computable clinical assertions using [GA4GH Genomic Knowledge Standards (GKS)](https://www.ga4gh.org/work_stream/genomic-knowledge-standards/). Previously captured clinical significance classifications from spreadsheets were programmatically mapped to the [GA4GH Variant Annotation Specification (VA-Spec)](https://www.ga4gh.org/product/variant-annotation/) to evaluate approaches for reducing knowledge silos and improving interoperability. + +--- + +## Development + +Clone the repo and create a virtual environment: + +```shell +git clone https://github.com/genomicmedlab/mci_knowledge_pilot +cd mci_knowledge_pilot +python3 -m virtualenv venv +source venv/bin/activate +``` + +Install development dependencies and `prek`: + +```shell +python3 -m pip install -e '.[dev]' +prek install +``` + +Check style with `ruff`: + +```shell +python3 -m ruff format . && python3 -m ruff check --fix . 
+``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ff92af3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,105 @@ +[project] +name = "mci_knowledge_pilot" +version = "0.1.0" +authors = [ + {name = "Matthew Cannon"}, + {name = "Anastasia Bratulin"}, + {name = "Kori Kuzma"}, + {name = "Daniel Puthawala"}, + {name = "Alex Wagner"}, +] +readme = "README.md" +requires-python = ">=3.13,<3.14" +description = "Transform semi-structured somatic cancer variant classification knowledge into computable clinical assertions using GA4GH GKS" +license = "MIT" +license-files = ["LICENSE"] +dependencies = [ + "ipykernel", +] + +[project.optional-dependencies] +dev = [ + "prek>=0.2.23", + # keep pinned to the ruff-pre-commit rev in .pre-commit-config.yaml + "ruff==0.14.10", +] + +[tool.ruff] +extend-include = ["*.ipynb"] + +[tool.ruff.lint] +select = ["ALL"] +ignore = [ + # unused + "AIR", + "ERA", + "YTT", + "BLE", + "FBT", + "CPY", + "DJ", + "EXE", + "FIX", + "FA", + "PYI", + "TD", + "C90", + "NPY", + "PD", + # ignore for compatibility with formatter + "D206", + "D300", + "W191", + "E111", + "E114", + "E117", + "E501", + "COM812", + "COM819", + "Q000", + "Q001", + "Q002", + "Q003", + # don't require types on *args, **kwargs + "ANN002", + "ANN003", + # subjective pylint thresholds + "PLR0904", + # prefix selector (intentional): covers PLR0911-PLR0917 + "PLR091", + "PLR1702", + "PLC0206", + # misc unnecessary stuff + "S321", + "D203", + "D205", + "D213", + "D400", + "D415", +] + +[tool.ruff.lint.per-file-ignores] +# ANN001 - missing-type-function-argument +# ANN2 - missing-return-type +# D100 - undocumented-public-module +# D102 - undocumented-public-method +# D103 - undocumented-public-function +# S101 - assert +# B011 - assert-false +# INP001 - implicit-namespace-package +# PLR2004 - magic-value-comparison +"tests/*" = [ + "ANN001", + "ANN2", + "D", + "S101", + "B011", + "INP001", + "PLR2004", +] + +[tool.ruff.lint.flake8-annotations] +mypy-init-return = true + +[tool.ruff.format] +docstring-code-format = true