From ef18ca25ee4fddf40f52529666c6c57e0221a728 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Wed, 18 Mar 2026 02:55:55 +0000 Subject: [PATCH] chore: delete --- .devcontainer/Dockerfile | 1 - .devcontainer/devcontainer.json | 23 - .github/workflows/publish.yml | 114 - .gitignore | 169 - .gitmodules | 3 - .pre-commit-config.yaml | 33 - .python-version | 1 - .vscode/extensions.json | 4 - .vscode/settings.json | 5 - .yamlfmt.yaml | 2 - CONTRIBUTING.md | 132 - LICENSE | 21 - README.md | 68 +- __init__.py | 0 _typos.toml | 12 - cache/.gitignore | 2 - cache/README.md | 32 - cache/Snakefile | 33 - cache/__init__.py | 127 - cache/biomart/README.md | 4 - cache/biomart/ensg-ensp.xml | 9 - cache/cli.py | 30 - cache/directory.py | 258 - cache/util.py | 11 - configs/dmmm.yaml | 113 - configs/pra.yaml | 58 - datasets/README.md | 18 - datasets/__init__.py | 0 datasets/contributing/.gitignore | 4 - datasets/contributing/README.md | 10 - datasets/contributing/raw_generation.py | 84 - datasets/depmap/.gitignore | 3 - datasets/depmap/README.md | 63 - datasets/depmap/Snakefile | 67 - .../depmap/scripts/cell_line_processing.py | 222 - datasets/depmap/scripts/uniprot_mapping.py | 84 - datasets/diseases/.gitignore | 9 - datasets/diseases/README.md | 61 - datasets/diseases/Snakefile | 77 - datasets/diseases/figs/DISEASES-board.jpg | Bin 242801 -> 0 bytes datasets/diseases/scripts/files.py | 43 - datasets/diseases/scripts/gold_standard.py | 77 - datasets/diseases/scripts/inputs.py | 35 - datasets/diseases/scripts/interactome.py | 21 - datasets/diseases/viz/viz.ipynb | 509 -- datasets/egfr/.gitignore | 2 - datasets/egfr/README.md | 18 - datasets/egfr/Snakefile | 61 - datasets/egfr/scripts/map_ensembl.py | 42 - .../egfr/scripts/process_gold_standard.py | 16 - datasets/egfr/scripts/process_interactome.py | 20 - datasets/egfr/scripts/process_prizes.py | 20 - datasets/hiv/.gitignore | 4 - datasets/hiv/README.md | 29 - datasets/hiv/Snakefile | 56 - datasets/hiv/scripts/name_mapping.py | 26 - datasets/hiv/scripts/prepare.py | 38 - datasets/hiv/scripts/spras_formatting.py | 35 - datasets/rn_muscle_skeletal/.gitignore | 2 - datasets/rn_muscle_skeletal/README.md | 18 - datasets/rn_muscle_skeletal/Snakefile | 29 - .../rn_muscle_skeletal/curated/sources.txt | 14 - .../rn_muscle_skeletal/curated/targets.txt | 15 - datasets/rn_muscle_skeletal/process.py | 23 - .../reproduction/raw/ResponseNetNetwork.json | 4868 ----------------- datasets/synthetic_data/.gitignore | 4 - datasets/synthetic_data/README.md | 40 - datasets/synthetic_data/Snakefile | 131 - datasets/synthetic_data/__init__.py | 0 datasets/synthetic_data/explore/.gitignore | 1 - .../explore/pathway_statistics.py | 68 - .../panther_pathways/.gitignore | 3 - .../synthetic_data/panther_pathways/README.md | 9 - .../synthetic_data/panther_pathways/Snakefile | 23 - .../panther_pathways/__init__.py | 0 .../panther_pathways/fetch_from_owl.py | 21 - datasets/synthetic_data/pathways.jsonc | 37 - datasets/synthetic_data/scripts/__init__.py | 0 .../synthetic_data/scripts/fetch_pathway.py | 50 - .../synthetic_data/scripts/interactome.py | 68 - .../scripts/list_curated_pathways.py | 28 - .../scripts/map_transcription_factors.py | 21 - .../scripts/panther_spras_formatting.py | 111 - .../scripts/process_panther_pathway.py | 75 - datasets/synthetic_data/scripts/sampling.py | 138 - .../synthetic_data/scripts/util/__init__.py | 0 .../synthetic_data/scripts/util/parser.py | 20 - datasets/synthetic_data/util/__init__.py | 0 .../synthetic_data/util/parse_pc_pathways.py | 25 - datasets/yeast_osmotic_stress/.gitignore | 2 - datasets/yeast_osmotic_stress/README.md | 31 - datasets/yeast_osmotic_stress/Snakefile | 59 - .../yeast_osmotic_stress/process_prizes.py | 18 - pyproject.toml | 44 - run_snakemake.sh | 25 - spras | 1 - tools/README.md | 8 - tools/__init__.py | 0 tools/mapping/__init__.py | 0 tools/mapping/ensembl_uniprot.py | 56 - tools/sample.py | 105 - tools/trim.py | 10 - uv.lock | 1167 ---- web/.gitignore | 21 - web/.prettierrc | 12 - web/README.md | 26 - web/astro.config.mts | 8 - web/package.json | 28 - web/pnpm-lock.yaml | 3685 ------------- web/public/favicon.svg | 9 - web/src/components/Colors.astro | 41 - web/src/components/MediumZoom.astro | 5 - web/src/components/Visualization.astro | 47 - web/src/components/VisualizationScript.astro | 37 - web/src/layouts/BaseLayout.astro | 53 - web/src/lib/commit.ts | 6 - web/src/lib/dataset.ts | 13 - web/src/lib/outputStyle.ts | 123 - web/src/lib/paths.ts | 19 - web/src/pages/[uid]/index.astro | 90 - web/src/pages/description.md | 25 - web/src/pages/index.astro | 76 - .../[...slug]/index.astro | 45 - .../pages/type-category-dataset/index.astro | 29 - .../[type]-[category]/index.astro | 27 - web/src/pages/type-category/index.astro | 24 - web/tsconfig.json | 5 - 127 files changed, 2 insertions(+), 14839 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 .devcontainer/devcontainer.json delete mode 100644 .github/workflows/publish.yml delete mode 100644 .gitignore delete mode 100644 .gitmodules delete mode 100644 .pre-commit-config.yaml delete mode 100644 .python-version delete mode 100644 .vscode/extensions.json delete mode 100644 .vscode/settings.json delete mode 100644 .yamlfmt.yaml delete mode 100644 CONTRIBUTING.md delete mode 100644 LICENSE delete mode 100644 __init__.py delete mode 100644 _typos.toml delete mode 100644 cache/.gitignore delete mode 100644 cache/README.md delete mode 100644 cache/Snakefile delete mode 100644 cache/__init__.py delete mode 100644 cache/biomart/README.md delete mode 100644 cache/biomart/ensg-ensp.xml delete mode 100644 cache/cli.py delete mode 100644 cache/directory.py delete mode 100644 cache/util.py delete mode 100644 configs/dmmm.yaml delete mode 100644 configs/pra.yaml delete mode 100644 datasets/README.md delete mode 100644 datasets/__init__.py delete mode 100644 datasets/contributing/.gitignore delete mode 100644 datasets/contributing/README.md delete mode 100644 datasets/contributing/raw_generation.py delete mode 100644 datasets/depmap/.gitignore delete mode 100644 datasets/depmap/README.md delete mode 100644 datasets/depmap/Snakefile delete mode 100644 datasets/depmap/scripts/cell_line_processing.py delete mode 100644 datasets/depmap/scripts/uniprot_mapping.py delete mode 100644 datasets/diseases/.gitignore delete mode 100644 datasets/diseases/README.md delete mode 100644 datasets/diseases/Snakefile delete mode 100644 datasets/diseases/figs/DISEASES-board.jpg delete mode 100644 datasets/diseases/scripts/files.py delete mode 100644 datasets/diseases/scripts/gold_standard.py delete mode 100644 datasets/diseases/scripts/inputs.py delete mode 100644 datasets/diseases/scripts/interactome.py delete mode 100644 datasets/diseases/viz/viz.ipynb delete mode 100644 datasets/egfr/.gitignore delete mode 100644 datasets/egfr/README.md delete mode 100644 datasets/egfr/Snakefile delete mode 100644 datasets/egfr/scripts/map_ensembl.py delete mode 100644 datasets/egfr/scripts/process_gold_standard.py delete mode 100644 datasets/egfr/scripts/process_interactome.py delete mode 100644 datasets/egfr/scripts/process_prizes.py delete mode 100644 datasets/hiv/.gitignore delete mode 100644 datasets/hiv/README.md delete mode 100644 datasets/hiv/Snakefile delete mode 100644 datasets/hiv/scripts/name_mapping.py delete mode 100644 datasets/hiv/scripts/prepare.py delete mode 100644 datasets/hiv/scripts/spras_formatting.py delete mode 100644 datasets/rn_muscle_skeletal/.gitignore delete mode 100644 datasets/rn_muscle_skeletal/README.md delete mode 100644 datasets/rn_muscle_skeletal/Snakefile delete mode 100644 datasets/rn_muscle_skeletal/curated/sources.txt delete mode 100644 datasets/rn_muscle_skeletal/curated/targets.txt delete mode 100644 datasets/rn_muscle_skeletal/process.py delete mode 100644 datasets/rn_muscle_skeletal/reproduction/raw/ResponseNetNetwork.json delete mode 100644 datasets/synthetic_data/.gitignore delete mode 100644 datasets/synthetic_data/README.md delete mode 100644 datasets/synthetic_data/Snakefile delete mode 100644 datasets/synthetic_data/__init__.py delete mode 100644 datasets/synthetic_data/explore/.gitignore delete mode 100644 datasets/synthetic_data/explore/pathway_statistics.py delete mode 100644 datasets/synthetic_data/panther_pathways/.gitignore delete mode 100644 datasets/synthetic_data/panther_pathways/README.md delete mode 100644 datasets/synthetic_data/panther_pathways/Snakefile delete mode 100644 datasets/synthetic_data/panther_pathways/__init__.py delete mode 100644 datasets/synthetic_data/panther_pathways/fetch_from_owl.py delete mode 100644 datasets/synthetic_data/pathways.jsonc delete mode 100644 datasets/synthetic_data/scripts/__init__.py delete mode 100644 datasets/synthetic_data/scripts/fetch_pathway.py delete mode 100644 datasets/synthetic_data/scripts/interactome.py delete mode 100644 datasets/synthetic_data/scripts/list_curated_pathways.py delete mode 100644 datasets/synthetic_data/scripts/map_transcription_factors.py delete mode 100644 datasets/synthetic_data/scripts/panther_spras_formatting.py delete mode 100644 datasets/synthetic_data/scripts/process_panther_pathway.py delete mode 100644 datasets/synthetic_data/scripts/sampling.py delete mode 100644 datasets/synthetic_data/scripts/util/__init__.py delete mode 100644 datasets/synthetic_data/scripts/util/parser.py delete mode 100644 datasets/synthetic_data/util/__init__.py delete mode 100644 datasets/synthetic_data/util/parse_pc_pathways.py delete mode 100644 datasets/yeast_osmotic_stress/.gitignore delete mode 100644 datasets/yeast_osmotic_stress/README.md delete mode 100644 datasets/yeast_osmotic_stress/Snakefile delete mode 100644 datasets/yeast_osmotic_stress/process_prizes.py delete mode 100644 pyproject.toml delete mode 100755 run_snakemake.sh delete mode 160000 spras delete mode 100644 tools/README.md delete mode 100644 tools/__init__.py delete mode 100644 tools/mapping/__init__.py delete mode 100644 tools/mapping/ensembl_uniprot.py delete mode 100644 tools/sample.py delete mode 100644 tools/trim.py delete mode 100644 uv.lock delete mode 100644 web/.gitignore delete mode 100644 web/.prettierrc delete mode 100644 web/README.md delete mode 100644 web/astro.config.mts delete mode 100644 web/package.json delete mode 100644 web/pnpm-lock.yaml delete mode 100644 web/public/favicon.svg delete mode 100644 web/src/components/Colors.astro delete mode 100644 web/src/components/MediumZoom.astro delete mode 100644 web/src/components/Visualization.astro delete mode 100644 web/src/components/VisualizationScript.astro delete mode 100644 web/src/layouts/BaseLayout.astro delete mode 100644 web/src/lib/commit.ts delete mode 100644 web/src/lib/dataset.ts delete mode 100644 web/src/lib/outputStyle.ts delete mode 100644 web/src/lib/paths.ts delete mode 100644 web/src/pages/[uid]/index.astro delete mode 100644 web/src/pages/description.md delete mode 100644 web/src/pages/index.astro delete mode 100644 web/src/pages/type-category-dataset/[...slug]/index.astro delete mode 100644 web/src/pages/type-category-dataset/index.astro delete mode 100644 web/src/pages/type-category/[type]-[category]/index.astro delete mode 100644 web/src/pages/type-category/index.astro delete mode 100644 web/tsconfig.json diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 8bb8e28f..00000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -FROM mcr.microsoft.com/devcontainers/anaconda:1-3 diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 4dda9426..00000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,23 +0,0 @@ -// Small devcontainer which loads anaconda. All postinstallation steps have to be done manually. -// This comes with snakemake and docker-in-docker. - -// For format details, see https://aka.ms/devcontainer.json. For config options, see the -// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda -{ - "name": "Anaconda (Python 3)", - "build": { - "context": "..", - "dockerfile": "Dockerfile" - }, - "features": { - "ghcr.io/devcontainers/features/docker-in-docker:2": {}, - // For yamlfmt - "ghcr.io/devcontainers/features/go:1": {}, - // For web display - "ghcr.io/devcontainers/features/node:1": {}, - // For scripting - "ghcr.io/va-h/devcontainers-features/uv:1": {}, - // For paxtools - "ghcr.io/devcontainers/features/java:1": {} - } -} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 5b22e90c..00000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,114 +0,0 @@ -name: Test SPRAS - -on: - pull_request: - branches: [main] - push: - branches: [main] - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - pages: write - id-token: write - -jobs: - pre-commit: - name: Run pre-commit checks - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - name: Run pre-commit checks - uses: pre-commit/action@v3.0.0 - checks: - name: Run workflow - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - submodules: true - - name: Install uv for scripting - uses: astral-sh/setup-uv@v6.1.0 - with: - version: "0.7.13" - - name: Setup conda - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: spras - environment-file: spras/environment.yml - auto-activate-base: false - miniconda-version: 'latest' - # Install spras in the environment using pip - - name: Install spras in conda env - shell: bash --login {0} - run: pip install ./spras - # Log conda environment contents - - name: Log conda environment - shell: bash --login {0} - run: conda list - - name: Fetch Artifact Cache - id: fetch-cache - uses: actions/cache/restore@v4 - with: - path: cache/artifacts - key: cache-artifacts - - name: Process raw data through Snakemake - run: sh run_snakemake.sh - - name: Cache Artifact Cache - id: cache-cache - uses: actions/cache/save@v4 - with: - path: cache/artifacts - key: cache-artifacts - - name: Run Snakemake workflow for DMMMs - shell: bash --login {0} - run: snakemake --cores 4 --configfile configs/dmmm.yaml --show-failed-logs -s spras/Snakefile - - name: Run Snakemake workflow for PRAs - shell: bash --login {0} - run: snakemake --cores 4 --configfile configs/pra.yaml --show-failed-logs -s spras/Snakefile - - name: Setup PNPM - uses: pnpm/action-setup@v4 - with: - version: 10 - - name: Install web dependencies - working-directory: ./web - run: pnpm install - - name: Run web builder - working-directory: ./web - run: pnpm build - - name: Upload built website distribution folder - uses: actions/upload-artifact@v4 - with: - name: build - path: web/dist - pages: - needs: checks - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - concurrency: - group: 'pages' - cancel-in-progress: true - steps: - - name: Download Artifacts - uses: actions/download-artifact@v4 - with: - name: build - path: dist - - name: Setup Pages - uses: actions/configure-pages@v2 - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - path: dist - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 196f12a8..00000000 --- a/.gitignore +++ /dev/null @@ -1,169 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -.idea/ - -# Snakemake -.snakemake - -# Output -/output -/web/output - -# pnpm -.pnpm-store - -# mac -.DS_Store diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 11f80da4..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "spras"] - path = spras - url = https://github.com/Reed-CompBio/spras diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index f687b4c2..00000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks -default_language_version: - # Match this to the version specified in environment.yml - python: python3.11 -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 # Use the ref you want to point at - hooks: - # Attempts to load all yaml files to verify syntax. - - id: check-yaml - # Attempts to load all TOML files to verify syntax. - - id: check-toml - # Trims trailing whitespace. - - id: trailing-whitespace - # Preserves Markdown hard linebreaks. - args: [--markdown-linebreak-ext=md] - # Do not trim whitespace from all files, input files may need trailing whitespace for empty values in columns. - types_or: [markdown, python, yaml] - # Skip this Markdown file, which has an example of an input text file within it. - exclude: input/README.md - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.15.4' - hooks: - - id: ruff - - repo: https://github.com/google/yamlfmt - rev: v0.17.0 - hooks: - - id: yamlfmt - - repo: https://github.com/crate-ci/typos - rev: v1.34.0 - hooks: - - id: typos diff --git a/.python-version b/.python-version deleted file mode 100644 index 24ee5b1b..00000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.13 diff --git a/.vscode/extensions.json b/.vscode/extensions.json deleted file mode 100644 index 22a15055..00000000 --- a/.vscode/extensions.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "recommendations": ["astro-build.astro-vscode"], - "unwantedRecommendations": [] -} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index bebd33fc..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "editor.rulers": [ - 150 - ] -} \ No newline at end of file diff --git a/.yamlfmt.yaml b/.yamlfmt.yaml deleted file mode 100644 index 9d3236aa..00000000 --- a/.yamlfmt.yaml +++ /dev/null @@ -1,2 +0,0 @@ -formatter: - retain_line_breaks_single: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index b6766532..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,132 +0,0 @@ -# Contributing - -## Helping Out - -There are `TODOs` that better enhance the reproducibility and accuracy of datasets or analysis of algorithm outputs, as well as -[open resolvable issues](https://github.com/Reed-CompBio/spras-benchmarking/). - -## Adding a dataset - -**Check that your data provider isn't already a dataset in `datasets`.** There are some datasets that are able to serve more data, and only use -a subset of it: these datasets can be extended for your needs. - -The goal of a dataset is to take raw data and produce data to be fed to SPRAS. -We'll follow along with `datasets/contributing`. This mini-tutorial assumes that you already have familiarity with SPRAS -[as per its contributing guide](https://spras.readthedocs.io/en/latest/contributing/index.html). - -### Uploading raw data - -This is a fake dataset: the data can be generated by running `datasets/contributing/raw_generation.py`, where the following artifacts will output: -- `sources.txt` -- `targets.txt` -- `gold-standard.tsv` -- `interactome.tsv` - -Unlike in this example, the data used in other datasets comes from other sources (whether that's supplementary info in a paper, or out of -biological databases like UniProt.) These artifacts can be large, and occasionally update, so we store them in Google Drive for caching and download -them when we want to reconstruct a dataset. - -Note that the four artifacts above change every time `raw_generation.py` is run. Upload those artifacts to Google Drive in a folder of your choice. -Share the file and allow for _Anyone with the link_ to _View_ the file. - -Once shared, copying the URL should look something like: - -> https://drive.google.com/file/d/1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h/view?usp=sharing - -We always drop the entire `/view?...` suffix, and replace `/file/d/` with `/uc?id=`, which turns the URL to a direct download link, which is internally -downloaded with [gdown](https://github.com/wkentaro/gdown). Those post-processing steps should make the URL now look as so: - -> https://drive.google.com/uc?id=1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h - -Now, add a directive to `cache/directory.py` under `Contributing`. Since this doesn't have an online URL, this should use `CacheItem.cache_only`, to -indicate that no other online database serves this URL. - -Your new directive under the `directory` dictionary should look something as so, with one entry for every artifact: - -```python -..., -"Contributing": { - "interactome.tsv": CacheItem.cache_only( - name="Randomly-generated contributing interactome", - cached="https://drive.google.com/uc?id=..." - ), - ... -} -``` - -### Setting up a workflow - -Now, we need to make these files SPRAS-compatible. To do this, we'll set up a `Snakefile`, which will handle: -- Artifact downloading -- Script running. - -`sources.txt` and `targets.txt` are already in a SPRAS-ready format, but we need to process `gold-standard.tsv` and `interactome.tsv`. - -Create a `Snakefile` under your dataset with the top-level directives: - -```python -# This provides the `produce_fetch_rules` util to allows us to automatically fetch the Google Drive data. -include: "../../cache/Snakefile" - -rule all: - input: - # The two files we will be passing to SPRAS - "raw/sources.txt", - "raw/targets.txt", - # The two files we will be processing - "processed/gold-standard.tsv", - "processed/interactome.tsv" -``` - -We'll generate four `fetch` rules, or rules that tell Snakemake to download the data we uploaded to Google Drive earlier. - -```python -produce_fetch_rules({ - # The value array is a path into the dictionary from `cache/directory.py`. - "raw/sources.txt": ["Contributing", "sources.txt"], - # and so on for targets, gold-standard, and interactome: - # note that excluding these three stops the Snakemake file from working by design! - ... -}) -``` - -Create two scripts that make `gold-standard.tsv` and `interactome.tsv` SPRAS-ready, consulting -the [SPRAS file format documentation](https://spras.readthedocs.io/en/latest/output.html). You can use any dependencies inside the top-level -`pyproject.toml`, and you can test out your scripts with `uv run diff --git a/web/src/components/Visualization.astro b/web/src/components/Visualization.astro deleted file mode 100644 index a9b2dae6..00000000 --- a/web/src/components/Visualization.astro +++ /dev/null @@ -1,47 +0,0 @@ ---- -import VisualizationScript from "./VisualizationScript.astro"; - -interface Props { - interactome: string; -} - -const { interactome } = Astro.props; - -const edgeLimit = 300; - -const noHeaderInteractomeArray = interactome.trim().split("\n").slice(1); -const noHeaderInteractome = noHeaderInteractomeArray.length > edgeLimit ? "BIG" : noHeaderInteractomeArray.join("\n"); ---- - - - -{ - noHeaderInteractome === "" ? ( -

There is nothing to visualize.

- ) : ( -
- -
- ) -} - -{noHeaderInteractome !== "" && } diff --git a/web/src/components/VisualizationScript.astro b/web/src/components/VisualizationScript.astro deleted file mode 100644 index e8328b2c..00000000 --- a/web/src/components/VisualizationScript.astro +++ /dev/null @@ -1,37 +0,0 @@ - diff --git a/web/src/layouts/BaseLayout.astro b/web/src/layouts/BaseLayout.astro deleted file mode 100644 index c44f966d..00000000 --- a/web/src/layouts/BaseLayout.astro +++ /dev/null @@ -1,53 +0,0 @@ ---- -import dayjs from "dayjs"; -import { revision, shortRevision } from "../lib/commit"; -import "@fontsource-variable/noto-sans"; -const buildDate = dayjs(); ---- - - - - - - - - SPRAS Benchmark Results - - - - - -
-
- -
- -
- - diff --git a/web/src/lib/commit.ts b/web/src/lib/commit.ts deleted file mode 100644 index 0d85c64b..00000000 --- a/web/src/lib/commit.ts +++ /dev/null @@ -1,6 +0,0 @@ -import { execSync } from "child_process"; - -const decoder = new TextDecoder(); - -export const revision = decoder.decode(execSync("git rev-parse HEAD")).trim(); -export const shortRevision = revision.substring(0, 6); diff --git a/web/src/lib/dataset.ts b/web/src/lib/dataset.ts deleted file mode 100644 index d14e8699..00000000 --- a/web/src/lib/dataset.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { parse } from "yaml"; - -import dmmmYaml from "../../public/data/configs/dmmm.yaml?raw"; -import praYaml from "../../public/data/configs/dmmm.yaml?raw"; - -const configs: Record> = { - dmmm: parse(dmmmYaml), - pra: parse(praYaml), -}; - -export const datasets = Object.entries(configs) - .map(([type, entry]) => (entry["datasets"] as Record[]).map((dataset) => ({ ...dataset, type }))) - .flat(); diff --git a/web/src/lib/outputStyle.ts b/web/src/lib/outputStyle.ts deleted file mode 100644 index 4c848d3b..00000000 --- a/web/src/lib/outputStyle.ts +++ /dev/null @@ -1,123 +0,0 @@ -interface Output { - dataType: string; - datasetCategory: string; - datasetName?: string; - algorithm: string; - paramsHash: string; -} - -function extractPrefix(name: string, prefixName: string, prefixes: string[]): { prefix: string; name: string } { - const foundPrefix = prefixes.find((prefix) => name.startsWith(prefix)); - - if (!foundPrefix) { - throw new Error(`${name} should begin with a ${prefixName} (one of ${prefixes})`); - } - - return { - prefix: foundPrefix, - name: name.substring(foundPrefix.length), - }; -} - -const dataTypes = ["pra", "dmmm"]; - -export function extractDatasetType(name: string): { type: string; name: string } { - const { prefix, name: newName } = extractPrefix(name, "dataset type", dataTypes); - return { type: prefix, name: newName }; -} - -const dataCategories = { - diseases: { - name: "DISEASES", - directory: "diseases", - }, - depmap: { - name: "DepMap", - directory: "depmap", - }, - hiv: { - name: "HIV", - directory: "hiv", - }, - rn: { - name: "ResponseNet", - directory: "rn_muscle_skeletal", - }, - yeast: { - name: "Yeast", - directory: "yeast_osmotic_stress", - }, - egfr: { - name: "EGFR", - directory: "egfr" - } -}; - -// TODO: replace this once we have proper dataset categories -export function extractDatasetCategory(name: string): { category: string; name: string } { - const { prefix, name: newName } = extractPrefix(name, "dataset category", Object.keys(dataCategories)); - return { category: prefix, name: newName.slice(1) }; -} - -export function parseOutputString(str: string): Output { - const components = str.split("-"); - let dataType: string | undefined; - let datasetCategory: string | undefined; - let datasetName: string | undefined; - let algorithm: string | undefined; - let paramsHash: string | undefined; - - if (components.length === 5) { - // This is a slug URL (type-...) - [dataType, datasetCategory, datasetName, algorithm, paramsHash] = components; - } else if (components.length === 4) { - // This is also a slug URL w/o a name - [dataType, datasetCategory, algorithm, paramsHash] = components; - } else if (components.length === 3) { - // This is fetched straight from the folder - we ignored -params previously - [datasetName, algorithm, paramsHash] = components; - } else { - throw new Error(`Unexpected length of components in ${components}.`); - } - - // We didn't get a data type in the first passthrough - lets extract the data - // type from the name - if (!dataType || !datasetCategory) { - if (!datasetName) throw new Error(`datasetName ${datasetName} isn't set - this is an internal error.`); - const { type, name: name1 } = extractDatasetType(datasetName); - const { category, name } = extractDatasetCategory(name1); - dataType = type; - datasetCategory = category; - datasetName = name; - } - - return { - dataType, - datasetCategory, - datasetName, - algorithm, - paramsHash, - }; -} - -export function addOptional(name: string | undefined, settings: { prefix?: string; suffix?: string }): string { - return name ? `${settings.prefix ?? ""}${name}${settings.suffix ?? ""}` : ""; -} - -export function styleOutput(output: Output): string { - return `${output.dataType}-${output.datasetCategory}-${addOptional(output.datasetName, { suffix: "-" })}${output.algorithm}-${output.paramsHash}`; -} - -export function asFolderName(output: Output): string { - return `${output.dataType}${output.datasetCategory}${addOptional(output.datasetName, { prefix: "_" })}-${output.algorithm}-params-${output.paramsHash}`; -} - -export function algorithmDocumentationUrl(algorithm: string): string { - const map: Record = { - omicsintegrator1: "oi1", - omicsintegrator2: "oi2", - }; - - const foundAlgorithm = algorithm in map ? map[algorithm] : algorithm; - return `https://spras.readthedocs.io/en/latest/prms/${foundAlgorithm}.html`; -} diff --git a/web/src/lib/paths.ts b/web/src/lib/paths.ts deleted file mode 100644 index 9048e8e4..00000000 --- a/web/src/lib/paths.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { extractDatasetCategory, extractDatasetType } from "./outputStyle"; -import { globSync } from 'glob' - -export function getDataFiles(): string[] { - // We prefer this over import.meta.glob, as import.meta.glob currently - // leads to OOM for large raw imports, and OOM is especially plausible on CD. - const dataFiles = globSync("public/data/output/**"); - return dataFiles.map((path) => path.substring("public/data/output/".length)); -} - -export function getDatasets() { - const files = getDataFiles(); - return files - .filter((file) => file.startsWith("logs/datasets-")) - .map((file) => file.substring("logs/datasets-".length)) - .map((file) => file.slice(0, -".yaml".length)) - .map((file) => extractDatasetType(file)) - .map(({ type, name }) => ({ type, ...extractDatasetCategory(name) })); -} diff --git a/web/src/pages/[uid]/index.astro b/web/src/pages/[uid]/index.astro deleted file mode 100644 index 9037417c..00000000 --- a/web/src/pages/[uid]/index.astro +++ /dev/null @@ -1,90 +0,0 @@ ---- -import BaseLayout from "../../layouts/BaseLayout.astro"; -import Visualization from "../../components/Visualization.astro"; -import { - addOptional, - algorithmDocumentationUrl, - asFolderName, - parseOutputString, - styleOutput, -} from "../../lib/outputStyle"; -import { getDataFiles } from "../../lib/paths"; -import { Code } from "astro:components"; - -export function getStaticPaths() { - const filteredPaths = new Set( - getDataFiles() - // We can safely filter for these prefixes, as datasets start with their type. - // Specifically, we do not want to make pages for our prepared inputs and logs. - .filter((path) => !path.startsWith("prepared")) - .filter((path) => !path.startsWith("logs")) - // Then, we don't want to make pages for our root-level files - .filter((path) => path.includes("/")) - // We specifically want the folder names - .map((path) => path.split("/")[0]) - // And we want to only have the dataset a-b-c-d params, not analysis ones - .filter((path) => path.split("-").length === 4) - // Then, we exclude -params- - .map((path) => path.replace("-params", "")), - ); - - return [...filteredPaths].map((path) => ({ params: { uid: styleOutput(parseOutputString(path)) } })); -} - -const { uid } = Astro.params; -const output = parseOutputString(uid); -// We get the raw files associated to this specific run -const subPaths = getDataFiles().filter((path) => path.startsWith(asFolderName(output))); - -// The parameter config content -const parametersCode = ( - await import(`../../../../output/logs/parameters-${output.algorithm}-params-${output.paramsHash}.yaml?raw`) -).default; - -// The interactome content -const interactome = (await import(`../../../../output/${asFolderName(output)}/pathway.txt?raw`)).default; ---- - - - - -

{uid}

- - (go to home) - -

Parameters

- - - - For information about the algorithm parameters, see the associated documentation page. For information about the dataset itself, go to the respective dataset page. - -

Output Files

- - - -

Visualization

- - -
diff --git a/web/src/pages/description.md b/web/src/pages/description.md deleted file mode 100644 index fb0d2c96..00000000 --- a/web/src/pages/description.md +++ /dev/null @@ -1,25 +0,0 @@ -# SPRAS Benchmarking - -[SPRAS](https://github.com/Reed-CompBio/spras) is a -utility software designed for performing [signaling pathway](https://en.wikipedia.org/wiki/Cell_signaling#Signal_transduction_pathways) reconstruction -with various algorithms. [SPRAS's documentation](https://spras.readthedocs.io/en/latest/) has more information about its inner workings. - -This benchmarking repository ([see the GitHub](https://github.com/Reed-CompBio/spras-benchmarking/)) is meant to display the performance -of all of the algorithms currently supported by SPRAS on signaling pathways and diseases (to test out DMMMs), -comparing their reconstructions with manually curated golden datasets, or synthetically provided datasets from various databases. - -All information provided is orchestrated through our GitHub Actions pipeline, and heavy processing is soon to be moved to [HTCondor](https://htcondor.org/). - -## Format - -Each run's slug has the type, the (dataset) category, the dataset, the -algorithm, and the parameters [hash](https://en.wikipedia.org/wiki/Hash_function). - -There are also pages related to different categories of these runs: - -- type-category-dataset -- type-category - -The type classifies a dataset and the algorithms it runs on. In this case, PRA datasets run on all algorithms, while disease module datasets only run on DMMM algorithms. - -The category classifies what provider a dataset comes from. diff --git a/web/src/pages/index.astro b/web/src/pages/index.astro deleted file mode 100644 index 35a8be75..00000000 --- a/web/src/pages/index.astro +++ /dev/null @@ -1,76 +0,0 @@ ---- -import Colors from "../components/Colors.astro"; -import BaseLayout from "../layouts/BaseLayout.astro"; -import { parseOutputString } from "../lib/outputStyle"; -import { getStaticPaths } from "./[uid]/index.astro"; - -import Description from "./description.md"; ---- - - - - - - - - - -

Runs ({getStaticPaths().length})

- -
diff --git a/web/src/pages/type-category-dataset/[...slug]/index.astro b/web/src/pages/type-category-dataset/[...slug]/index.astro deleted file mode 100644 index 01ec65f1..00000000 --- a/web/src/pages/type-category-dataset/[...slug]/index.astro +++ /dev/null @@ -1,45 +0,0 @@ ---- -import MediumZoom from "../../../components/MediumZoom.astro"; -import BaseLayout from "../../../layouts/BaseLayout.astro"; -import { getDatasets } from "../../../lib/paths"; - -export function getStaticPaths() { - return getDatasets().map(({ name, category, type }) => ({ - params: { slug: `${type}-${category}${name ? `-${name}` : ""}`, name, category, type }, - })); -} - -const { slug } = Astro.params; -const { type, category, name } = getStaticPaths().find((path) => path.params.slug == slug)!.params; ---- - - - - -

{type}-{category}{name ? `-${name}` : ""}

- - (go to home) -
- (go to type-category-datasets) - -

Principal Component Analysis

- - -

Jaccard Similarity Heatmap

- -
- - diff --git a/web/src/pages/type-category-dataset/index.astro b/web/src/pages/type-category-dataset/index.astro deleted file mode 100644 index 03383160..00000000 --- a/web/src/pages/type-category-dataset/index.astro +++ /dev/null @@ -1,29 +0,0 @@ ---- -import BaseLayout from "../../layouts/BaseLayout.astro"; -import { getStaticPaths } from "./[...slug]/index.astro"; -import Colors from "../../components/Colors.astro"; ---- - - - -

Type-Category-Datasets

- -

This contains analysis associated with datasets running on a particular algorithm type under some category.

- - -
diff --git a/web/src/pages/type-category/[type]-[category]/index.astro b/web/src/pages/type-category/[type]-[category]/index.astro deleted file mode 100644 index a4bb9f66..00000000 --- a/web/src/pages/type-category/[type]-[category]/index.astro +++ /dev/null @@ -1,27 +0,0 @@ ---- -import BaseLayout from "../../../layouts/BaseLayout.astro"; -import { getDatasets } from "../../../lib/paths"; - -export function getStaticPaths() { - return getDatasets() - .map((obj) => ({ ...obj, id: `${obj.type}-${obj.category}` })) - .filter( - (value, index, self) => - // Removing duplicate entries - index === self.findIndex((t) => t.id === value.id), - ) - .map(({ category, type }) => ({ - params: { category, type }, - })); -} - -const { category, type } = Astro.params; ---- - - -

{type}-{category}

- - (go to home) -
- (go to type-category) -
diff --git a/web/src/pages/type-category/index.astro b/web/src/pages/type-category/index.astro deleted file mode 100644 index 797f96c3..00000000 --- a/web/src/pages/type-category/index.astro +++ /dev/null @@ -1,24 +0,0 @@ ---- -import BaseLayout from "../../layouts/BaseLayout.astro"; -import { getStaticPaths } from "./[type]-[category]/index.astro"; -import Colors from "../../components/Colors.astro"; ---- - - - -

Type-Datasets

- -

This contains analysis associated with datasets running on a particular algorithm type.

- - -
diff --git a/web/tsconfig.json b/web/tsconfig.json deleted file mode 100644 index 8bf91d3b..00000000 --- a/web/tsconfig.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "extends": "astro/tsconfigs/strict", - "include": [".astro/types.d.ts", "**/*"], - "exclude": ["dist"] -}