diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 8bb8e28f..00000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1 +0,0 @@ -FROM mcr.microsoft.com/devcontainers/anaconda:1-3 diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 4dda9426..00000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,23 +0,0 @@ -// Small devcontainer which loads anaconda. All postinstallation steps have to be done manually. -// This comes with snakemake and docker-in-docker. - -// For format details, see https://aka.ms/devcontainer.json. For config options, see the -// README at: https://github.com/devcontainers/templates/tree/main/src/anaconda -{ - "name": "Anaconda (Python 3)", - "build": { - "context": "..", - "dockerfile": "Dockerfile" - }, - "features": { - "ghcr.io/devcontainers/features/docker-in-docker:2": {}, - // For yamlfmt - "ghcr.io/devcontainers/features/go:1": {}, - // For web display - "ghcr.io/devcontainers/features/node:1": {}, - // For scripting - "ghcr.io/va-h/devcontainers-features/uv:1": {}, - // For paxtools - "ghcr.io/devcontainers/features/java:1": {} - } -} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 5b22e90c..00000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,114 +0,0 @@ -name: Test SPRAS - -on: - pull_request: - branches: [main] - push: - branches: [main] - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - pages: write - id-token: write - -jobs: - pre-commit: - name: Run pre-commit checks - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - name: Run pre-commit checks - uses: pre-commit/action@v3.0.0 - checks: - name: Run workflow - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - submodules: true - - name: Install uv for scripting - uses: astral-sh/setup-uv@v6.1.0 - with: - version: "0.7.13" - - name: Setup conda - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: spras - environment-file: spras/environment.yml - auto-activate-base: false - miniconda-version: 'latest' - # Install spras in the environment using pip - - name: Install spras in conda env - shell: bash --login {0} - run: pip install ./spras - # Log conda environment contents - - name: Log conda environment - shell: bash --login {0} - run: conda list - - name: Fetch Artifact Cache - id: fetch-cache - uses: actions/cache/restore@v4 - with: - path: cache/artifacts - key: cache-artifacts - - name: Process raw data through Snakemake - run: sh run_snakemake.sh - - name: Cache Artifact Cache - id: cache-cache - uses: actions/cache/save@v4 - with: - path: cache/artifacts - key: cache-artifacts - - name: Run Snakemake workflow for DMMMs - shell: bash --login {0} - run: snakemake --cores 4 --configfile configs/dmmm.yaml --show-failed-logs -s spras/Snakefile - - name: Run Snakemake workflow for PRAs - shell: bash --login {0} - run: snakemake --cores 4 --configfile configs/pra.yaml --show-failed-logs -s spras/Snakefile - - name: Setup PNPM - uses: pnpm/action-setup@v4 - with: - version: 10 - - name: Install web dependencies - working-directory: ./web - run: pnpm install - - name: Run web builder - working-directory: ./web - run: pnpm build - - name: Upload built website distribution folder - uses: actions/upload-artifact@v4 - with: - name: build - path: web/dist - pages: - needs: checks - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - concurrency: - group: 'pages' - cancel-in-progress: true - steps: - - name: Download Artifacts - uses: actions/download-artifact@v4 - with: - name: build - path: dist - - name: Setup Pages - uses: actions/configure-pages@v2 - - name: Upload artifact - uses: actions/upload-pages-artifact@v3 - with: - path: dist - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 196f12a8..00000000 --- a/.gitignore +++ /dev/null @@ -1,169 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/latest/usage/project/#working-with-version-control -.pdm.toml -.pdm-python -.pdm-build/ - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -.idea/ - -# Snakemake -.snakemake - -# Output -/output -/web/output - -# pnpm -.pnpm-store - -# mac -.DS_Store diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 11f80da4..00000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "spras"] - path = spras - url = https://github.com/Reed-CompBio/spras diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index f687b4c2..00000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks -default_language_version: - # Match this to the version specified in environment.yml - python: python3.11 -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 # Use the ref you want to point at - hooks: - # Attempts to load all yaml files to verify syntax. - - id: check-yaml - # Attempts to load all TOML files to verify syntax. - - id: check-toml - # Trims trailing whitespace. - - id: trailing-whitespace - # Preserves Markdown hard linebreaks. - args: [--markdown-linebreak-ext=md] - # Do not trim whitespace from all files, input files may need trailing whitespace for empty values in columns. - types_or: [markdown, python, yaml] - # Skip this Markdown file, which has an example of an input text file within it. - exclude: input/README.md - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.15.4' - hooks: - - id: ruff - - repo: https://github.com/google/yamlfmt - rev: v0.17.0 - hooks: - - id: yamlfmt - - repo: https://github.com/crate-ci/typos - rev: v1.34.0 - hooks: - - id: typos diff --git a/.python-version b/.python-version deleted file mode 100644 index 24ee5b1b..00000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.13 diff --git a/.vscode/extensions.json b/.vscode/extensions.json deleted file mode 100644 index 22a15055..00000000 --- a/.vscode/extensions.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "recommendations": ["astro-build.astro-vscode"], - "unwantedRecommendations": [] -} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index bebd33fc..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "editor.rulers": [ - 150 - ] -} \ No newline at end of file diff --git a/.yamlfmt.yaml b/.yamlfmt.yaml deleted file mode 100644 index 9d3236aa..00000000 --- a/.yamlfmt.yaml +++ /dev/null @@ -1,2 +0,0 @@ -formatter: - retain_line_breaks_single: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index b6766532..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,132 +0,0 @@ -# Contributing - -## Helping Out - -There are `TODOs` that better enhance the reproducibility and accuracy of datasets or analysis of algorithm outputs, as well as -[open resolvable issues](https://github.com/Reed-CompBio/spras-benchmarking/). - -## Adding a dataset - -**Check that your data provider isn't already a dataset in `datasets`.** There are some datasets that are able to serve more data, and only use -a subset of it: these datasets can be extended for your needs. - -The goal of a dataset is to take raw data and produce data to be fed to SPRAS. -We'll follow along with `datasets/contributing`. This mini-tutorial assumes that you already have familiarity with SPRAS -[as per its contributing guide](https://spras.readthedocs.io/en/latest/contributing/index.html). - -### Uploading raw data - -This is a fake dataset: the data can be generated by running `datasets/contributing/raw_generation.py`, where the following artifacts will output: -- `sources.txt` -- `targets.txt` -- `gold-standard.tsv` -- `interactome.tsv` - -Unlike in this example, the data used in other datasets comes from other sources (whether that's supplementary info in a paper, or out of -biological databases like UniProt.) These artifacts can be large, and occasionally update, so we store them in Google Drive for caching and download -them when we want to reconstruct a dataset. - -Note that the four artifacts above change every time `raw_generation.py` is run. Upload those artifacts to Google Drive in a folder of your choice. -Share the file and allow for _Anyone with the link_ to _View_ the file. - -Once shared, copying the URL should look something like: - -> https://drive.google.com/file/d/1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h/view?usp=sharing - -We always drop the entire `/view?...` suffix, and replace `/file/d/` with `/uc?id=`, which turns the URL to a direct download link, which is internally -downloaded with [gdown](https://github.com/wkentaro/gdown). Those post-processing steps should make the URL now look as so: - -> https://drive.google.com/uc?id=1Agte0Aezext-8jLhGP4GmaF3tS7gHX-h - -Now, add a directive to `cache/directory.py` under `Contributing`. Since this doesn't have an online URL, this should use `CacheItem.cache_only`, to -indicate that no other online database serves this URL. - -Your new directive under the `directory` dictionary should look something as so, with one entry for every artifact: - -```python -..., -"Contributing": { - "interactome.tsv": CacheItem.cache_only( - name="Randomly-generated contributing interactome", - cached="https://drive.google.com/uc?id=..." - ), - ... -} -``` - -### Setting up a workflow - -Now, we need to make these files SPRAS-compatible. To do this, we'll set up a `Snakefile`, which will handle: -- Artifact downloading -- Script running. - -`sources.txt` and `targets.txt` are already in a SPRAS-ready format, but we need to process `gold-standard.tsv` and `interactome.tsv`. - -Create a `Snakefile` under your dataset with the top-level directives: - -```python -# This provides the `produce_fetch_rules` util to allows us to automatically fetch the Google Drive data. -include: "../../cache/Snakefile" - -rule all: - input: - # The two files we will be passing to SPRAS - "raw/sources.txt", - "raw/targets.txt", - # The two files we will be processing - "processed/gold-standard.tsv", - "processed/interactome.tsv" -``` - -We'll generate four `fetch` rules, or rules that tell Snakemake to download the data we uploaded to Google Drive earlier. - -```python -produce_fetch_rules({ - # The value array is a path into the dictionary from `cache/directory.py`. - "raw/sources.txt": ["Contributing", "sources.txt"], - # and so on for targets, gold-standard, and interactome: - # note that excluding these three stops the Snakemake file from working by design! - ... -}) -``` - -Create two scripts that make `gold-standard.tsv` and `interactome.tsv` SPRAS-ready, consulting -the [SPRAS file format documentation](https://spras.readthedocs.io/en/latest/output.html). You can use any dependencies inside the top-level -`pyproject.toml`, and you can test out your scripts with `uv run diff --git a/web/src/components/Visualization.astro b/web/src/components/Visualization.astro deleted file mode 100644 index a9b2dae6..00000000 --- a/web/src/components/Visualization.astro +++ /dev/null @@ -1,47 +0,0 @@ ---- -import VisualizationScript from "./VisualizationScript.astro"; - -interface Props { - interactome: string; -} - -const { interactome } = Astro.props; - -const edgeLimit = 300; - -const noHeaderInteractomeArray = interactome.trim().split("\n").slice(1); -const noHeaderInteractome = noHeaderInteractomeArray.length > edgeLimit ? "BIG" : noHeaderInteractomeArray.join("\n"); ---- - - - -{ - noHeaderInteractome === "" ? ( -
There is nothing to visualize.
- ) : ( -- There are {noHeaderInteractomeArray.length} edges, which is over the {edgeLimit} edge maximum. Visualizing it - may lag your machine, and may also not be visually meaningful. If you do want to see this data visualized, using - the local Cytoscape analyzer SPRAS has may be a better option. -
-
-
- For information about the algorithm parameters, see the associated documentation page. For information about the dataset itself, go to the respective dataset page.
-
- type, the (dataset) category, the dataset, the
-algorithm, and the parameters [hash](https://en.wikipedia.org/wiki/Hash_function).
-
-There are also pages related to different categories of these runs:
-
-- type-category-dataset
-- type-category
-
-The type classifies a dataset and the algorithms it runs on. In this case, PRA datasets run on all algorithms, while disease module datasets only run on DMMM algorithms.
-
-The category classifies what provider a dataset comes from.
diff --git a/web/src/pages/index.astro b/web/src/pages/index.astro
deleted file mode 100644
index 35a8be75..00000000
--- a/web/src/pages/index.astro
+++ /dev/null
@@ -1,76 +0,0 @@
----
-import Colors from "../components/Colors.astro";
-import BaseLayout from "../layouts/BaseLayout.astro";
-import { parseOutputString } from "../lib/outputStyle";
-import { getStaticPaths } from "./[uid]/index.astro";
-
-import Description from "./description.md";
----
-
-
-
-
-
-This contains analysis associated with datasets running on a particular algorithm type under some category.
- -This contains analysis associated with datasets running on a particular algorithm type.
- -