Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions .github/workflows/multi-omics-submission-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
name: Integration - MARS PoC
on:
  workflow_dispatch:
  push:
    branches: [ main ]
  pull_request:
    branches:
      - main

jobs:
  poc-mars-cli:
    runs-on: ubuntu-latest

    env:
      # Where mars-cli will put .mars/
      SETTINGS_DIR: ${{ github.workspace }}/mars_settings

      # Paths for MARS repo and ISA template
      REPOSITORY_SERVICES_PATH: ${{ github.workspace }}/MARS/repository-services
      ISA_TEMPLATE_PATH: ${{ github.workspace }}/MARS/test-data/biosamples-input-isa.json

      # Credentials from GitHub secrets
      WEBIN_USERNAME: ${{ secrets.WEBIN_USERNAME }}
      WEBIN_PASSWORD: ${{ secrets.WEBIN_PASSWORD }}
      METABOLIGHTS_METADATA_USERNAME: ${{ secrets.METABOLIGHTS_METADATA_USERNAME }}
      METABOLIGHTS_METADATA_PASSWORD: ${{ secrets.METABOLIGHTS_METADATA_PASSWORD }}
      METABOLIGHTS_DATA_USERNAME: ${{ secrets.METABOLIGHTS_DATA_USERNAME }}
      METABOLIGHTS_DATA_PASSWORD: ${{ secrets.METABOLIGHTS_DATA_PASSWORD }}

    steps:
      - name: Checkout MARS (for repository-services + test-data)
        uses: actions/checkout@v5
        with:
          submodules: 'recursive'

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Install mars-cli
        run: |
          pip install --upgrade pip
          pip install .

      - name: Start repository-services (docker compose up)
        working-directory: ${{ env.REPOSITORY_SERVICES_PATH }}
        run: |
          docker compose up -d --build

      - name: Wait for services to start
        run: |
          # Wait for both services to become healthy (max ~150s)
          for i in {1..30}; do
            echo "Health check attempt $i..."

            ISAENA_OK=0
            ISABIOSAMPLES_OK=0

            curl -fsS http://localhost:8042/isaena >/dev/null && ISAENA_OK=1 || ISAENA_OK=0
            curl -fsS http://localhost:8032/isabiosamples >/dev/null && ISABIOSAMPLES_OK=1 || ISABIOSAMPLES_OK=0

            if [ "$ISAENA_OK" -eq 1 ] && [ "$ISABIOSAMPLES_OK" -eq 1 ]; then
              echo "Both services are up ✅"
              exit 0
            fi

            echo "Services not ready yet. isaena=$ISAENA_OK isabiosamples=$ISABIOSAMPLES_OK"
            sleep 5
          done

          echo "Services did not become healthy in time ❌"
          exit 1

      - name: Prepare PoC - settings, ISA JSON, credentials, dataFiles
        run: |
          python scripts/prepare_poc_submission.py

      - name: Run mars-cli submit
        run: |
          set -euo pipefail

          # nullglob: an unmatched glob expands to nothing instead of the
          # literal pattern, so an empty data dir is detected explicitly
          # rather than passing a bogus "poc_work/data/*.fastq.gz" path.
          shopt -s nullglob

          # Build list of --data-files arguments from generated .fastq.gz files
          DATA_ARGS=()
          for f in poc_work/data/*.fastq.gz; do
            echo "Using data file: ./$f"
            DATA_ARGS+=( "./$f" )
          done

          if [ "${#DATA_ARGS[@]}" -eq 0 ]; then
            echo "No .fastq.gz files found in poc_work/data ❌"
            exit 1
          fi

          # BUGFIX: unquoted $DATA_ARGS expands only the FIRST array element,
          # silently dropping every other generated file. "${DATA_ARGS[@]}"
          # passes each file as its own argument, safe against spaces.
          echo "Running mars-cli with data args: ${DATA_ARGS[*]}"

          mars-cli --development submit \
            --submit-to-metabolights False \
            --file-transfer ftp \
            --data-files "${DATA_ARGS[@]}" \
            --credentials-file ./poc_work/credentials.json \
            ./poc_work/isa.json

      - name: Upload run artifacts
        uses: actions/upload-artifact@v4
        with:
          name: run-artifacts
          path: |
            poc_work/
            mars_settings/
            output_*.json

      - name: Stop repository-services (docker compose down)
        if: always()
        working-directory: ${{ env.REPOSITORY_SERVICES_PATH }}
        run: |
          docker compose down --volumes

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Test MARS
name: Test mars-cli
on:
push:
paths:
Expand Down
2 changes: 1 addition & 1 deletion MARS
Submodule MARS updated 80 files
+4 −1 .github/workflows/build-ena-service.yml
+37 −0 .github/workflows/deploy-astro.yml
+0 −56 .github/workflows/test-mars.yml
+1 −53 README.md
+24 −0 docs/.gitignore
+13 −0 docs/astro.config.mjs
+5,666 −0 docs/package-lock.json
+16 −0 docs/package.json
+ docs/public/favicon.png
+ docs/src/assets/logo-large.png
+ docs/src/assets/logo-small.png
+16 −0 docs/src/components/Footer.astro
+7 −0 docs/src/components/Home.astro
+32 −0 docs/src/components/NavBar.astro
+13 −0 docs/src/components/SchemaProperty.astro
+29 −0 docs/src/components/SchemaTable.astro
+14 −0 docs/src/content/config.ts
+29 −0 docs/src/layouts/Layout.astro
+8 −0 docs/src/pages/index.astro
+19 −0 docs/src/pages/mars-isa-json.astro
+1 −0 docs/src/styles/global.css
+5 −0 docs/tsconfig.json
+0 −2 mars-cli/.coveragerc
+0 −1 mars-cli/.python-version
+0 −367 mars-cli/README.md
+0 −1 mars-cli/_version.py
+0 −73 mars-cli/generate_config.py
+0 −271 mars-cli/mars_cli.py
+0 −0 mars-cli/mars_lib/__init__.py
+0 −149 mars-cli/mars_lib/authentication.py
+0 −64 mars-cli/mars_lib/biosamples-input-schema.json
+0 −298 mars-cli/mars_lib/biosamples_external_references.py
+0 −114 mars-cli/mars_lib/credential.py
+0 −64 mars-cli/mars_lib/ftp_upload.py
+0 −564 mars-cli/mars_lib/isa_json.py
+0 −54 mars-cli/mars_lib/logging.py
+0 −0 mars-cli/mars_lib/models/__init__.py
+0 −310 mars-cli/mars_lib/models/isa_json.py
+0 −67 mars-cli/mars_lib/models/repository_response.py
+0 −443 mars-cli/mars_lib/submit.py
+0 −147 mars-cli/mars_lib/target_repo.py
+0 −16 mars-cli/mars_lib/validation.py
+0 −2 mars-cli/pytest.ini
+0 −7 mars-cli/requirements.txt
+0 −58 mars-cli/setup.py
+0 −0 mars-cli/tests/__init__.py
+0 −92 mars-cli/tests/fixtures/SAMEA112654119.json
+0 −18 mars-cli/tests/fixtures/bad_credentials_file.json
+0 −2 mars-cli/tests/fixtures/bad_json.json
+0 −12 mars-cli/tests/fixtures/invalid_investigation.json
+0 −1,065 mars-cli/tests/fixtures/isa_jsons/0_Initial_ISA_JSON_in_model.json
+0 −1,083 mars-cli/tests/fixtures/isa_jsons/1_after_biosamples.json
+0 −1,118 mars-cli/tests/fixtures/isa_jsons/2_after_ena.json
+0 −57 mars-cli/tests/fixtures/mars_receipts/biosamples_success_response.json
+0 −119 mars-cli/tests/fixtures/mars_receipts/ena_success_response.json
+0 −45 mars-cli/tests/fixtures/mars_receipts/failure_reponse.json
+0 −14 mars-cli/tests/fixtures/max_credentials_file.json
+0 −6 mars-cli/tests/fixtures/min_credentials_file.json
+0 −12 mars-cli/tests/fixtures/minimal_valid_investigation.json
+0 −1 mars-cli/tests/fixtures/not_a_json_file.txt
+0 −67 mars-cli/tests/test_authentication.py
+0 −160 mars-cli/tests/test_biosample_external_references.py
+0 −19 mars-cli/tests/test_credential_manager.py
+0 −14 mars-cli/tests/test_credentials_example.json
+0 −29 mars-cli/tests/test_ftp_upload.py
+0 −26 mars-cli/tests/test_input.json
+0 −376 mars-cli/tests/test_isa_json.py
+3 −0 repository-services/isajson-ena/.gitignore
+22 −14 repository-services/isajson-ena/Dockerfile
+8 −1 repository-services/isajson-ena/build.gradle
+2 −2 ...isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/controller/WebinIsaToXmlSubmissionController.java
+8 −8 ...tory-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/MarsReceiptService.java
+2 −2 ...y-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/SRAAnalysisXmlCreator.java
+2 −2 ...rvices/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinExperimentXmlCreator.java
+1 −1 ...-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinProjectXmlCreator.java
+1 −1 ...tory-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinRunXmlCreator.java
+2 −2 ...ry-services/isajson-ena/src/main/java/com/elixir/biohackaton/ISAToSRA/sra/service/WebinStudyXmlCreator.java
+2 −2 repository-services/isajson-ena/src/test/java/com/elixir/biohackaton/ISAToSRA/EnaReceiptToMarsTest.java
+1,723 −0 schemas/isa_json.schema.json
+1,745 −0 schemas/isa_json_schema_with_validators.json
194 changes: 194 additions & 0 deletions scripts/isa_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# tests/utils/isa_generator.py
from __future__ import annotations

from pathlib import Path
from datetime import datetime, UTC
import uuid
import json
import gzip
import hashlib
from typing import Any, List, Tuple


def _timestamp_suffix() -> str:
"""Unique suffix for this run (used for data file names)."""
return datetime.now(UTC).strftime("%Y%m%d%H%M%S") + "_" + uuid.uuid4().hex[:6]


def _write_dummy_fastq_gz(path: Path) -> None:
"""
Write a tiny dummy FASTQ dataset into a .fastq.gz file.
Content doesn't matter, as long as it's valid-ish FASTQ text.
"""
path.parent.mkdir(parents=True, exist_ok=True)
with gzip.open(path, "wt") as fh:
fh.write(
"@read1\n"
"ACGTACGTACGTACGT\n"
"+\n"
"FFFFFFFFFFFFFFFF\n"
)


def _md5_of_file(path: Path) -> str:
"""
Compute MD5 checksum of the given file (binary content).
"""
h = hashlib.md5()
with path.open("rb") as fh:
for chunk in iter(lambda: fh.read(8192), b""):
h.update(chunk)
return h.hexdigest()


def _get_first_assay(isa_obj: dict[str, Any]) -> dict[str, Any] | None:
"""
Navigate to investigation.studies[0].assays[0] (if present).
"""
inv = isa_obj.get("investigation")
if inv is None:
inv = isa_obj

if not isinstance(inv, dict):
return None

studies = inv.get("studies") or []
if not isinstance(studies, list) or not studies:
return None

first_study = studies[0]
if not isinstance(first_study, dict):
return None

assays = first_study.get("assays") or []
if not isinstance(assays, list) or not assays:
return None

first_assay = assays[0]
if not isinstance(first_assay, dict):
return None

return first_assay


def _ensure_comment(comments: List[dict[str, Any]], name: str, value: str) -> None:
"""
Ensure there is a comment with the given name, updating it if it exists,
or appending a new one if not.
"""
for c in comments:
if isinstance(c, dict) and c.get("name") == name:
c["value"] = value
return
comments.append({"name": name, "value": value})


def _update_datafiles_with_generated_files(
    assay: dict[str, Any],
    data_dir: Path,
    n_files: int,
) -> List[Path]:
    """
    Generate unique dummy .fastq.gz files for up to *n_files* entries of the
    assay's ``dataFiles`` list, updating those entries in place.

    Per affected dataFiles[i]:

    - A unique file name is derived from the existing 'name' by inserting a
      run-unique suffix before the ".fastq.gz" extension:
      e.g. ENA_TEST2.R2.fastq.gz -> ENA_TEST2.R2_<suffix>.fastq.gz
      (names without the extension get "_<suffix>.fastq.gz" appended).
    - A tiny dummy FASTQ is written under *data_dir* and its MD5 computed.
    - The entry is updated:
        * "name" = new file name
        * comments "file name", "file type" ("fastq"),
          "file checksum" (MD5 hex), "checksum_method" ("MD5")
      Existing "accession", "submission date", etc. are kept as-is.

    Entries that are not dicts or lack a non-empty string "name" are skipped.
    Returns the paths of the files actually generated.
    """
    data_files_json = assay.get("dataFiles") or []
    if not isinstance(data_files_json, list):
        return []

    generated_paths: List[Path] = []
    suffix = _timestamp_suffix()

    # Only the first n_files entries are considered; max(0, ...) keeps a
    # negative n_files a no-op instead of slicing from the end.
    for df_json in data_files_json[: max(0, n_files)]:
        if not isinstance(df_json, dict):
            continue

        original_name = df_json.get("name")
        if not isinstance(original_name, str) or not original_name:
            continue

        # removesuffix is a no-op when the extension is absent, so one
        # expression covers both the ".fastq.gz" and plain-name cases.
        base = original_name.removesuffix(".fastq.gz")
        new_name = f"{base}_{suffix}.fastq.gz"

        file_path = data_dir / new_name
        _write_dummy_fastq_gz(file_path)
        md5 = _md5_of_file(file_path)

        # Update the JSON entry
        df_json["name"] = new_name

        comments = df_json.get("comments")
        if not isinstance(comments, list):
            comments = []
            df_json["comments"] = comments

        _ensure_comment(comments, "file name", new_name)
        _ensure_comment(comments, "file type", "fastq")
        _ensure_comment(comments, "file checksum", md5)
        _ensure_comment(comments, "checksum_method", "MD5")
        # DO NOT touch 'accession' or 'submission date' if present

        generated_paths.append(file_path)

    return generated_paths


def generate_isa_json_with_data(
    work_dir: Path,
    template_path: Path,
    n_files: int = 2,
) -> Tuple[Path, List[Path]]:
    """
    PoC helper: produce a submittable ISA-JSON plus matching dummy data files.

    Steps:

    1. Load the ISA-JSON template from *template_path*.
    2. Locate ``investigation.studies[0].assays[0].dataFiles``.
    3. For up to *n_files* of those entries, generate UNIQUE .fastq.gz files
       under ``work_dir/data`` and update each entry's "name" and its
       file name / type / checksum / method comments.
    4. Write the resulting ISA-JSON to ``work_dir/isa.json``.

    Other identifiers and comments (including 'target_repository') are left
    untouched. Returns the path of the written ISA-JSON and the list of
    generated data-file paths (empty when no usable assay exists).
    """
    work_dir.mkdir(parents=True, exist_ok=True)

    isa_obj = json.loads(template_path.read_text())
    assay = _get_first_assay(isa_obj)

    if assay is None:
        generated: List[Path] = []
    else:
        generated = _update_datafiles_with_generated_files(
            assay=assay,
            data_dir=work_dir / "data",
            n_files=n_files,
        )

    isa_path = work_dir / "isa.json"
    isa_path.write_text(json.dumps(isa_obj, indent=2))

    return isa_path, generated
Loading
Loading