diff --git a/conformance-test.sh b/conformance-test.sh index 6df14a63c..852687557 100755 --- a/conformance-test.sh +++ b/conformance-test.sh @@ -65,8 +65,8 @@ elif [ "${CONTAINER}" == "docker" ]; then elif [ "${CONTAINER}" == "podman" ]; then podman pull docker.io/node:slim elif [ "${CONTAINER}" == "singularity" ]; then - export CWL_SINGULARITY_CACHE="$SCRIPT_DIRECTORY/sifcache" - mkdir --parents "${CWL_SINGULARITY_CACHE}" + export CWL_SINGULARITY_IMAGES="$SCRIPT_DIRECTORY/sifcache" + mkdir --parents "${CWL_SINGULARITY_IMAGES}" fi # Setup environment diff --git a/cwltool/argparser.py b/cwltool/argparser.py index 08001c4e7..5b2437db2 100644 --- a/cwltool/argparser.py +++ b/cwltool/argparser.py @@ -366,6 +366,16 @@ def arg_parser() -> argparse.ArgumentParser: help="Do not try to pull Docker images", dest="pull_image", ) + container_group.add_argument( + "--singularity-sandbox-path", + default=None, + type=str, + help="Singularity/Apptainer sandbox image base path. " + "Will use a pre-existing sandbox image. " + "Will be prepended to the dockerPull path. " + "Equivalent to use CWL_SINGULARITY_IMAGES variable. ", + dest="image_base_path", + ) container_group.add_argument( "--force-docker-pull", action="store_true", diff --git a/cwltool/context.py b/cwltool/context.py index 4e106ac48..de383da4e 100644 --- a/cwltool/context.py +++ b/cwltool/context.py @@ -151,6 +151,7 @@ def __init__(self, kwargs: dict[str, Any] | None = None) -> None: self.streaming_allowed: bool = False self.singularity: bool = False + self.image_base_path: str | None = None self.podman: bool = False self.debug: bool = False self.compute_checksum: bool = True diff --git a/cwltool/docker.py b/cwltool/docker.py index 69fecc830..8734ea36e 100644 --- a/cwltool/docker.py +++ b/cwltool/docker.py @@ -201,6 +201,7 @@ def get_from_requirements( pull_image: bool, force_pull: bool, tmp_outdir_prefix: str, + image_base_path: str | None = None, ) -> str | None: if not shutil.which(self.docker_exec): raise WorkflowException(f"{self.docker_exec} executable is not available") diff --git a/cwltool/job.py b/cwltool/job.py index c46778b35..c0c8dd130 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -627,6 +627,7 @@ def get_from_requirements( pull_image: bool, force_pull: bool, tmp_outdir_prefix: str, + image_base_path: str | None = None, ) -> str | None: pass @@ -788,6 +789,7 @@ def run( runtimeContext.pull_image, runtimeContext.force_docker_pull, runtimeContext.tmp_outdir_prefix, + runtimeContext.image_base_path, ) ) if img_id is None: diff --git a/cwltool/singularity.py b/cwltool/singularity.py index 76f1fc488..d5e78ef4d 100644 --- a/cwltool/singularity.py +++ b/cwltool/singularity.py @@ -1,5 +1,6 @@ """Support for executing Docker format containers using Singularity {2,3}.x or Apptainer 1.x.""" +import json import logging import os import os.path @@ -7,9 +8,10 @@ import shutil import sys from collections.abc import Callable, MutableMapping -from subprocess import check_call, check_output # nosec +from subprocess import check_call, check_output, run # nosec from typing import cast +from mypy_extensions import mypyc_attr from schema_salad.sourceline import SourceLine from spython.main import Client from spython.main.parse.parsers.docker import DockerParser @@ -145,6 +147,30 @@ def _normalize_sif_id(string: str) -> str: return string.replace("/", "_") + ".sif" +@mypyc_attr(allow_interpreted_subclasses=True) +def _inspect_singularity_image(path: str) -> bool: + """Inspect singularity image to be sure it is not an empty directory.""" + cmd = [ + "singularity", + "inspect", + "--json", + path, + ] + try: + result = run(cmd, capture_output=True, text=True) # nosec + except Exception: + return False + + if result.returncode == 0: + try: + output = json.loads(result.stdout) + except json.JSONDecodeError: + return False + if output.get("data", {}).get("attributes", {}): + return True + return False + + class SingularityCommandLineJob(ContainerCommandLineJob): def __init__( self, @@ -164,6 +190,7 @@ def get_image( pull_image: bool, tmp_outdir_prefix: str, force_pull: bool = False, + sandbox_base_path: str | None = None, ) -> bool: """ Acquire the software container image in the specified dockerRequirement. @@ -185,11 +212,21 @@ def get_image( elif is_version_2_6() and "SINGULARITY_PULLFOLDER" in os.environ: cache_folder = os.environ["SINGULARITY_PULLFOLDER"] + if "CWL_SINGULARITY_IMAGES" in os.environ: + image_base_path = os.environ["CWL_SINGULARITY_IMAGES"] + else: + image_base_path = cache_folder if cache_folder else "" + + if not sandbox_base_path: + sandbox_base_path = os.path.abspath(image_base_path) + else: + sandbox_base_path = os.path.abspath(sandbox_base_path) + if "dockerFile" in dockerRequirement: - if cache_folder is None: # if environment variables were not set - cache_folder = create_tmp_dir(tmp_outdir_prefix) + if image_base_path is None: # if environment variables were not set + image_base_path = create_tmp_dir(tmp_outdir_prefix) - absolute_path = os.path.abspath(cache_folder) + absolute_path = os.path.abspath(image_base_path) if "dockerImageId" in dockerRequirement: image_name = dockerRequirement["dockerImageId"] image_path = os.path.join(absolute_path, image_name) @@ -229,6 +266,15 @@ def get_image( ) found = True elif "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement: + # looking for local singularity sandbox image and handle it as a local image + sandbox_image_path = os.path.join(sandbox_base_path, dockerRequirement["dockerPull"]) + if os.path.isdir(sandbox_image_path) and _inspect_singularity_image(sandbox_image_path): + dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"] + _logger.info( + "Using local Singularity sandbox image found in %s", + sandbox_image_path, + ) + return True match = re.search(pattern=r"([a-z]*://)", string=dockerRequirement["dockerPull"]) img_name = _normalize_image_id(dockerRequirement["dockerPull"]) candidates.append(img_name) @@ -241,16 +287,26 @@ def get_image( if not match: dockerRequirement["dockerPull"] = "docker://" + dockerRequirement["dockerPull"] elif "dockerImageId" in dockerRequirement: + sandbox_image_path = os.path.join(sandbox_base_path, dockerRequirement["dockerImageId"]) if os.path.isfile(dockerRequirement["dockerImageId"]): found = True + # handling local singularity sandbox image + elif os.path.isdir(sandbox_image_path) and _inspect_singularity_image( + sandbox_image_path + ): + _logger.info( + "Using local Singularity sandbox image found in %s", + sandbox_image_path, + ) + return True candidates.append(dockerRequirement["dockerImageId"]) candidates.append(_normalize_image_id(dockerRequirement["dockerImageId"])) if is_version_3_or_newer(): candidates.append(_normalize_sif_id(dockerRequirement["dockerImageId"])) targets = [os.getcwd()] - if "CWL_SINGULARITY_CACHE" in os.environ: - targets.append(os.environ["CWL_SINGULARITY_CACHE"]) + if "CWL_SINGULARITY_IMAGES" in os.environ: + targets.append(os.environ["CWL_SINGULARITY_IMAGES"]) if is_version_2_6() and "SINGULARITY_PULLFOLDER" in os.environ: targets.append(os.environ["SINGULARITY_PULLFOLDER"]) for target in targets: @@ -268,10 +324,10 @@ def get_image( if (force_pull or not found) and pull_image: cmd: list[str] = [] if "dockerPull" in dockerRequirement: - if cache_folder: + if image_base_path: env = os.environ.copy() if is_version_2_6(): - env["SINGULARITY_PULLFOLDER"] = cache_folder + env["SINGULARITY_PULLFOLDER"] = image_base_path cmd = [ "singularity", "pull", @@ -286,14 +342,14 @@ def get_image( "pull", "--force", "--name", - "{}/{}".format(cache_folder, dockerRequirement["dockerImageId"]), + "{}/{}".format(image_base_path, dockerRequirement["dockerImageId"]), str(dockerRequirement["dockerPull"]), ] _logger.info(str(cmd)) check_call(cmd, env=env, stdout=sys.stderr) # nosec dockerRequirement["dockerImageId"] = "{}/{}".format( - cache_folder, dockerRequirement["dockerImageId"] + image_base_path, dockerRequirement["dockerImageId"] ) found = True else: @@ -348,6 +404,7 @@ def get_from_requirements( pull_image: bool, force_pull: bool, tmp_outdir_prefix: str, + image_base_path: str | None = None, ) -> str | None: """ Return the filename of the Singularity image. @@ -357,16 +414,24 @@ def get_from_requirements( if not bool(shutil.which("singularity")): raise WorkflowException("singularity executable is not available") - if not self.get_image(cast(dict[str, str], r), pull_image, tmp_outdir_prefix, force_pull): + if not self.get_image( + cast(dict[str, str], r), + pull_image, + tmp_outdir_prefix, + force_pull, + sandbox_base_path=image_base_path, + ): raise WorkflowException("Container image {} not found".format(r["dockerImageId"])) - if "CWL_SINGULARITY_CACHE" in os.environ: - cache_folder = os.environ["CWL_SINGULARITY_CACHE"] - img_path = os.path.join(cache_folder, cast(str, r["dockerImageId"])) + if "CWL_SINGULARITY_IMAGES" in os.environ: + image_base_path = os.environ["CWL_SINGULARITY_IMAGES"] + image_path = os.path.join(image_base_path, cast(str, r["dockerImageId"])) + elif image_base_path: + image_path = os.path.join(image_base_path, cast(str, r["dockerImageId"])) else: - img_path = cast(str, r["dockerImageId"]) + image_path = cast(str, r["dockerImageId"]) - return os.path.abspath(img_path) + return os.path.abspath(image_path) @staticmethod def append_volume(runtime: list[str], source: str, target: str, writable: bool = False) -> None: diff --git a/tests/sing_local_sandbox_img_id_test.cwl b/tests/sing_local_sandbox_img_id_test.cwl new file mode 100755 index 000000000..9c44a0cc5 --- /dev/null +++ b/tests/sing_local_sandbox_img_id_test.cwl @@ -0,0 +1,14 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool + +requirements: + DockerRequirement: + dockerImageId: container_repo/alpine + +inputs: + message: string + +outputs: [] + +baseCommand: echo diff --git a/tests/sing_local_sandbox_test.cwl b/tests/sing_local_sandbox_test.cwl new file mode 100755 index 000000000..64d6f6b1c --- /dev/null +++ b/tests/sing_local_sandbox_test.cwl @@ -0,0 +1,14 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool + +requirements: + DockerRequirement: + dockerPull: container_repo/alpine + +inputs: + message: string + +outputs: [] + +baseCommand: echo diff --git a/tests/test_docker.py b/tests/test_docker.py index 26534d541..30d0d0cf0 100644 --- a/tests/test_docker.py +++ b/tests/test_docker.py @@ -185,7 +185,7 @@ def test_podman_required_secfile(tmp_path: Path) -> None: def test_singularity_required_secfile(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: singularity_dir = tmp_path / "singularity" singularity_dir.mkdir() - monkeypatch.setenv("CWL_SINGULARITY_CACHE", str(singularity_dir)) + monkeypatch.setenv("CWL_SINGULARITY_IMAGES", str(singularity_dir)) result_code, stdout, stderr = get_main_output( [ @@ -247,7 +247,7 @@ def test_singularity_required_missing_secfile( ) -> None: singularity_dir = tmp_path / "singularity" singularity_dir.mkdir() - monkeypatch.setenv("CWL_SINGULARITY_CACHE", str(singularity_dir)) + monkeypatch.setenv("CWL_SINGULARITY_IMAGES", str(singularity_dir)) result_code, stdout, stderr = get_main_output( [ "--singularity", diff --git a/tests/test_iwdr.py b/tests/test_iwdr.py index 52da1fb5b..42cda6f81 100644 --- a/tests/test_iwdr.py +++ b/tests/test_iwdr.py @@ -275,7 +275,7 @@ def test_iwdr_permutations_singularity( twelfth.touch() outdir = str(tmp_path_factory.mktemp("outdir")) singularity_dir = str(tmp_path_factory.mktemp("singularity")) - monkeypatch.setenv("CWL_SINGULARITY_CACHE", singularity_dir) + monkeypatch.setenv("CWL_SINGULARITY_IMAGES", singularity_dir) err_code, stdout, _ = get_main_output( [ "--outdir", @@ -340,7 +340,7 @@ def test_iwdr_permutations_singularity_inplace( twelfth.touch() outdir = str(tmp_path_factory.mktemp("outdir")) singularity_dir = str(tmp_path_factory.mktemp("singularity")) - monkeypatch.setenv("CWL_SINGULARITY_CACHE", singularity_dir) + monkeypatch.setenv("CWL_SINGULARITY_IMAGES", singularity_dir) assert ( main( [ diff --git a/tests/test_js_sandbox.py b/tests/test_js_sandbox.py index 2c5df6339..def8e996c 100644 --- a/tests/test_js_sandbox.py +++ b/tests/test_js_sandbox.py @@ -109,7 +109,7 @@ def test_value_from_two_concatenated_expressions_singularity( factory.loading_context.debug = True factory.runtime_context.debug = True with monkeypatch.context() as m: - m.setenv("CWL_SINGULARITY_CACHE", str(singularity_cache)) + m.setenv("CWL_SINGULARITY_IMAGES", str(singularity_cache)) m.setenv("PATH", new_paths) echo = factory.make(get_data("tests/wf/vf-concat.cwl")) file = {"class": "File", "location": get_data("tests/wf/whale.txt")} diff --git a/tests/test_singularity.py b/tests/test_singularity.py index 1139dfbc7..42fc0d394 100644 --- a/tests/test_singularity.py +++ b/tests/test_singularity.py @@ -1,11 +1,18 @@ """Tests to find local Singularity image.""" +import json +import os import shutil +import subprocess +from collections.abc import Callable from pathlib import Path +from typing import Any import pytest +from mypy_extensions import KwArg, VarArg from cwltool.main import main +from cwltool.singularity import _inspect_singularity_image from .util import ( get_data, @@ -63,7 +70,7 @@ def test_singularity_workflow(tmp_path: Path) -> None: def test_singularity_iwdr(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: singularity_dir = tmp_path / "singularity" singularity_dir.mkdir() - monkeypatch.setenv("CWL_SINGULARITY_CACHE", str(singularity_dir)) + monkeypatch.setenv("CWL_SINGULARITY_IMAGES", str(singularity_dir)) result_code = main( [ "--singularity", @@ -159,3 +166,150 @@ def test_singularity3_docker_image_id_in_tool(tmp_path: Path) -> None: ] ) assert result_code1 == 0 + + +@needs_singularity +def test_singularity_local_sandbox_image(tmp_path: Path) -> None: + workdir = tmp_path / "working_dir" + workdir.mkdir() + # build a sandbox image + container_path = workdir / "container_repo" + container_path.mkdir() + cmd = [ + "singularity", + "build", + "--sandbox", + str(container_path / "alpine"), + "docker://alpine:latest", + ] + build = subprocess.run(cmd, capture_output=True, text=True) + if build.returncode == 0: + # test that we can work in sub directories + with working_directory(workdir): + result_code, _, _ = get_main_output( + [ + "--singularity", + "--disable-pull", + get_data("tests/sing_local_sandbox_test.cwl"), + "--message", + "hello", + ] + ) + assert result_code == 0 + result_code, _, _ = get_main_output( + [ + "--singularity", + "--disable-pull", + get_data("tests/sing_local_sandbox_img_id_test.cwl"), + "--message", + "hello", + ] + ) + assert result_code == 0 + # test with --singularity-sandbox-path option: + result_code, out, err = get_main_output( + [ + "--singularity", + "--disable-pull", + "--singularity-sandbox-path", + f"{workdir}", + get_data("tests/sing_local_sandbox_test.cwl"), + "--message", + "hello", + ] + ) + assert result_code == 0 + + # test with CWL_SINGULARITY_IMAGES env variable set: + os.environ["CWL_SINGULARITY_IMAGES"] = str(workdir) + result_code, _, _ = get_main_output( + [ + "--singularity", + "--disable-pull", + get_data("tests/sing_local_sandbox_test.cwl"), + "--message", + "hello", + ] + ) + assert result_code == 0 + else: + pytest.skip(f"Failed to build the singularity image: {build.stderr}") + + +@needs_singularity +def test_singularity_inspect_image(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Test inspect a real image works.""" + workdir = tmp_path / "working_dir" + workdir.mkdir() + repo_path = workdir / "container_repo" + image_path = repo_path / "alpine" + + # test image exists + repo_path.mkdir() + cmd = [ + "singularity", + "build", + "--sandbox", + str(image_path), + "docker://alpine:latest", + ] + build = subprocess.run(cmd, capture_output=True, text=True) + if build.returncode == 0: + # Verify the path is a correct container image + res_inspect = _inspect_singularity_image(str(image_path)) + assert res_inspect is True + else: + pytest.skip(f"singularity sandbox image build didn't worked: {build.stderr}") + + +class _DummyResult: # noqa: B903 + def __init__(self, rc: int, out: str) -> None: + self.returncode = rc + self.stdout = out + + +def _make_run_result( + returncode: int, stdout: str +) -> Callable[[VarArg(Any), KwArg(Any)], _DummyResult]: + """Mock subprocess.run returning returncode and stdout.""" + + def _runner(*args: Any, **kwargs: Any) -> _DummyResult: + return _DummyResult(returncode, stdout) + + return _runner + + +def test_json_decode_error_branch(monkeypatch: pytest.MonkeyPatch) -> None: + """Test json can't decode inspect result.""" + monkeypatch.setattr("cwltool.singularity.run", _make_run_result(0, "not-a-json")) + + def _raise_json_error(s: str) -> None: + # construct and raise an actual JSONDecodeError + raise json.JSONDecodeError("Expecting value", s, 0) + + monkeypatch.setattr("json.loads", _raise_json_error) + + assert _inspect_singularity_image("/tmp/image") is False + + +def test_singularity_sandbox_image_not_exists() -> None: + image_path = "/tmp/not_existing/image" + res_inspect = _inspect_singularity_image(image_path) + assert res_inspect is False + + +def test_singularity_sandbox_not_an_image(tmp_path: Path) -> None: + image_path = tmp_path / "image" + image_path.mkdir() + res_inspect = _inspect_singularity_image(str(image_path)) + assert res_inspect is False + + +def test_inspect_image_wrong_sb_call(monkeypatch: pytest.MonkeyPatch) -> None: + + def mock_failed_subprocess(*args: Any, **kwargs: Any) -> None: + raise subprocess.CalledProcessError(returncode=1, cmd=args[0]) + + monkeypatch.setattr("cwltool.singularity.run", mock_failed_subprocess) + res_inspect = _inspect_singularity_image("/tmp/container_repo/alpine") + assert res_inspect is False diff --git a/tests/test_tmpdir.py b/tests/test_tmpdir.py index 18a588cf8..168ce28a1 100644 --- a/tests/test_tmpdir.py +++ b/tests/test_tmpdir.py @@ -285,6 +285,99 @@ def test_dockerfile_singularity_build(monkeypatch: pytest.MonkeyPatch, tmp_path: shutil.rmtree(subdir) +@needs_singularity +def test_singularity_get_image_from_sandbox( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Test that SingularityCommandLineJob.get_image correctly handle sandbox image.""" + + (tmp_path / "out").mkdir(exist_ok=True) + tmp_outdir_prefix = tmp_path / "out" + tmp_outdir_prefix.mkdir(exist_ok=True) + (tmp_path / "tmp").mkdir(exist_ok=True) + tmpdir_prefix = str(tmp_path / "tmp") + runtime_context = RuntimeContext( + {"tmpdir_prefix": tmpdir_prefix, "user_space_docker_cmd": None} + ) + builder = Builder( + {}, + [], + [], + {}, + schema.Names(), + [], + [], + {}, + None, + None, + StdFsAccess, + StdFsAccess(""), + None, + 0.1, + True, + False, + False, + "no_listing", + runtime_context.get_outdir(), + runtime_context.get_tmpdir(), + runtime_context.get_stagedir(), + INTERNAL_VERSION, + "singularity", + ) + + workdir = tmp_path / "working_dir" + workdir.mkdir() + repo_path = workdir / "container_repo" + repo_path.mkdir() + image_path = repo_path / "alpine" + image_path.mkdir() + + # directory exists but is not an image + monkeypatch.setattr( + "cwltool.singularity._inspect_singularity_image", lambda *args, **kwargs: False + ) + req = {"class": "DockerRequirement", "dockerPull": f"{image_path}"} + res = SingularityCommandLineJob( + builder, {}, CommandLineTool.make_path_mapper, [], [], "" + ).get_image( + req, + pull_image=False, + tmp_outdir_prefix=str(tmp_outdir_prefix), + force_pull=False, + ) + assert req["dockerPull"].startswith("docker://") + assert res is False + + # directory exists and is an image: + monkeypatch.setattr( + "cwltool.singularity._inspect_singularity_image", lambda *args, **kwargs: True + ) + req = {"class": "DockerRequirement", "dockerPull": f"{image_path}"} + res = SingularityCommandLineJob( + builder, {}, CommandLineTool.make_path_mapper, [], [], "" + ).get_image( + req, + pull_image=False, + tmp_outdir_prefix=str(tmp_outdir_prefix), + force_pull=False, + ) + assert req["dockerImageId"] == str(image_path) + assert res + + # test that dockerImageId is set and image exists: + req = {"class": "DockerRequirement", "dockerImageId": f"{image_path}"} + res = SingularityCommandLineJob( + builder, {}, CommandLineTool.make_path_mapper, [], [], "" + ).get_image( + req, + pull_image=False, + tmp_outdir_prefix=str(tmp_outdir_prefix), + force_pull=False, + ) + assert req["dockerImageId"] == str(image_path) + assert res + + def test_docker_tmpdir_prefix(tmp_path: Path) -> None: """Test that DockerCommandLineJob respects temp directory directives.""" (tmp_path / "3").mkdir()