diff --git a/Snakefile b/Snakefile index 3cd39ff8..5ad7aa18 100644 --- a/Snakefile +++ b/Snakefile @@ -262,6 +262,16 @@ def collect_prepared_input(wildcards): return prepared_inputs +# Look up a per-algorithm container image override from config for HTCondor transfer. +# When a local .sif path is configured, it will be included in htcondor_transfer_input_files +# so the HTCondor executor transfers it to the EP alongside the job. +def get_algorithm_image(wildcards): + images = container_settings.images + override = images.get(wildcards.algorithm, "") + if override.endswith('.sif'): + return override + return None + # Run the pathway reconstruction algorithm rule reconstruct: input: collect_prepared_input @@ -271,6 +281,8 @@ rule reconstruct: # same name regardless of the inputs or parameters, and these aren't renamed until after the container command # terminates output: pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']) + resources: + htcondor_transfer_input_files=get_algorithm_image run: # Create a copy so that the updates are not written to the parameters logfile params = reconstruction_params(wildcards.algorithm, wildcards.params).copy() diff --git a/config/config.yaml b/config/config.yaml index fc718e3c..ef51de73 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -50,6 +50,30 @@ containers: # requirements = versionGE(split(Target.CondorVersion)[1], "24.8.0") && (isenforcingdiskusage =!= true) enable_profiling: false + # Override the default container image for specific algorithms. + # Keys are algorithm names (as they appear in the algorithms list below). + # Values are interpreted based on the container framework: + # + # Image reference (e.g., "pathlinker:v3"): + # Prepends the registry prefix. Works with both Docker and Apptainer. + # + # Full image reference with registry (e.g., "ghcr.io/myorg/pathlinker:v3"): + # Used as-is (prefix NOT prepended). Works with both Docker and Apptainer. 
+ # + # Local .sif file path (e.g., "images/pathlinker_v2.sif"): + # Apptainer/Singularity only. Skips pulling from registry and uses the + # pre-built .sif directly. When running via HTCondor with shared-fs-usage: none (set + # via the spras_profile config when running SPRAS against HTCondor), .sif paths listed + # here are automatically included in htcondor_transfer_input_files. + # Ignored with a warning if the framework is Docker. + # + # Example (one of each type): + # images: + # omicsintegrator1: "images/omics-integrator-1_v2.sif" # local .sif (Apptainer only) + # pathlinker: "pathlinker:v1234" # image name only (base_url/owner prepended) + # omicsintegrator2: "some-other-owner/oi2:latest" # owner/image (base_url prepended) + # mincostflow: "ghcr.io/reed-compbio/mincostflow:v2" # full registry reference (used as-is) + # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm # in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved) diff --git a/docs/contributing/index.rst b/docs/contributing/index.rst index f47e935b..44afc1ff 100644 --- a/docs/contributing/index.rst +++ b/docs/contributing/index.rst @@ -285,14 +285,24 @@ U``. Step 4: Make the Local Neighborhood wrapper accessible through SPRAS ********************************************************************** -Import the new class ``LocalNeighborhood`` in ``spras/runner.py`` and -add it to the ``algorithms`` dictionary so the wrapper functions can be -accessed. Add an entry for Local Neighborhood to the configuration file -``config/config.yaml`` and set ``include: true``. As a convention, -algorithm names are written in all lowercase without special characters. -Local Neighborhood has no other parameters. 
Optionally set ``include: -false`` for the other pathway reconstruction algorithms to make testing -faster. +Register the new algorithm by adding a single entry to the +``ALGORITHM_REGISTRY`` dictionary in ``spras/config/util.py``. The entry +maps the algorithm's lowercase name to a ``(module_path, class_name)`` +tuple that ``runner.py`` will load via ``importlib`` at startup: + +.. code:: python + + ALGORITHM_REGISTRY: dict[str, tuple[str, str]] = { + ... + "localneighborhood": ("spras.local_neighborhood", "LocalNeighborhood"), + } + +As a convention, algorithm names are written in all lowercase without +special characters. The same name is used as the key in the config file +``config/config.yaml``. Add an entry for Local Neighborhood there and +set ``include: true``. Local Neighborhood has no other parameters. +Optionally set ``include: false`` for the other pathway reconstruction +algorithms to make testing faster. The config file has an option ``owner`` under the ``containers.registry`` settings that controls which Docker Hub account @@ -448,8 +458,10 @@ of the main SPRAS repository. ``required_input`` files and the ``generate_inputs``, ``run``, and ``parse_output`` functions -#. Import the new class in ``spras/runner.py`` and add it to the - ``algorithms`` dictionary so the wrapper functions can be accessed +#. Register the new algorithm in ``ALGORITHM_REGISTRY`` in + ``spras/config/util.py`` by adding one entry ``"<name>": + ("spras.<module>", "<ClassName>")``; no changes to + ``spras/runner.py`` are needed #. 
Document the usage of the Docker wrapper and the assumptions made when implementing the wrapper diff --git a/spras/config/config.py b/spras/config/config.py index 0a4670a4..ebf10faa 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -25,6 +25,7 @@ from spras.config.container_schema import ProcessedContainerSettings from spras.config.revision import attach_spras_revision, spras_revision from spras.config.schema import DatasetSchema, RawConfig +from spras.config.util import AlgorithmName, get_valid_algorithm_names from spras.util import LoosePathLike, NpHashEncoder, hash_params_sha1_base32 config = None @@ -61,6 +62,21 @@ def __init__(self, raw_config: dict[str, Any]): self.hash_length = parsed_raw_config.hash_length # Container settings used by PRMs. self.container_settings = ProcessedContainerSettings.from_container_settings(parsed_raw_config.containers, self.hash_length) + + # Validate that all keys in containers.images are recognized algorithm names + if self.container_settings.images: + valid_names = get_valid_algorithm_names() + for key in self.container_settings.images: + try: + AlgorithmName(key) + except ValueError as err: + raise ValueError( + f"Unknown algorithm name '{key}' in configured containers.images. Is there a typo? " + f"Valid algorithm names are: {sorted(valid_names)}" + ) from err + + # The list of algorithms to run in the workflow. Each is a dict with 'name' as an expected key. + self.algorithms = None # A nested dict mapping algorithm names to dicts that map parameter hashes to parameter combinations. # Only includes algorithms that are set to be run with 'include: true'. 
self.algorithm_params: dict[str, dict[str, Any]] = dict() diff --git a/spras/config/container_schema.py b/spras/config/container_schema.py index c4f29310..70276f63 100644 --- a/spras/config/container_schema.py +++ b/spras/config/container_schema.py @@ -7,7 +7,8 @@ """ import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field +from typing import Optional from pydantic import BaseModel, ConfigDict @@ -21,6 +22,11 @@ class ContainerFramework(CaseInsensitiveEnum): apptainer = 'apptainer' dsub = 'dsub' + @property + def is_singularity_family(self) -> bool: + """True for both 'singularity' and 'apptainer', which are treated as synonyms.""" + return self in (ContainerFramework.singularity, ContainerFramework.apptainer) + class ContainerRegistry(BaseModel): base_url: str = "docker.io" "The domain of the registry" @@ -34,17 +40,20 @@ class ContainerSettings(BaseModel): framework: ContainerFramework = ContainerFramework.docker unpack_singularity: bool = False - model_config = ConfigDict(extra='forbid') enable_profiling: bool = False "A Boolean indicating whether to enable container runtime profiling (apptainer/singularity only)" registry: ContainerRegistry + images: dict[str, str] = {} + "Per-algorithm container image overrides. Keys are algorithm names; values are image references or local .sif file paths." + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) @dataclass class ProcessedContainerSettings: framework: ContainerFramework = ContainerFramework.docker unpack_singularity: bool = False + base_url: str = "docker.io" prefix: str = DEFAULT_CONTAINER_PREFIX enable_profiling: bool = False hash_length: int = 7 @@ -57,6 +66,10 @@ class ProcessedContainerSettings: We prefer this `hash_length` in our container-running logic to avoid a (future) dependency diamond. 
""" + images: dict[str, str] = field(default_factory=dict) + """Per-algorithm container image overrides from config.""" + image_override: Optional[str] = None + """Resolved image override for the current algorithm. Set at runtime by runner.run().""" @staticmethod def from_container_settings(settings: ContainerSettings, hash_length: int) -> "ProcessedContainerSettings": @@ -65,18 +78,23 @@ def from_container_settings(settings: ContainerSettings, hash_length: int) -> "P container_framework = settings.framework # Unpack settings for running in singularity mode. Needed when running PRM containers if already in a container. - if settings.unpack_singularity and container_framework != "singularity": - warnings.warn("unpack_singularity is set to True, but the container framework is not singularity. This setting will have no effect.", stacklevel=2) + if settings.unpack_singularity and not container_framework.is_singularity_family: + warnings.warn("unpack_singularity is set to True, but the container framework is not singularity or apptainer. 
This setting will have no effect.", stacklevel=2) unpack_singularity = settings.unpack_singularity # Grab registry from the config, and if none is provided default to docker + container_base_url = "docker.io" container_prefix = DEFAULT_CONTAINER_PREFIX + if settings.registry and settings.registry.base_url != "": + container_base_url = settings.registry.base_url if settings.registry and settings.registry.base_url != "" and settings.registry.owner != "": container_prefix = settings.registry.base_url + "/" + settings.registry.owner return ProcessedContainerSettings( framework=container_framework, unpack_singularity=unpack_singularity, + base_url=container_base_url, prefix=container_prefix, - hash_length=hash_length + hash_length=hash_length, + images=dict(settings.images), ) diff --git a/spras/config/util.py b/spras/config/util.py index 1a7e5e05..bffe7aae 100644 --- a/spras/config/util.py +++ b/spras/config/util.py @@ -48,6 +48,33 @@ def _missing_(cls, value: Any): yaml.representer.SafeRepresenter.represent_str, ) +# The single source of truth for all supported algorithms. +# Keys are algorithm names; values are (module_path, class_name) tuples for importlib loading. +# To register a new algorithm, add an entry here. +ALGORITHM_REGISTRY: dict[str, tuple[str, str]] = { + "allpairs": ("spras.allpairs", "AllPairs"), + "bowtiebuilder": ("spras.btb", "BowTieBuilder"), + "diamond": ("spras.diamond", "DIAMOnD"), + "domino": ("spras.domino", "DOMINO"), + "meo": ("spras.meo", "MEO"), + "mincostflow": ("spras.mincostflow", "MinCostFlow"), + "omicsintegrator1": ("spras.omicsintegrator1", "OmicsIntegrator1"), + "omicsintegrator2": ("spras.omicsintegrator2", "OmicsIntegrator2"), + "pathlinker": ("spras.pathlinker", "PathLinker"), + "responsenet": ("spras.responsenet", "ResponseNet"), + "rwr": ("spras.rwr", "RWR"), + "strwr": ("spras.strwr", "ST_RWR"), +} + +# Auto-generated enum from the registry keys. 
Inherits CaseInsensitiveEnum so +# AlgorithmName("PathLinker") resolves to AlgorithmName.pathlinker. +AlgorithmName = CaseInsensitiveEnum("AlgorithmName", {k: k for k in ALGORITHM_REGISTRY}) + +def get_valid_algorithm_names() -> set[str]: + """Return the set of valid algorithm name strings (lowercase).""" + return {member.value for member in AlgorithmName} + + class Empty(BaseModel): """ The empty base model. Used for specifying that an algorithm takes no parameters, diff --git a/spras/containers.py b/spras/containers.py index 124b9741..c30697f3 100644 --- a/spras/containers.py +++ b/spras/containers.py @@ -3,13 +3,15 @@ import re import subprocess import textwrap +import warnings +from dataclasses import dataclass from pathlib import Path, PurePath, PurePosixPath from typing import Iterator, List, Optional, Tuple, Union import docker import docker.errors -from spras.config.container_schema import ProcessedContainerSettings +from spras.config.container_schema import ContainerFramework, ProcessedContainerSettings from spras.logging import indent from spras.profiling import create_apptainer_container_stats, create_peer_cgroup from spras.util import hash_filename @@ -173,6 +175,95 @@ def env_to_items(environment: dict[str, str]) -> Iterator[str]: # TODO: we should also handle special escaping here. return (f"{key}={value}" for key, value in environment.items()) + +@dataclass(frozen=True) +class ResolvedImage: + """Result of resolve_container_image(). + + Carries the resolved image reference and whether it points to a local .sif file, + so downstream code never needs to re-inspect image_override. + """ + image: str # registry URI (e.g. 
'docker.io/reedcompbio/pathlinker:v2') or local .sif path + is_local_sif: bool # True when image is a local .sif file + + +def resolve_container_image(container_suffix: str, container_settings: ProcessedContainerSettings) -> ResolvedImage: + """ + Resolve the container image from the algorithm's default suffix and any + per-algorithm image override in container_settings. + + This is the single place where ``container_settings.image_override`` is + inspected. All downstream code (docker runner, singularity runner, dsub + runner) receives the ``ResolvedImage`` or its ``.image`` string and never + re-reads the override. + + Warnings are emitted for suspicious override patterns (excessive path depth, + bare hostnames) and for .sif overrides on non-singularity frameworks. + + @param container_suffix: algorithm's default container name (e.g. 'pathlinker:v2') + @param container_settings: the processed container settings (may include image_override) + @return: a ResolvedImage with the image string and whether it's a local .sif + """ + image_override = container_settings.image_override + + # Default: combine registry prefix with the algorithm's container suffix + container = container_settings.prefix + "/" + container_suffix + + if image_override and image_override.endswith('.sif'): + # .sif overrides are only meaningful for apptainer/singularity. + if not container_settings.framework.is_singularity_family: + warnings.warn( + f"Image override '{image_override}' is a .sif file, but the container framework is " + f"'{container_settings.framework}'. .sif overrides are only supported with " + f"apptainer/singularity. Falling back to default image.", + stacklevel=2 + ) + return ResolvedImage(image=container, is_local_sif=False) + print(f'Container image override (local .sif): {image_override}', flush=True) + return ResolvedImage(image=image_override, is_local_sif=True) + elif image_override: + # Image reference override — determine how much of the URI is specified. 
+ # Uses Docker's convention: if the first path component contains '.' or ':', + # it's a registry hostname and the reference is fully qualified. + if '/' in image_override: + first_component = image_override.split('/')[0] + if '.' in first_component or ':' in first_component: + # Full registry reference like "ghcr.io/myorg/image:tag" — use as-is + container = image_override + else: + # Owner/image like "some-other-owner/oi2:latest" — prepend base_url only + container = container_settings.base_url + "/" + image_override + else: + # Image name only like "pathlinker:v1234" — prepend full prefix (base_url/owner) + container = container_settings.prefix + "/" + image_override + + # Warn about suspicious override patterns that may indicate a typo or misconfiguration. + # We still pass them through since they may be valid in unusual registries. + parts = container.split('/') + if len(parts) > 3: + warnings.warn( + f"Container image override '{image_override}' resolves to '{container}' " + f"which has {len(parts)} path components. Standard container references " + f"have at most 3 (registry/owner/image). This may be a misconfiguration. " + f"Attempting to use '{container}' as-is.", + stacklevel=2 + ) + elif '/' not in image_override and ':' not in image_override and '.' in image_override: + warnings.warn( + f"Container image override '{image_override}' looks like a hostname " + f"without an image name or tag. Did you mean to include an image name " + f"(e.g., '{image_override}/image:tag')? 
" + f"Attempting to use '{container}' as-is.", + stacklevel=2 + ) + + print(f'Container image override: {container}', flush=True) + else: + print(f'Container image: {container}', flush=True) + + return ResolvedImage(image=container, is_local_sif=False) + + # TODO consider a better default environment variable # Follow docker-py's naming conventions (https://docker-py.readthedocs.io/en/stable/containers.html) # Technically the argument is an image, not a container, but we use container here. @@ -189,15 +280,14 @@ def run_container(container_suffix: str, command: List[str], volumes: List[Tuple @param network_disabled: Disables the network on the container. Only works for docker for now. This acts as a 'runtime assertion' that a container works w/o networking. @return: output from Singularity execute or Docker run """ - normalized_framework = container_settings.framework.casefold() - - container = container_settings.prefix + "/" + container_suffix - if normalized_framework == 'docker': - return run_container_docker(container, command, volumes, working_dir, environment, network_disabled) - elif normalized_framework == 'singularity' or normalized_framework == "apptainer": - return run_container_singularity(container, command, volumes, working_dir, out_dir, container_settings, environment) - elif normalized_framework == 'dsub': - return run_container_dsub(container, command, volumes, working_dir, environment) + resolved = resolve_container_image(container_suffix, container_settings) + + if container_settings.framework == ContainerFramework.docker: + return run_container_docker(resolved.image, command, volumes, working_dir, environment, network_disabled) + elif container_settings.framework.is_singularity_family: + return run_container_singularity(resolved, command, volumes, working_dir, out_dir, container_settings, environment) + elif container_settings.framework == ContainerFramework.dsub: + return run_container_dsub(resolved.image, command, volumes, working_dir, 
environment) else: raise ValueError(f'{container_settings.framework} is not a recognized container framework. Choose "docker", "dsub", "apptainer", or "singularity".') @@ -345,11 +435,70 @@ def run_container_docker(container: str, command: List[str], volumes: List[Tuple return out -def run_container_singularity(container: str, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str, config: ProcessedContainerSettings, environment: Optional[dict[str, str]] = None): +def _prepare_singularity_image(resolved: ResolvedImage, config: ProcessedContainerSettings): + """ + Prepare the image that apptainer/singularity should run. + + Handles sandbox unpacking and Docker URI prefixing. Does **not** inspect + ``config.image_override`` — all override logic is in ``resolve_container_image()``. + + Returns a path or URI suitable for Client.execute() or the profiling command. + The four cases are: + 1. unpack + local .sif --> unpack the .sif into a sandbox, return sandbox path + 2. unpack + registry --> pull .sif from registry, unpack into sandbox, return sandbox path + 3. local .sif, no unpack --> return the .sif path directly + 4. registry, no unpack --> return "docker://" so apptainer pulls at runtime + """ + from spython.main import Client + + if config.unpack_singularity: + unpacked_dir = Path("unpacked") + unpacked_dir.mkdir(exist_ok=True) + + if resolved.is_local_sif: + # Use pre-built .sif directly, skip pulling from registry + image_path = resolved.image + base_cont = Path(resolved.image).stem + else: + # The incoming image string is of the format //: e.g. + # hub.docker.com/reedcompbio/spras:latest + # Here we first produce a .sif image using the image name and tag (base_cont) + # and then expand that image into a sandbox directory. 
For example, + # hub.docker.com/reedcompbio/spras:latest --> spras_latest.sif --> ./spras_latest/ + path_elements = resolved.image.split("/") + base_cont = path_elements[-1] + base_cont = base_cont.replace(":", "_") + sif_file = base_cont + ".sif" + + # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert + image_path = Client.pull('docker://' + resolved.image, name=str(unpacked_dir / sif_file)) + + base_cont_path = unpacked_dir / Path(base_cont) + + # Check whether the directory for base_cont_path already exists. When running concurrent jobs, it's possible + # something else has already pulled/unpacked the container. + # Here, we expand the sif image from `image_path` to a directory indicated by `base_cont_path` + if not base_cont_path.exists(): + Client.build(recipe=image_path, image=str(base_cont_path), sandbox=True, sudo=False) + print(f'Resolved singularity image to sandbox: {base_cont_path}', flush=True) + return base_cont_path # sandbox directory + + if resolved.is_local_sif: + # Local .sif without unpacking — use directly + print(f'Resolved singularity image to local .sif: {resolved.image}', flush=True) + return resolved.image + + # No override, no unpacking — apptainer pulls and converts the Docker image at runtime + docker_uri = "docker://" + resolved.image + print(f'Resolved singularity image: {docker_uri}', flush=True) + return docker_uri + + +def run_container_singularity(resolved: ResolvedImage, command: List[str], volumes: List[Tuple[PurePath, PurePath]], working_dir: str, out_dir: str, config: ProcessedContainerSettings, environment: Optional[dict[str, str]] = None): """ Runs a command in the container using Singularity. Only available on Linux. 
- @param container: name of the DockerHub container without the 'docker://' prefix + @param resolved: the resolved container image (registry URI or local .sif path) @param command: command to run in the container @param volumes: a list of volumes to mount where each item is a (source, destination) tuple @param working_dir: the working directory in the container @@ -384,39 +533,8 @@ def run_container_singularity(container: str, command: List[str], volumes: List[ # https://docs.sylabs.io/guides/3.7/user-guide/environment_and_metadata.html#env-option singularity_options.extend(['--env', ",".join(env_to_items(environment))]) - # Handle unpacking singularity image if needed. Potentially needed for running nested unprivileged containers - expanded_image = None - if config.unpack_singularity: - # The incoming image string is of the format //: e.g. - # hub.docker.com/reedcompbio/spras:latest - # Here we first produce a .sif image using the image name and tag (base_cont) - # and then expand that image into a sandbox directory. For example, - # hub.docker.com/reedcompbio/spras:latest --> spras_latest.sif --> ./spras_latest/ - path_elements = container.split("/") - base_cont = path_elements[-1] - base_cont = base_cont.replace(":", "_").split(":")[0] - sif_file = base_cont + ".sif" - - # To allow caching unpacked singularity images without polluting git on local runs, - # we move all of the unpacked image files into a `.gitignore`d folder. - unpacked_dir = Path("unpacked") - unpacked_dir.mkdir(exist_ok=True) - - # Adding 'docker://' to the container indicates this is a Docker image Singularity must convert - image_path = Client.pull('docker://' + container, name=str(unpacked_dir / sif_file)) - - base_cont_path = unpacked_dir / Path(base_cont) - - # Check whether the directory for base_cont_path already exists. When running concurrent jobs, it's possible - # something else has already pulled/unpacked the container. 
- # Here, we expand the sif image from `image_path` to a directory indicated by `base_cont_path` - if not base_cont_path.exists(): - Client.build(recipe=image_path, image=str(base_cont_path), sandbox=True, sudo=False) - expanded_image = base_cont_path # This is the sandbox directory + image_to_run = _prepare_singularity_image(resolved, config) - # If not using the expanded sandbox image, we still need to prepend the docker:// prefix - # so apptainer knows to pull and convert the image format from docker to apptainer. - image_to_run = expanded_image if expanded_image else "docker://" + container if config.enable_profiling: # We won't end up using the spython client if profiling is enabled because # we need to run everything manually to set up the cgroup diff --git a/spras/runner.py b/spras/runner.py index fb9391f9..cd965daa 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -1,41 +1,32 @@ +import copy +import importlib from typing import Any, Mapping -# supported algorithm imports -from spras.allpairs import AllPairs -from spras.btb import BowTieBuilder -from spras.dataset import Dataset, DatasetSchema -from spras.diamond import DIAMOnD -from spras.domino import DOMINO -from spras.meo import MEO -from spras.mincostflow import MinCostFlow -from spras.omicsintegrator1 import OmicsIntegrator1 -from spras.omicsintegrator2 import OmicsIntegrator2 -from spras.pathlinker import PathLinker +from spras.config.dataset import DatasetSchema +from spras.config.util import ALGORITHM_REGISTRY, AlgorithmName +from spras.dataset import Dataset from spras.prm import PRM -from spras.responsenet import ResponseNet -from spras.rwr import RWR -from spras.strwr import ST_RWR from spras.util import LoosePathLike -algorithms: dict[str, type[PRM]] = { - "allpairs": AllPairs, - "bowtiebuilder": BowTieBuilder, - "diamond": DIAMOnD, - "domino": DOMINO, - "meo": MEO, - "mincostflow": MinCostFlow, - "omicsintegrator1": OmicsIntegrator1, - "omicsintegrator2": OmicsIntegrator2, - "pathlinker": 
PathLinker, - "responsenet": ResponseNet, - "rwr": RWR, - "strwr": ST_RWR, -} + +def _load_algorithms() -> dict[str, type[PRM]]: + """Load all algorithm classes from ALGORITHM_REGISTRY via importlib.""" + result = {} + for name, (module_path, class_name) in ALGORITHM_REGISTRY.items(): + mod = importlib.import_module(module_path) + result[name] = getattr(mod, class_name) + return result + + +# Eagerly load all algorithm classes once at import time so every call to +# get_algorithm() is a cheap dict lookup rather than a repeated importlib call. +algorithms = _load_algorithms() def get_algorithm(algorithm: str) -> type[PRM]: try: - return algorithms[algorithm.lower()] - except KeyError as exc: + algo_enum = AlgorithmName(algorithm) + return algorithms[algo_enum.value] + except (ValueError, KeyError) as exc: raise NotImplementedError(f'{algorithm} is not currently supported.') from exc def run(algorithm: str, inputs, output_file, args, container_settings): @@ -43,9 +34,13 @@ def run(algorithm: str, inputs, output_file, args, container_settings): A generic interface to the algorithm-specific run functions """ algorithm_runner = get_algorithm(algorithm) + # Resolve per-algorithm image override so containers.py can use it + settings = copy.copy(container_settings) + if settings.images and algorithm in settings.images: + settings.image_override = settings.images[algorithm] # We can't use config.config here else we would get a cyclic dependency. # Since args is a dict here, we use the 'run_typeless' utility PRM function. 
- algorithm_runner.run_typeless(inputs, output_file, args, container_settings) + algorithm_runner.run_typeless(inputs, output_file, args, settings) def get_required_inputs(algorithm: str): diff --git a/test/test_config.py b/test/test_config.py index 41551c38..cf4cdf0f 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -218,6 +218,39 @@ def test_config_container_registry(self): config.init_global(test_config) assert (config.config.container_settings.prefix == DEFAULT_CONTAINER_PREFIX) + def test_config_container_images(self): + test_config = get_test_config() + + # Default: no images key --> empty dict, image_override is None + config.init_global(test_config) + assert config.config.container_settings.images == {} + assert config.config.container_settings.image_override is None + + # Local .sif paths are stored as-is and signal HTCondor transfer via get_algorithm_image + test_config["containers"]["images"] = { + "pathlinker": "images/foo.sif", + "omicsintegrator1": "images/bar.sif", + } + config.init_global(test_config) + assert config.config.container_settings.images == { + "pathlinker": "images/foo.sif", + "omicsintegrator1": "images/bar.sif", + } + # Unconfigured algorithm → no .sif override, get_algorithm_image returns None + assert not config.config.container_settings.images.get("responsenet", "").endswith(".sif") + + # Non .sif image reference override (e.g. 
pinning a different tag) is stored as-is + test_config["containers"]["images"] = {"pathlinker": "pathlinker:v1234"} + config.init_global(test_config) + images = config.config.container_settings.images + assert images.get("pathlinker") == "pathlinker:v1234" + + def test_config_container_images_invalid_algorithm(self): + test_config = get_test_config() + test_config["containers"]["images"] = {"typo_algo": "some:image"} + with pytest.raises(ValueError, match="Unknown algorithm name 'typo_algo'"): + config.init_global(test_config) + def test_error_dataset_label(self): test_config = get_test_config() error_test_dicts = [{"label": "test$"}, {"label": "@test'"}, {"label": "[test]"}, {"label": "test-test"}, diff --git a/test/test_container_image_resolution.py b/test/test_container_image_resolution.py new file mode 100644 index 00000000..08b9c499 --- /dev/null +++ b/test/test_container_image_resolution.py @@ -0,0 +1,395 @@ +""" +Tests for the image resolution logic in containers.run_container(), +containers.resolve_container_image(), and containers._prepare_singularity_image(). +We mock the framework-specific runners so no Docker/Apptainer install is needed. +""" +import platform +import warnings +from pathlib import Path +from typing import NamedTuple, Optional +from unittest.mock import patch + +import pytest + +from spras.config.container_schema import ContainerFramework, ProcessedContainerSettings +from spras.containers import ResolvedImage, resolve_container_image + + +def make_settings(**overrides): + """Create a ProcessedContainerSettings with sensible defaults, then apply overrides. + + Accepts either ContainerFramework enum values or plain strings for 'framework'; + strings are resolved to their enum member so the property helpers work. 
+ """ + defaults = dict( + framework=ContainerFramework.docker, + unpack_singularity=False, + base_url="docker.io", + prefix="docker.io/reedcompbio", + hash_length=7, + ) + defaults.update(overrides) + if isinstance(defaults["framework"], str): + defaults["framework"] = ContainerFramework(defaults["framework"]) + return ProcessedContainerSettings(**defaults) + + +CONTAINER_SUFFIX = "pathlinker:v2" +DUMMY_COMMAND = ["echo", "test"] +DUMMY_VOLUMES = [] +DUMMY_WORKDIR = "/workdir" +DUMMY_OUTDIR = "/output" + + +class TestRunContainerImageResolution: + """Test the 5 branches of image resolution in run_container(). + + Strategy: @patch replaces the real framework runner (run_container_docker / + run_container_singularity) with a MagicMock for the duration of the test. + The mock is injected as the last parameter of the test method (e.g. mock_docker). + After calling run_container(), we inspect mock.call_args[0][0] to read the + 'container' string that run_container() resolved and passed to the runner -- + this is the value we want to assert on, without needing Docker or Apptainer. 
+ """ + + @patch("spras.containers.run_container_docker", return_value="ok") + def test_no_override_uses_default(self, mock_docker): + """No image_override combines prefix/suffix using defaults.""" + settings = make_settings(framework="docker") + from spras.containers import run_container + + run_container(CONTAINER_SUFFIX, DUMMY_COMMAND, DUMMY_VOLUMES, DUMMY_WORKDIR, DUMMY_OUTDIR, settings) + # call_args[0] is the positional args tuple; [0] is the first arg (the container image string) + container_arg = mock_docker.call_args[0][0] + assert container_arg == "docker.io/reedcompbio/pathlinker:v2" + + @patch("spras.containers.run_container_docker", return_value="ok") + def test_suffix_override_prepends_prefix(self, mock_docker): + """Suffix-style override like 'pathlinker:v1234' --> prefix/override.""" + settings = make_settings(framework="docker", image_override="pathlinker:v1234") + from spras.containers import run_container + + run_container(CONTAINER_SUFFIX, DUMMY_COMMAND, DUMMY_VOLUMES, DUMMY_WORKDIR, DUMMY_OUTDIR, settings) + container_arg = mock_docker.call_args[0][0] + assert container_arg == "docker.io/reedcompbio/pathlinker:v1234" + + @patch("spras.containers.run_container_docker", return_value="ok") + def test_full_registry_override_used_as_is(self, mock_docker): + """Full registry ref like 'ghcr.io/myorg/image:tag' --> used verbatim.""" + settings = make_settings(framework="docker", image_override="ghcr.io/myorg/pathlinker:v1234") + from spras.containers import run_container + + run_container(CONTAINER_SUFFIX, DUMMY_COMMAND, DUMMY_VOLUMES, DUMMY_WORKDIR, DUMMY_OUTDIR, settings) + container_arg = mock_docker.call_args[0][0] + assert container_arg == "ghcr.io/myorg/pathlinker:v1234" + + @patch("spras.containers.run_container_docker", return_value="ok") + def test_owner_image_override_prepends_base_url(self, mock_docker): + """Owner/image override like 'someowner/oi2:latest' --> prepend base_url only.""" + settings = make_settings(framework="docker", 
image_override="someowner/oi2:latest") + from spras.containers import run_container + + run_container(CONTAINER_SUFFIX, DUMMY_COMMAND, DUMMY_VOLUMES, DUMMY_WORKDIR, DUMMY_OUTDIR, settings) + container_arg = mock_docker.call_args[0][0] + assert container_arg == "docker.io/someowner/oi2:latest" + + @patch("spras.containers.run_container_docker", return_value="ok") + def test_sif_override_with_docker_warns_and_falls_back(self, mock_docker): + """.sif override + docker framework --> warning + fallback to default image.""" + settings = make_settings(framework="docker", image_override="images/pathlinker_v2.sif") + from spras.containers import run_container + + with pytest.warns(UserWarning, match=r"\.sif file.*apptainer/singularity"): + run_container(CONTAINER_SUFFIX, DUMMY_COMMAND, DUMMY_VOLUMES, DUMMY_WORKDIR, DUMMY_OUTDIR, settings) + container_arg = mock_docker.call_args[0][0] + assert container_arg == "docker.io/reedcompbio/pathlinker:v2" + + @patch("spras.containers.run_container_singularity", return_value="ok") + def test_sif_override_with_singularity_passes_sif(self, mock_singularity): + """.sif override + singularity --> first arg is a ResolvedImage with the .sif path.""" + settings = make_settings(framework="singularity", image_override="images/pathlinker_v1234.sif") + from spras.containers import run_container + + run_container(CONTAINER_SUFFIX, DUMMY_COMMAND, DUMMY_VOLUMES, DUMMY_WORKDIR, DUMMY_OUTDIR, settings) + resolved_arg = mock_singularity.call_args[0][0] + assert isinstance(resolved_arg, ResolvedImage) + assert resolved_arg.image == "images/pathlinker_v1234.sif" + assert resolved_arg.is_local_sif is True + + +class _OverrideCase(NamedTuple): + id: str + settings_kw: dict + expected_image: str + expected_is_sif: bool + warn_match: Optional[str] # None means no warning should be raised + + +_OVERRIDE_CASES = [ + _OverrideCase( + id="no_override_uses_default", + settings_kw=dict(framework="docker"), + expected_image="docker.io/reedcompbio/pathlinker:v2", 
+ expected_is_sif=False, + warn_match=None), + _OverrideCase( + id="suffix_override_prepends_prefix", + settings_kw=dict(framework="docker", image_override="pathlinker:v1234"), + expected_image="docker.io/reedcompbio/pathlinker:v1234", + expected_is_sif=False, + warn_match=None), + _OverrideCase( + id="full_registry_override_used_as_is", + settings_kw=dict(framework="docker", image_override="ghcr.io/myorg/pathlinker:v1234"), + expected_image="ghcr.io/myorg/pathlinker:v1234", + expected_is_sif=False, + warn_match=None), + _OverrideCase( + id="owner_image_override_prepends_base_url", + settings_kw=dict(framework="docker", image_override="someowner/oi2:latest"), + expected_image="docker.io/someowner/oi2:latest", + expected_is_sif=False, + warn_match=None), + _OverrideCase( + id="sif_override_with_docker_warns_and_returns_default", + settings_kw=dict(framework="docker", image_override="images/pathlinker_v2.sif"), + expected_image="docker.io/reedcompbio/pathlinker:v2", + expected_is_sif=False, + warn_match=r"\.sif file.*apptainer/singularity"), + _OverrideCase( + id="sif_override_with_singularity_returns_sif", + settings_kw=dict(framework="singularity", image_override="images/pathlinker_v1234.sif"), + expected_image="images/pathlinker_v1234.sif", + expected_is_sif=True, + warn_match=None), + _OverrideCase( + id="excessive_path_depth_warns", + settings_kw=dict(framework="docker", image_override="a/b/c/d/e/f"), + expected_image="docker.io/a/b/c/d/e/f", + expected_is_sif=False, + warn_match="path components"), + _OverrideCase( + id="bare_hostname_warns", + settings_kw=dict(framework="docker", image_override="hello.world"), + expected_image="docker.io/reedcompbio/hello.world", + expected_is_sif=False, + warn_match="looks like a hostname"), + _OverrideCase( + id="valid_override_no_warning", + settings_kw=dict(framework="docker", image_override="myimage:v2"), + expected_image="docker.io/reedcompbio/myimage:v2", + expected_is_sif=False, + warn_match=None), + _OverrideCase( + 
id="custom_registry_prefix", + settings_kw=dict(framework="docker", base_url="ghcr.io", prefix="ghcr.io/myorg"), + expected_image="ghcr.io/myorg/pathlinker:v2", + expected_is_sif=False, + warn_match=None), + _OverrideCase( + id="owner_image_with_custom_base_url", + settings_kw=dict(framework="docker", base_url="ghcr.io", image_override="otherowner/img:v1"), + expected_image="ghcr.io/otherowner/img:v1", + expected_is_sif=False, + warn_match=None), +] + + +class TestResolveContainerImage: + """Direct unit tests for resolve_container_image() + """ + + @pytest.mark.parametrize( + "settings_kw, expected_image, expected_is_sif, warn_match", + [(c.settings_kw, c.expected_image, c.expected_is_sif, c.warn_match) for c in _OVERRIDE_CASES], + ids=[c.id for c in _OVERRIDE_CASES], + ) + def test_resolve(self, settings_kw, expected_image, expected_is_sif, warn_match): + settings = make_settings(**settings_kw) + if warn_match: + with pytest.warns(UserWarning, match=warn_match): + result = resolve_container_image(CONTAINER_SUFFIX, settings) + else: + with warnings.catch_warnings(): + warnings.simplefilter("error") + result = resolve_container_image(CONTAINER_SUFFIX, settings) + assert result.image == expected_image + assert result.is_local_sif == expected_is_sif + + +@pytest.mark.skipif(platform.system() != 'Linux', reason="_prepare_singularity_image imports spython which requires Linux") +class TestPrepareSingularityImage: + """Test _prepare_singularity_image(), the helper that prepares the image + that apptainer/singularity should run. + + Cases 3 and 4 (unpack_singularity=False) need no mocks at all. + Cases 1 and 2 (unpack_singularity=True) mock spython.main.Client so no + real apptainer installation is required. + + Case numbering matches _prepare_singularity_image's docstring: + 1. unpack + local .sif + 2. unpack + registry + 3. local .sif, no unpack + 4. 
registry, no unpack + """ + + @patch("spython.main.Client") + def test_local_sif_with_unpack_builds_sandbox(self, mock_client, tmp_path, monkeypatch): + """Case 1: local .sif + unpack --> skips pull, calls Client.build, returns sandbox path.""" + # Run in tmp_path so the 'unpacked' directory is created there + monkeypatch.chdir(tmp_path) + + settings = make_settings(framework="singularity", unpack_singularity=True) + from spras.containers import _prepare_singularity_image + + resolved = ResolvedImage(image="images/pathlinker_v2.sif", is_local_sif=True) + result = _prepare_singularity_image(resolved, settings) + + # Should NOT have called Client.pull (the .sif is already local) + mock_client.pull.assert_not_called() + # Should have called Client.build to unpack the .sif into a sandbox + mock_client.build.assert_called_once() + build_kwargs = mock_client.build.call_args + assert build_kwargs[1]["recipe"] == "images/pathlinker_v2.sif" + assert build_kwargs[1]["sandbox"] is True + # Return value is the sandbox directory: unpacked/ + assert result == Path("unpacked") / "pathlinker_v2" + + @patch("spython.main.Client") + def test_registry_with_unpack_pulls_and_builds(self, mock_client, tmp_path, monkeypatch): + """Case 2: registry + unpack --> calls Client.pull then Client.build, returns sandbox path.""" + monkeypatch.chdir(tmp_path) + mock_client.pull.return_value = str(tmp_path / "unpacked" / "pathlinker_v2.sif") + + settings = make_settings(framework="singularity", unpack_singularity=True) + from spras.containers import _prepare_singularity_image + + resolved = ResolvedImage(image="docker.io/reedcompbio/pathlinker:v2", is_local_sif=False) + result = _prepare_singularity_image(resolved, settings) + + # Should have pulled the docker image + mock_client.pull.assert_called_once() + pull_args = mock_client.pull.call_args + assert pull_args[0][0] == "docker://docker.io/reedcompbio/pathlinker:v2" + # Should have unpacked into sandbox + mock_client.build.assert_called_once() 
+ assert mock_client.build.call_args[1]["sandbox"] is True + # Return value: unpacked/ with colon replaced by underscore + assert result == Path("unpacked") / "pathlinker_v2" + + def test_local_sif_no_unpack_returns_sif_path(self): + """Case 3: local .sif, no unpack --> returns the .sif path directly.""" + settings = make_settings(framework="singularity", unpack_singularity=False) + from spras.containers import _prepare_singularity_image + + resolved = ResolvedImage(image="images/pathlinker_v2.sif", is_local_sif=True) + result = _prepare_singularity_image(resolved, settings) + assert result == "images/pathlinker_v2.sif" + + def test_no_override_no_unpack_returns_docker_uri(self): + """Case 4: registry image, no unpack --> 'docker://'.""" + settings = make_settings(framework="singularity", unpack_singularity=False) + from spras.containers import _prepare_singularity_image + + resolved = ResolvedImage(image="docker.io/reedcompbio/pathlinker:v2", is_local_sif=False) + result = _prepare_singularity_image(resolved, settings) + assert result == "docker://docker.io/reedcompbio/pathlinker:v2" + + @patch("spython.main.Client") + def test_unpack_skips_build_if_sandbox_exists(self, mock_client, tmp_path, monkeypatch): + """If the sandbox directory already exists (e.g. 
from a concurrent job), skip Client.build.""" + monkeypatch.chdir(tmp_path) + # Pre-create the sandbox directory + (tmp_path / "unpacked" / "pathlinker_v2").mkdir(parents=True) + + settings = make_settings(framework="singularity", unpack_singularity=True) + from spras.containers import _prepare_singularity_image + + resolved = ResolvedImage(image="images/pathlinker_v2.sif", is_local_sif=True) + result = _prepare_singularity_image(resolved, settings) + + mock_client.pull.assert_not_called() + mock_client.build.assert_not_called() + assert result == Path("unpacked") / "pathlinker_v2" + + +@pytest.mark.skipif(platform.system() != 'Linux', reason="run_container_singularity is Linux-only") +class TestSifOverrideReachesClientExecute: + """Integration test: verify that a .sif override set on container_settings + flows all the way from run_container() through run_container_singularity() + and _prepare_singularity_image() down to Client.execute(image=). + + Only spython.main.Client is mocked -- all intermediate layers run for real. + Skipped on non-Linux because run_container_singularity raises NotImplementedError there. + """ + + @patch("spython.main.Client") + def test_sif_override_reaches_client_execute(self, mock_client): + from spras.containers import run_container + + settings = make_settings( + framework="singularity", + unpack_singularity=False, + image_override="images/pathlinker_v2.sif", + ) + + run_container( + CONTAINER_SUFFIX, DUMMY_COMMAND, DUMMY_VOLUMES, + DUMMY_WORKDIR, DUMMY_OUTDIR, settings, + ) + + mock_client.execute.assert_called_once() + assert mock_client.execute.call_args[1]["image"] == "images/pathlinker_v2.sif" + + +class TestEndToEndOverride: + """Test that image overrides set via containers.images in the config actually + flow through runner.run() --> algorithm.run() with the correct image_override + on the container_settings object. 
+ + We mock PathLinker.run (the algorithm entry point) to avoid file I/O and + container execution, and inspect the container_settings it receives. + """ + + @patch("spras.pathlinker.PathLinker.run") + def test_sif_override_flows_through_runner(self, mock_run): + """runner.run() should set image_override from settings.images before calling the algorithm.""" + from spras.runner import run as runner_run + + settings = make_settings( + framework="singularity", + images={"pathlinker": "my_algorithm.sif"}, + ) + + runner_run( + "pathlinker", + {"nodetypes": "dummy_nodes.txt", "network": "dummy_edges.txt"}, + "output.txt", + {"k": 5}, + settings, + ) + + mock_run.assert_called_once() + # container_settings is the 4th positional arg: run(inputs, output_file, args, container_settings) + call_settings = mock_run.call_args[0][3] + assert call_settings.image_override == "my_algorithm.sif" + + @patch("spras.pathlinker.PathLinker.run") + def test_no_override_leaves_image_override_none(self, mock_run): + """When no image override is configured, image_override should remain None.""" + from spras.runner import run as runner_run + + settings = make_settings(framework="docker") + + runner_run( + "pathlinker", + {"nodetypes": "dummy_nodes.txt", "network": "dummy_edges.txt"}, + "output.txt", + {"k": 5}, + settings, + ) + + mock_run.assert_called_once() + # container_settings is the 4th positional arg: run(inputs, output_file, args, container_settings) + call_settings = mock_run.call_args[0][3] + assert call_settings.image_override is None