diff --git a/src/rapids_singlecell/__init__.py b/src/rapids_singlecell/__init__.py index 9217ac06..7861e69e 100644 --- a/src/rapids_singlecell/__init__.py +++ b/src/rapids_singlecell/__init__.py @@ -1,10 +1,22 @@ from __future__ import annotations -import cuml.internals.logger as logger - -from . import dcg, get, gr, pp, ptg, tl +from . import dcg, get, gr, logging, pp, ptg, tl +from ._settings import Verbosity, settings from ._version import __version__ +__all__ = [ + "Verbosity", + "__version__", + "dcg", + "get", + "gr", + "logging", + "pp", + "ptg", + "settings", + "tl", +] + def _detect_duplicate_installation(): """Warn if multiple rapids_singlecell variants are installed.""" @@ -36,4 +48,3 @@ def _detect_duplicate_installation(): _detect_duplicate_installation() -logger.set_level(2) diff --git a/src/rapids_singlecell/_settings/__init__.py b/src/rapids_singlecell/_settings/__init__.py new file mode 100644 index 00000000..bcc78d25 --- /dev/null +++ b/src/rapids_singlecell/_settings/__init__.py @@ -0,0 +1,52 @@ +"""scverse-style settings for rapids_singlecell. + +Built on :class:`scverse_misc.Settings` (a Pydantic ``BaseSettings``). +Setting :attr:`settings.verbosity` updates the rsc root logger and propagates +to :mod:`cuml.internals.logger` so both stay in sync. + +Environment variables are read with the ``RAPIDS_SINGLECELL_`` prefix +(e.g. ``RAPIDS_SINGLECELL_VERBOSITY=debug``). +""" + +from __future__ import annotations + +from typing import Annotated + +from pydantic import BeforeValidator, model_validator +from scverse_misc import Settings + +from .verbosity import Verbosity + +__all__ = ["Verbosity", "settings"] + + +def _coerce_verbosity(value: object) -> Verbosity | object: + if isinstance(value, str): + try: + return Verbosity[value.lower()] + except KeyError as e: + valid = ", ".join(Verbosity.__members__) + msg = f"Cannot set verbosity to {value!r}. Valid names are: {valid}" + raise ValueError(msg) from e + return value + + +class _RscSettings( + Settings, + exported_object_name="settings", + docstring_style="numpy", +): + verbosity: Annotated[Verbosity, BeforeValidator(_coerce_verbosity)] = ( + Verbosity.warning + ) + """Logging verbosity. Accepts a :class:`Verbosity`, its int value, or its name.""" + + @model_validator(mode="after") + def _apply_logging_level(self): + from rapids_singlecell.logging import _set_log_level + + _set_log_level(self.verbosity.level) + return self + + +settings = _RscSettings() diff --git a/src/rapids_singlecell/_settings/verbosity.py b/src/rapids_singlecell/_settings/verbosity.py new file mode 100644 index 00000000..31d0c16d --- /dev/null +++ b/src/rapids_singlecell/_settings/verbosity.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +from contextlib import contextmanager +from enum import IntEnum +from logging import getLevelNamesMapping +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Generator + from typing import Literal + + type _VerbosityName = Literal["error", "warning", "info", "hint", "debug"] + + +_VERBOSITY_TO_LOGLEVEL: dict[str, str] = { + "error": "ERROR", + "warning": "WARNING", + "info": "INFO", + "hint": "HINT", + "debug": "DEBUG", +} + + +class Verbosity(IntEnum): + """Logging verbosity levels for :attr:`rapids_singlecell.settings.verbosity`.""" + + error = 0 + """Error (`0`)""" + warning = 1 + """Warning (`1`)""" + info = 2 + """Info (`2`)""" + hint = 3 + """Hint (`3`)""" + debug = 4 + """Debug (`4`)""" + + @property + def level(self) -> int: + """The :ref:`logging level ` corresponding to this verbosity level.""" + return getLevelNamesMapping()[_VERBOSITY_TO_LOGLEVEL[self.name]] + + @contextmanager + def override( + self, verbosity: Verbosity | _VerbosityName | int + ) -> Generator[Verbosity, None, None]: + """Temporarily override verbosity. + + >>> import rapids_singlecell as rsc + >>> rsc.settings.verbosity = rsc.Verbosity.info + >>> with rsc.settings.verbosity.override(rsc.Verbosity.debug): + ... rsc.settings.verbosity + + >>> rsc.settings.verbosity + + """ + from . import settings + + settings.verbosity = verbosity + try: + yield self + finally: + settings.verbosity = self diff --git a/src/rapids_singlecell/decoupler_gpu/_helper/_log.py b/src/rapids_singlecell/decoupler_gpu/_helper/_log.py index 7e1c13b2..c9ef18ac 100644 --- a/src/rapids_singlecell/decoupler_gpu/_helper/_log.py +++ b/src/rapids_singlecell/decoupler_gpu/_helper/_log.py @@ -1,30 +1,23 @@ from __future__ import annotations -import logging - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s | [%(levelname)s] %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", -) +from rapids_singlecell import logging as _rsc_logging def _log(message: str, level: str = "info", *, verbose: bool = False) -> None: - """ - Log a message with a specified logging level. + """Log a message via rapids_singlecell's root logger. Parameters ---------- message The message to log. level - The logging level. + The logging level (``"info"`` or ``"warn"``). verbose Whether to emit the log. """ - level = level.lower() - if verbose: - if level == "warn": - logging.warning(message) - elif level == "info": - logging.info(message) + if not verbose: + return + if level.lower() == "warn": + _rsc_logging.warning(message) + else: + _rsc_logging.info(message) diff --git a/src/rapids_singlecell/logging.py b/src/rapids_singlecell/logging.py new file mode 100644 index 00000000..025439d6 --- /dev/null +++ b/src/rapids_singlecell/logging.py @@ -0,0 +1,206 @@ +"""Logging utilities, modelled on :mod:`scanpy.logging`.""" + +from __future__ import annotations + +import logging +import sys +from datetime import UTC, datetime, timedelta +from functools import partial, update_wrapper +from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import TextIO + + +__all__ = [ + "debug", + "error", + "hint", + "info", + "warning", +] + +HINT = (INFO + DEBUG) // 2 +logging.addLevelName(HINT, "HINT") + + +# Mapping from rsc Verbosity -> cuml log level (which is inverted: 0=trace, 6=off). +# Built lazily to avoid importing cuml at module import time. +def _cuml_level_for(verbosity_level: int) -> object: + import cuml.internals.logger as _cuml_logger + + # stdlib levels: ERROR=40, WARNING=30, INFO=20, HINT=15, DEBUG=10. + # Lower stdlib level == more verbose; cuml uses its own enum. + if verbosity_level <= DEBUG: + return _cuml_logger.level_enum.debug + if verbosity_level <= INFO: + return _cuml_logger.level_enum.info + if verbosity_level <= WARNING: + return _cuml_logger.level_enum.warn + return _cuml_logger.level_enum.error + + +class _RootLogger(logging.RootLogger): + def __init__(self, level: int): + super().__init__(level) + self.propagate = False + _RootLogger.manager = logging.Manager(self) + + def log( + self, + level: int, + msg: str, + *, + extra: dict | None = None, + time: datetime | None = None, + deep: str | None = None, + ) -> datetime: + from ._settings import settings + + now = datetime.now(UTC) + time_passed: timedelta | None = None if time is None else now - time + extra = { + **(extra or {}), + "deep": deep if settings.verbosity.level < level else None, + "time_passed": time_passed, + } + super().log(level, msg, extra=extra) + return now + + def critical(self, msg, *, time=None, deep=None, extra=None) -> datetime: + return self.log(CRITICAL, msg, time=time, deep=deep, extra=extra) + + def error(self, msg, *, time=None, deep=None, extra=None) -> datetime: + return self.log(ERROR, msg, time=time, deep=deep, extra=extra) + + def warning(self, msg, *, time=None, deep=None, extra=None) -> datetime: + return self.log(WARNING, msg, time=time, deep=deep, extra=extra) + + def info(self, msg, *, time=None, deep=None, extra=None) -> datetime: + return self.log(INFO, msg, time=time, deep=deep, extra=extra) + + def hint(self, msg, *, time=None, deep=None, extra=None) -> datetime: + return self.log(HINT, msg, time=time, deep=deep, extra=extra) + + def debug(self, msg, *, time=None, deep=None, extra=None) -> datetime: + return self.log(DEBUG, msg, time=time, deep=deep, extra=extra) + + +class _LogFormatter(logging.Formatter): + def __init__( + self, fmt="{levelname}: {message}", datefmt="%Y-%m-%d %H:%M", style="{" + ): + super().__init__(fmt, datefmt, style) + + def format(self, record: logging.LogRecord): + format_orig = self._style._fmt + if record.levelno == INFO: + self._style._fmt = "{message}" + elif record.levelno == HINT: + self._style._fmt = "--> {message}" + elif record.levelno == DEBUG: + self._style._fmt = " {message}" + if record.time_passed: + if record.time_passed.microseconds: + record.time_passed = timedelta( + seconds=int(record.time_passed.total_seconds()) + ) + if "{time_passed}" in record.msg: + record.msg = record.msg.replace( + "{time_passed}", str(record.time_passed) + ) + else: + self._style._fmt += " ({time_passed})" + if record.deep: + record.msg = f"{record.msg}: {record.deep}" + result = logging.Formatter.format(self, record) + self._style._fmt = format_orig + return result + + +def _default_logfile() -> TextIO: + import builtins + + in_ipython = getattr(builtins, "__IPYTHON__", False) + return sys.stdout if in_ipython else sys.stderr + + +_root_logger = _RootLogger(WARNING) + + +def _install_handler(stream: TextIO) -> None: + for handler in list(_root_logger.handlers): + _root_logger.removeHandler(handler) + handler.close() + h = logging.StreamHandler(stream) + h.setFormatter(_LogFormatter()) + h.setLevel(_root_logger.level) + _root_logger.addHandler(h) + + +def _set_log_level(level: int) -> None: + """Apply ``level`` to the rsc root logger and propagate to cuml.""" + _root_logger.setLevel(level) + for h in list(_root_logger.handlers): + h.setLevel(level) + try: + import cuml.internals.logger as _cuml_logger + + _cuml_logger.set_level(_cuml_level_for(level)) + except ImportError: + pass + + +_install_handler(_default_logfile()) + + +def _copy_docs_and_signature(fn): + return partial(update_wrapper, wrapped=fn, assigned=["__doc__", "__annotations__"]) + + +def error( + msg: str, + *, + time: datetime | None = None, + deep: str | None = None, + extra: dict | None = None, +) -> datetime: + """Log message with specific level and return current time. + + Parameters + ---------- + msg + Message to display. + time + A time in the past. If passed, the difference from then to now is + appended to ``msg`` as ``(HH:MM:SS)``. If ``msg`` contains + ``{time_passed}``, the time difference is inserted at that position. + deep + If the current verbosity is higher than the log function's level, + this gets displayed as well. + extra + Additional values you can specify in ``msg`` like ``{time_passed}``. + + """ + return _root_logger.error(msg, time=time, deep=deep, extra=extra) + + +@_copy_docs_and_signature(error) +def warning(msg, *, time=None, deep=None, extra=None) -> datetime: + return _root_logger.warning(msg, time=time, deep=deep, extra=extra) + + +@_copy_docs_and_signature(error) +def info(msg, *, time=None, deep=None, extra=None) -> datetime: + return _root_logger.info(msg, time=time, deep=deep, extra=extra) + + +@_copy_docs_and_signature(error) +def hint(msg, *, time=None, deep=None, extra=None) -> datetime: + return _root_logger.hint(msg, time=time, deep=deep, extra=extra) + + +@_copy_docs_and_signature(error) +def debug(msg, *, time=None, deep=None, extra=None) -> datetime: + return _root_logger.debug(msg, time=time, deep=deep, extra=extra) diff --git a/src/rapids_singlecell/preprocessing/_harmony_integrate.py b/src/rapids_singlecell/preprocessing/_harmony_integrate.py index 2c426d9b..20f6144f 100644 --- a/src/rapids_singlecell/preprocessing/_harmony_integrate.py +++ b/src/rapids_singlecell/preprocessing/_harmony_integrate.py @@ -6,6 +6,8 @@ import cupy as cp import numpy as np +from rapids_singlecell import logging as logg + if TYPE_CHECKING: from anndata import AnnData @@ -154,6 +156,7 @@ def harmony_integrate( # Resolve flavor into internal flags if flavor not in {"harmony1", "harmony2"}: raise ValueError(f"flavor must be 'harmony1' or 'harmony2', got {flavor!r}.") + start = logg.info(f"running Harmony ({flavor})") stabilized_penalty = flavor == "harmony2" dynamic_lambda = flavor == "harmony2" @@ -248,3 +251,4 @@ def harmony_integrate( ) adata.obsm[adjusted_basis] = harmony_out.get() + logg.info(" finished", time=start) diff --git a/src/rapids_singlecell/preprocessing/_hvg/__init__.py b/src/rapids_singlecell/preprocessing/_hvg/__init__.py index 0a75415a..d0aaeea5 100644 --- a/src/rapids_singlecell/preprocessing/_hvg/__init__.py +++ b/src/rapids_singlecell/preprocessing/_hvg/__init__.py @@ -4,6 +4,7 @@ import numpy as np +from rapids_singlecell import logging as logg from rapids_singlecell.preprocessing._utils import _sanitize_column from ._cutoffs import _Cutoffs @@ -145,6 +146,7 @@ def highly_variable_genes( `highly_variable_intersection` : bool If batch_key is given, this denotes the genes that are highly variable in all batches """ + start = logg.info("extracting highly variable genes") if batch_key is not None: _sanitize_column(adata, batch_key) @@ -205,6 +207,13 @@ def highly_variable_genes( ) adata.uns["hvg"] = {"flavor": flavor} + logg.hint( + "added\n" + " 'highly_variable', boolean vector (adata.var)\n" + " 'means', float vector (adata.var)\n" + " 'dispersions', float vector (adata.var)\n" + " 'dispersions_norm', float vector (adata.var)" + ) adata.var["highly_variable"] = df["highly_variable"] adata.var["means"] = df["means"] @@ -218,3 +227,5 @@ def highly_variable_genes( adata.var["highly_variable_intersection"] = df[ "highly_variable_intersection" ] + + logg.info(" finished", time=start) diff --git a/src/rapids_singlecell/preprocessing/_neighbors/__init__.py b/src/rapids_singlecell/preprocessing/_neighbors/__init__.py index 00d74bbf..4647a4aa 100644 --- a/src/rapids_singlecell/preprocessing/_neighbors/__init__.py +++ b/src/rapids_singlecell/preprocessing/_neighbors/__init__.py @@ -6,6 +6,7 @@ import cupy as cp import numpy as np +from rapids_singlecell import logging as logg from rapids_singlecell.preprocessing._neighbors._helper import ( _check_metrics, _check_neighbors_X, @@ -174,6 +175,7 @@ def neighbors( neighbors. """ + start = logg.info("computing neighbors") adata = adata.copy() if copy else adata if adata.is_view: @@ -185,6 +187,10 @@ def neighbors( f"Valid options are: {get_args(_Algorithms)}." ) + if n_neighbors > adata.shape[0]: + n_neighbors = 1 + int(0.5 * adata.shape[0]) + logg.warning(f"n_obs too small: adjusting to `n_neighbors = {n_neighbors}`") + X = _choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs) X_contiguous = _check_neighbors_X(X, algorithm) _check_metrics(algorithm, metric) @@ -243,6 +249,15 @@ def neighbors( adata.obsp[dists_key] = distances adata.obsp[conns_key] = connectivities + logg.info( + " finished", + time=start, + deep=( + f"added to `.uns[{neighbors_key!r}]`\n" + f" `.obsp[{dists_key!r}]`, distances for each pair of neighbors\n" + f" `.obsp[{conns_key!r}]`, weighted adjacency matrix" + ), + ) return adata if copy else None @@ -370,6 +385,7 @@ def bbknn( f"Invalid algorithm '{algorithm}' for batch-balanced KNN. " f"Valid options are: {get_args(_Algorithms_bbknn)}." ) + start = logg.info("computing batch-balanced neighbors") adata = adata.copy() if copy else adata if adata.is_view: adata._init_as_actual(adata.copy()) @@ -464,4 +480,13 @@ def bbknn( adata.obsp[dists_key] = distances adata.obsp[conns_key] = connectivities + logg.info( + " finished", + time=start, + deep=( + f"added to `.uns[{neighbors_key!r}]`\n" + f" `.obsp[{dists_key!r}]`, distances for each pair of neighbors\n" + f" `.obsp[{conns_key!r}]`, weighted adjacency matrix" + ), + ) return adata if copy else None diff --git a/src/rapids_singlecell/preprocessing/_normalize.py b/src/rapids_singlecell/preprocessing/_normalize.py index 664c5b36..9b581c7b 100644 --- a/src/rapids_singlecell/preprocessing/_normalize.py +++ b/src/rapids_singlecell/preprocessing/_normalize.py @@ -10,6 +10,7 @@ from cupyx.scipy.sparse import csr_matrix from scanpy.get import _get_obs_rep, _set_obs_rep +from rapids_singlecell import logging as logg from rapids_singlecell._compat import ( DaskArray, _meta_dense, @@ -72,6 +73,7 @@ def normalize_total( Returns a normalized copy or updates `adata` with a normalized version of the original `adata.X` and `adata.layers['layer']`, depending on `inplace`. """ + start = logg.info("normalizing counts per cell") if copy: if not inplace: raise ValueError("`copy=True` cannot be used with `inplace=False`.") @@ -118,6 +120,7 @@ def normalize_total( if inplace: _set_obs_rep(adata, X, layer=layer) + logg.info(" finished", time=start) if copy: return adata elif not inplace: @@ -393,6 +396,7 @@ def log1p( in-place and returns None. """ + start = logg.info("computing log1p") if copy: if not inplace: raise ValueError("`copy=True` cannot be used with `inplace=False`.") @@ -409,6 +413,7 @@ def log1p( if inplace: _set_obs_rep(adata, X, layer=layer, obsm=obsm) + logg.info(" finished", time=start) if copy: return adata elif not inplace: diff --git a/src/rapids_singlecell/preprocessing/_pca.py b/src/rapids_singlecell/preprocessing/_pca.py index e0b25512..44858d2d 100644 --- a/src/rapids_singlecell/preprocessing/_pca.py +++ b/src/rapids_singlecell/preprocessing/_pca.py @@ -11,6 +11,7 @@ from cupyx.scipy.sparse import isspmatrix_csr from scipy.sparse import issparse +from rapids_singlecell import logging as logg from rapids_singlecell._compat import DaskArray from rapids_singlecell.get import _get_obs_rep @@ -216,6 +217,7 @@ def pca( "Either your data already only consists of highly-variable genes " "or consider running `highly_variable_genes` first." ) + start = logg.info("computing PCA") if copy: adata = adata.copy() @@ -234,6 +236,8 @@ def pca( else: n_comps = 50 + logg.info(f" with {n_comps=}") + # Auto-select sparse solver based on matrix dimensions # Lanczos is faster for large feature counts (>8000) # Covariance is faster for smaller matrices due to optimized kernels @@ -244,6 +248,14 @@ def pca( else: svd_solver = "covariance_eigh" + if not zero_center: + logg.debug( + " without zero-centering:\n" + " the explained variance does not correspond to the exact statistical definition\n" + " the first component, e.g., might be heavily influenced by different means\n" + " the following components often resemble the exact PCA very closely" + ) + if isinstance(X, DaskArray): if chunked: raise ValueError( @@ -325,6 +337,15 @@ def pca( else: adata.varm[key_varm] = _as_numpy(pca_func.components_.T) + logg.info(" finished", time=start) + logg.debug( + "and added\n" + f" {key_obsm!r}, the PCA coordinates (adata.obsm)\n" + f" {key_varm!r}, the loadings (adata.varm)\n" + f" 'variance', the variance / eigenvalues (adata.uns[{key_uns!r}])\n" + f" 'variance_ratio', the variance ratio (adata.uns[{key_uns!r}])" + ) + if copy: return adata diff --git a/src/rapids_singlecell/preprocessing/_regress_out.py b/src/rapids_singlecell/preprocessing/_regress_out.py index 8625be98..db705511 100644 --- a/src/rapids_singlecell/preprocessing/_regress_out.py +++ b/src/rapids_singlecell/preprocessing/_regress_out.py @@ -12,6 +12,7 @@ from scanpy._utils import view_to_actual from scanpy.get import _get_obs_rep, _set_obs_rep +from rapids_singlecell import logging as logg from rapids_singlecell._compat import DaskArray, _meta_dense from ._utils import _check_gpu_X, _sparse_to_dense @@ -62,6 +63,7 @@ def regress_out( if batchsize != "all" and type(batchsize) not in [int, type(None)]: raise ValueError("batchsize must be `int`, `None` or `'all'`") + start = logg.info(f"regressing out {keys}") if isinstance(adata, AnnData): view_to_actual(adata) @@ -69,6 +71,9 @@ def regress_out( _check_gpu_X(X, allow_dask=True) + if sparse.issparse(X) or (isinstance(X, DaskArray) and sparse.issparse(X._meta)): + logg.info(" sparse input is densified and may lead to high memory use") + if not sparse.issparse(X) and not isinstance(X, DaskArray) and inplace is False: X = cp.array(X) @@ -82,6 +87,7 @@ def regress_out( ) if categorical: + logg.debug("... regressing on per-gene means within categories") if len(keys) > 1: raise ValueError( "Only a single categorical variable is supported for regress_out." @@ -98,7 +104,9 @@ def regress_out( if inplace: _set_obs_rep(adata, X, layer=layer) + logg.info(" finished", time=start) else: + logg.info(" finished", time=start) return X diff --git a/src/rapids_singlecell/preprocessing/_scale.py b/src/rapids_singlecell/preprocessing/_scale.py index 0818feff..e76c2513 100644 --- a/src/rapids_singlecell/preprocessing/_scale.py +++ b/src/rapids_singlecell/preprocessing/_scale.py @@ -10,6 +10,7 @@ from cupyx.scipy import sparse from scanpy._utils import view_to_actual +from rapids_singlecell import logging as logg from rapids_singlecell._compat import ( DaskArray, _meta_dense, @@ -74,6 +75,7 @@ def scale( depending on `inplace`. """ + start = logg.info("scaling data") if copy: if not inplace: raise ValueError("`copy=True` cannot be used with `inplace=False`.") @@ -132,6 +134,7 @@ def scale( adata.var[str_mean_std[0]] = means.get() adata.var[str_mean_std[1]] = std.get() + logg.info(" finished", time=start) if copy: return adata elif not inplace: diff --git a/src/rapids_singlecell/preprocessing/_scrublet/__init__.py b/src/rapids_singlecell/preprocessing/_scrublet/__init__.py index 01245693..aa2e7396 100644 --- a/src/rapids_singlecell/preprocessing/_scrublet/__init__.py +++ b/src/rapids_singlecell/preprocessing/_scrublet/__init__.py @@ -7,9 +7,9 @@ import pandas as pd from anndata import AnnData from cupyx.scipy import sparse -from scanpy import logging as logg from scanpy.get import _get_obs_rep +from rapids_singlecell import logging as logg from rapids_singlecell import preprocessing as pp from . import pipeline diff --git a/src/rapids_singlecell/preprocessing/_scrublet/core.py b/src/rapids_singlecell/preprocessing/_scrublet/core.py index 213ff116..d93e52b7 100644 --- a/src/rapids_singlecell/preprocessing/_scrublet/core.py +++ b/src/rapids_singlecell/preprocessing/_scrublet/core.py @@ -10,6 +10,7 @@ from cupyx.scipy import sparse from scanpy.preprocessing._utils import sample_comb +from rapids_singlecell import logging as logg from rapids_singlecell._compat import _random_state_kwargs from rapids_singlecell.preprocessing._utils import get_random_state @@ -437,13 +438,13 @@ def call_doublets( try: threshold = cast("float", threshold_minimum(self.doublet_scores_sim_)) if verbose: - print( + logg.info( f"Automatically set threshold at doublet score = {threshold:.2f}" ) except Exception: # noqa: BLE001 self.predicted_doublets_ = None if verbose: - print( + logg.warning( "Failed to automatically identify doublet score threshold. " "Run `call_doublets` with user-specified threshold." ) @@ -465,7 +466,7 @@ def call_doublets( ) if verbose: - print( + logg.info( f"Detected doublet rate = {100 * self.detected_doublet_rate_:.1f}%\n" f"Estimated detectable doublet fraction = {100 * self.detectable_doublet_fraction_:.1f}%\n" "Overall doublet rate:\n" diff --git a/src/rapids_singlecell/preprocessing/_simple.py b/src/rapids_singlecell/preprocessing/_simple.py index 97f72343..ac8df180 100644 --- a/src/rapids_singlecell/preprocessing/_simple.py +++ b/src/rapids_singlecell/preprocessing/_simple.py @@ -5,6 +5,8 @@ import cupy as cp from anndata import AnnData +from rapids_singlecell import logging as logg + from ._qc import _basic_qc from ._utils import _check_gpu_X @@ -151,7 +153,7 @@ def filter_genes( if max_counts is None else f"{max_counts} counts" ) - print(msg) + logg.info(msg) if isinstance(data, AnnData) and inplace: data.var["n_counts"] = sums_genes.get() @@ -292,7 +294,7 @@ def filter_cells( if max_counts is None else f"{max_counts} counts" ) - print(msg) + logg.info(msg) if isinstance(data, AnnData) and inplace: data.obs["n_counts"] = sums_cells.get() @@ -314,4 +316,4 @@ def filter_highly_variable(adata: AnnData) -> None: if "highly_variable" in adata.var.keys(): adata._inplace_subset_var(adata.var["highly_variable"]) else: - print("Please calculate highly variable genes first") + logg.warning("Please calculate highly variable genes first") diff --git a/src/rapids_singlecell/tools/_clustering.py b/src/rapids_singlecell/tools/_clustering.py index 9082441a..fc3a301a 100644 --- a/src/rapids_singlecell/tools/_clustering.py +++ b/src/rapids_singlecell/tools/_clustering.py @@ -11,6 +11,8 @@ from scanpy.tools._utils import _choose_graph from scanpy.tools._utils_clustering import rename_groups, restrict_adjacency +from rapids_singlecell import logging as logg + from ._utils import _choose_representation if TYPE_CHECKING: @@ -202,6 +204,7 @@ def leiden( """ # Adjacency graph + start = logg.info("running Leiden clustering") adata = adata.copy() if copy else adata dtype = _check_dtype(dtype) @@ -279,6 +282,14 @@ def leiden( adata.uns[key_added]["modularity"] = ( modularities if len(modularities) > 1 else modularities[0] ) + logg.info( + " finished", + time=start, + deep=( + f"found {len(np.unique(groups))} clusters and added\n" + f" {key_added!r}, the cluster labels (adata.obs, categorical)" + ), + ) return adata if copy else None @@ -361,6 +372,7 @@ def louvain( """ # Adjacency graph + start = logg.info("running Louvain clustering") dtype = _check_dtype(dtype) adata = adata.copy() if copy else adata @@ -436,6 +448,14 @@ def louvain( adata.uns[key_added]["modularity"] = ( modularities if len(modularities) > 1 else modularities[0] ) + logg.info( + " finished", + time=start, + deep=( + f"found {len(np.unique(groups))} clusters and added\n" + f" {key_added!r}, the cluster labels (adata.obs, categorical)" + ), + ) return adata if copy else None diff --git a/src/rapids_singlecell/tools/_rank_genes_groups/__init__.py b/src/rapids_singlecell/tools/_rank_genes_groups/__init__.py index 0b9753a3..a50ddd54 100644 --- a/src/rapids_singlecell/tools/_rank_genes_groups/__init__.py +++ b/src/rapids_singlecell/tools/_rank_genes_groups/__init__.py @@ -7,6 +7,8 @@ import numpy as np import pandas as pd +from rapids_singlecell import logging as logg + from ._core import _RankGenes if TYPE_CHECKING: @@ -154,6 +156,7 @@ def rank_genes_groups( msg = "corr_method must be either 'benjamini-hochberg' or 'bonferroni'." raise ValueError(msg) + start = logg.info("ranking genes") if method is None: method = "t-test" @@ -199,6 +202,9 @@ def rank_genes_groups( pre_load=pre_load, ) + logg.debug(f"consider {groupby!r} groups:") + logg.debug(f"with sizes: {list(map(int, test_obj.group_sizes))}") + # Determine n_genes_user n_genes_user = n_genes if n_genes_user is None or n_genes_user > test_obj.X.shape[1]: @@ -258,6 +264,23 @@ def rank_genes_groups( index=False, column_dtypes=dtypes[col] ) + logg.info( + " finished", + time=start, + deep=( + f"added to `.uns[{key_added!r}]`\n" + " 'names', sorted np.recarray to be indexed by group ids\n" + " 'scores', sorted np.recarray to be indexed by group ids\n" + + ( + " 'logfoldchanges', sorted np.recarray to be indexed by group ids\n" + " 'pvals', sorted np.recarray to be indexed by group ids\n" + " 'pvals_adj', sorted np.recarray to be indexed by group ids" + if method != "logreg" + else "" + ) + ), + ) + if TYPE_CHECKING: from warnings import deprecated diff --git a/src/rapids_singlecell/tools/_tsne.py b/src/rapids_singlecell/tools/_tsne.py index 9aedb1b5..4d5df112 100644 --- a/src/rapids_singlecell/tools/_tsne.py +++ b/src/rapids_singlecell/tools/_tsne.py @@ -5,6 +5,7 @@ import cuml.internals.logger as logger from cuml.manifold import TSNE +from rapids_singlecell import logging as logg from rapids_singlecell._utils import _get_logger_level from ._utils import _choose_representation @@ -85,6 +86,7 @@ def tsne( `learning_rate`, `method`, `metric`, and `use_rep`. """ + start = logg.info("computing tSNE") adata = adata.copy() if copy else adata X = _choose_representation(adata, use_rep=use_rep, n_pcs=n_pcs) @@ -113,4 +115,13 @@ def tsne( } } + logg.info( + " finished", + time=start, + deep=( + f"added\n" + f" {key_obsm!r}, tSNE coordinates (adata.obsm)\n" + f" {key_uns!r}, tSNE parameters (adata.uns)" + ), + ) return adata if copy else None diff --git a/src/rapids_singlecell/tools/_umap.py b/src/rapids_singlecell/tools/_umap.py index 0d6ad896..3766b651 100644 --- a/src/rapids_singlecell/tools/_umap.py +++ b/src/rapids_singlecell/tools/_umap.py @@ -13,6 +13,7 @@ from scanpy.tools._utils import get_init_pos_from_paga from sklearn.utils import check_random_state +from rapids_singlecell import logging as logg from rapids_singlecell._compat import _random_state_kwargs from rapids_singlecell._utils import _get_logger_level @@ -126,6 +127,7 @@ def umap( UMAP parameters `a`, `b`, and `random_state` (if specified). """ + start = logg.info("computing UMAP") adata = adata.copy() if copy else adata if neighbors_key is None: @@ -137,6 +139,12 @@ def umap( ) neighbors = NeighborsView(adata, neighbors_key) + + if "params" not in neighbors or neighbors["params"].get("method") != "rapids": + logg.warning( + f'.obsp["{neighbors["connectivities_key"]}"] have not been computed using rsc.pp.neighbors' + ) + if a is None or b is None: a, b = find_ab_params(spread, min_dist) @@ -246,4 +254,13 @@ def umap( adata.obsm[key_obsm] = X_umap adata.uns[key_uns] = {"params": stored_params} + logg.info( + " finished", + time=start, + deep=( + f"added\n" + f" {key_obsm!r}, UMAP coordinates (adata.obsm)\n" + f" {key_uns!r}, UMAP parameters (adata.uns)" + ), + ) return adata if copy else None diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 00000000..6e3c82ea --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +import sys +from datetime import datetime +from logging import StreamHandler +from typing import TYPE_CHECKING + +import pytest + +import rapids_singlecell as rsc +from rapids_singlecell import Verbosity +from rapids_singlecell import logging as log +from rapids_singlecell import settings as s + +if TYPE_CHECKING: + from collections.abc import Generator + + +@pytest.fixture(autouse=True) +def _caplog_adapter(caplog: pytest.LogCaptureFixture) -> Generator[None, None, None]: + """Attach caplog's handler to rsc's non-propagating root logger.""" + log._root_logger.addHandler(caplog.handler) + yield + log._root_logger.removeHandler(caplog.handler) + + +@pytest.fixture(autouse=True) +def _reset_verbosity() -> Generator[None, None, None]: + """Restore default verbosity after each test.""" + original = s.verbosity + yield + s.verbosity = original + + +def test_defaults(caplog: pytest.LogCaptureFixture) -> None: + """Default verbosity is warning and a StreamHandler is installed at that level.""" + assert Verbosity.warning == 1 + + # the handler installed by `rsc.logging` (excluding caplog's adapter handler) + [handler] = (h for h in log._root_logger.handlers if h is not caplog.handler) + assert isinstance(handler, StreamHandler) + assert log._root_logger.level == s.verbosity.level + + +def test_records(caplog: pytest.LogCaptureFixture) -> None: + s.verbosity = Verbosity.debug + log.error("0") + log.warning("1") + log.info("2") + log.hint("3") + log.debug("4") + assert caplog.record_tuples == [ + ("root", 40, "0"), + ("root", 30, "1"), + ("root", 20, "2"), + ("root", 15, "3"), + ("root", 10, "4"), + ] + + +def test_formats(capsys: pytest.CaptureFixture) -> None: + log._install_handler(sys.stderr) + s.verbosity = Verbosity.debug + log.error("0") + assert capsys.readouterr().err == "ERROR: 0\n" + log.warning("1") + assert capsys.readouterr().err == "WARNING: 1\n" + log.info("2") + assert capsys.readouterr().err == "2\n" + log.hint("3") + assert capsys.readouterr().err == "--> 3\n" + log.debug("4") + assert capsys.readouterr().err == " 4\n" + + +def test_deep(capsys: pytest.CaptureFixture) -> None: + log._install_handler(sys.stderr) + s.verbosity = Verbosity.hint + log.hint("0") + assert capsys.readouterr().err == "--> 0\n" + log.hint("1", deep="1!") + assert capsys.readouterr().err == "--> 1\n" + s.verbosity = Verbosity.debug + log.hint("2") + assert capsys.readouterr().err == "--> 2\n" + log.hint("3", deep="3!") + assert capsys.readouterr().err == "--> 3: 3!\n" + + +def test_timing(monkeypatch, capsys: pytest.CaptureFixture) -> None: + counter = 0 + + class IncTime: + @staticmethod + def now(tz): + nonlocal counter + counter += 1 + return datetime(2000, 1, 1, second=counter, microsecond=counter, tzinfo=tz) + + monkeypatch.setattr(log, "datetime", IncTime) + log._install_handler(sys.stderr) + s.verbosity = Verbosity.debug + + log.hint("1") + assert counter == 1 + assert capsys.readouterr().err == "--> 1\n" + + start = log.info("2") + assert counter == 2 + assert capsys.readouterr().err == "2\n" + + log.hint("3") + assert counter == 3 + assert capsys.readouterr().err == "--> 3\n" + + log.info("4", time=start) + assert counter == 4 + assert capsys.readouterr().err == "4 (0:00:02)\n" + + log.info("5 {time_passed}", time=start) + assert counter == 5 + assert capsys.readouterr().err == "5 0:00:03\n" + + +def test_verbosity_propagates_to_cuml() -> None: + """Setting rsc.settings.verbosity also updates cuml's logger level.""" + import cuml.internals.logger as cuml_log + + s.verbosity = Verbosity.error + assert cuml_log.get_level() == cuml_log.level_enum.error + + s.verbosity = Verbosity.warning + assert cuml_log.get_level() == cuml_log.level_enum.warn + + s.verbosity = Verbosity.info + assert cuml_log.get_level() == cuml_log.level_enum.info + + s.verbosity = Verbosity.hint + # cuml has no hint level; we map hint -> info + assert cuml_log.get_level() == cuml_log.level_enum.info + + s.verbosity = Verbosity.debug + assert cuml_log.get_level() == cuml_log.level_enum.debug + + +def test_verbosity_override() -> None: + """Verbosity.override is a context manager that restores the prior level.""" + s.verbosity = Verbosity.info + with s.verbosity.override(Verbosity.debug): + assert s.verbosity == Verbosity.debug + assert s.verbosity == Verbosity.info + + +def test_settings_override() -> None: + """scverse_misc Settings.override restores the prior value.""" + s.verbosity = Verbosity.info + with s.override(verbosity="error"): + assert s.verbosity == Verbosity.error + assert s.verbosity == Verbosity.info + + +def test_string_verbosity_coercion() -> None: + """`settings.verbosity = "debug"` (string by name) is accepted.""" + s.verbosity = "debug" + assert s.verbosity == Verbosity.debug + s.verbosity = "warning" + assert s.verbosity == Verbosity.warning + + +def test_invalid_verbosity_raises() -> None: + """Unknown verbosity names raise a ValidationError-style error.""" + from pydantic import ValidationError + + with pytest.raises(ValidationError): + s.verbosity = "nonexistent" + + +def test_module_exports() -> None: + """The expected names are accessible from the rsc top level.""" + assert rsc.settings is s + assert rsc.Verbosity is Verbosity + assert rsc.logging is log