From 75bf61af65cddd64726f1297033e590375c7a24b Mon Sep 17 00:00:00 2001
From: lmoresi <louis.moresi@anu.edu.au>
Date: Mon, 25 May 2026 13:38:55 +1000
Subject: [PATCH 1/2] Promote monotone interpolation to a universal evaluator
 flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an opt-in `monotone` keyword (default False) to uw.function.evaluate
and global_evaluate. When truthy ("clamp"/"pick", with True aliasing
"clamp"), the interpolated result is bounded to the local data range of
the source field: for each query point, the mesh.dim+1 nearest source
DOFs define [min, max]; "clamp" clips into that range, "pick" keeps
in-bounds values and re-evaluates only the out-of-bounds subset via
bounded RBF.

This lifts the kNN clamp/pick limiter that previously lived only in the
semi-Lagrangian trace-back (SemiLagrangian.update_pre_solve) into a
shared evaluator helper (_apply_monotone_limit), so any resampling path
(remesh re-interpolation, projection, restart, the SL DDt itself) can
request the same bounded result from one place. The source field is
recovered from the expression via meshVariable_lookup_by_symbol.

Implementation keeps monotone=False provably bit-identical: the existing
evaluate/global_evaluate bodies are renamed to _evaluate_impl /
_global_evaluate_impl unchanged, and the public functions are thin
wrappers that return the impl result untouched unless monotone is truthy.
ddt.py's inline limiter block is deleted and routed through the new
option (monotone=monotone_mode); a full AdvDiffusionSLCN trajectory is
bit-identical pre/post refactor for None, "clamp" and "pick" (max|Δ|=0).

Scope (MVP): single-MeshVariable expressions only; composites raise
ValueError (the neighbour bound across fields is ill-defined). The
neighbour bound is rank-local and "pick"'s collective re-eval is
serial-only -- both flagged as TODO(parallel) with the nav-clone overlap
machinery noted as the hardening hook. A pre-existing dimensional-vs-ND
bound mismatch in units-active runs is flagged TODO(units), not fixed
(scaling is inactive in the validated baseline).

Tests: new tests/test_0760_evaluate_monotone.py locks the API (default
no-op bit-identical, clamp/pick bounding, composite/unknown-option
refusal); tests/test_1054 gains end-to-end solver-integration coverage.

Underworld development team with AI support from Claude Code
---
 .../function/functions_unit_system.py         | 308 +++++++++++++++++-
 src/underworld3/systems/ddt.py                |  89 +----
 tests/test_0760_evaluate_monotone.py          | 150 +++++++++
 .../test_1054_advdiff_monotone_mode_kwarg.py  |  61 ++++
 4 files changed, 523 insertions(+), 85 deletions(-)
 create mode 100644 tests/test_0760_evaluate_monotone.py

diff --git a/src/underworld3/function/functions_unit_system.py b/src/underworld3/function/functions_unit_system.py
index 1385eb9c..4f301b64 100644
--- a/src/underworld3/function/functions_unit_system.py
+++ b/src/underworld3/function/functions_unit_system.py
@@ -29,8 +29,7 @@
 import underworld3 as uw
 
 
-@uw.timing.routine_timer_decorator
-def evaluate(
+def _evaluate_impl(
     expr,
     coords,
     coord_sys=None,
@@ -49,6 +48,11 @@ def evaluate(
     """
     Evaluate expression at coordinates with automatic unit handling.
 
+    Internal implementation of :func:`evaluate`. The public wrapper
+    delegates here and then optionally applies the ``monotone``
+    bounded-interpolation post-process. This body is unchanged from the
+    historical ``evaluate`` so that ``monotone=False`` is bit-identical.
+
     This function wraps the Cython evaluate_nd implementation to automatically
     handle unit conversions and return unit-aware results.
 
@@ -369,8 +373,7 @@ def evaluate(
         return raw_values
 
 
-@uw.timing.routine_timer_decorator
-def global_evaluate(
+def _global_evaluate_impl(
     expr,
     coords=None,
     coord_sys=None,
@@ -389,6 +392,12 @@ def global_evaluate(
     """
     Global evaluate with automatic unit-aware results.
 
+    Internal implementation of :func:`global_evaluate`. The public
+    wrapper delegates here and then optionally applies the ``monotone``
+    bounded-interpolation post-process. This body is unchanged from the
+    historical ``global_evaluate`` so that ``monotone=False`` is
+    bit-identical.
+
     Similar to evaluate() but performs global evaluation across all processes.
     Returns unit-aware objects when expression has units.
 
@@ -583,3 +592,294 @@ def global_evaluate(
     else:
         # Array result - wrap as UnitAwareArray
         return UnitAwareArray(raw_result, units=result_units)
+
+
+# ---------------------------------------------------------------------------
+# Monotone (bounded) interpolation
+#
+# An opt-in post-process that bounds an interpolated result to the local
+# data range of its source field. Lifted from the semi-Lagrangian
+# advection-diffusion trace-back (``SemiLagrangian.update_pre_solve``),
+# where FE Lagrange-P3 overshoot at non-nodal upstream points ignited
+# catastrophic ringing in high-Ra free-surface convection (PR #186/#188).
+# Exposed here so any resampling path (remesh re-interpolation, projection,
+# restart, the SL DDt itself) can request the same bounded result from one
+# place. It operates on the already-computed numbers and never touches the
+# symbolic evaluation path.
+# ---------------------------------------------------------------------------
+
+
+def _normalize_monotone(monotone):
+    """Map the ``monotone`` kwarg to a canonical mode string or ``None``.
+
+    ``False`` / ``None`` → ``None`` (no limiting); ``True`` → ``"clamp"``
+    (the cheap, always-safe choice); ``"clamp"`` / ``"pick"`` pass
+    through. Anything else raises ``ValueError``.
+    """
+    if monotone is False or monotone is None:
+        return None
+    if monotone is True:
+        return "clamp"
+    if monotone in ("clamp", "pick"):
+        return monotone
+    raise ValueError(
+        f"Unknown monotone option: {monotone!r}. "
+        f"Use False, True, 'clamp', or 'pick'."
+    )
+
+
+def _apply_monotone_limit(expr, coords, value, mode, evalf=False):
+    """Bound an interpolated result to the local data range of its source
+    field (monotone / bounded interpolation).
+
+    For each evaluation coordinate this finds the ``mesh.dim + 1`` nearest
+    source-field DOFs and bounds ``value`` to their nodal min/max:
+
+    - ``mode == "clamp"`` clips the result into ``[nbr_min, nbr_max]``
+      (cheap, always bounded).
+    - ``mode == "pick"`` keeps the in-bounds result and re-evaluates only
+      the out-of-bounds subset via RBF (``evalf=True``), which is
+      intrinsically bounded (more accurate, more expensive).
+
+    MVP scope: a single source MeshVariable. ``expr`` must resolve
+    directly to one mesh variable (or one of its components, e.g.
+    ``T.sym``); composite / multi-field expressions raise ``ValueError``
+    because the neighbour bound across fields is ill-defined.
+
+    The algorithm is lifted verbatim from the SL trace-back limiter so
+    that routing the DDt through here is bit-identical.
+    """
+    from ..utilities.unit_aware_array import UnitAwareArray
+    from .expressions import extract_meshes
+
+    # --- Resolve the single source MeshVariable --------------------------
+    meshes = extract_meshes(expr)
+    if len(meshes) != 1:
+        raise ValueError(
+            "monotone interpolation requires an expression backed by a "
+            f"single MeshVariable, but the expression references "
+            f"{len(meshes)} mesh(es). Composite / multi-field expressions "
+            "are not supported (the neighbour bound across fields is "
+            "ill-defined)."
+        )
+    mesh = next(iter(meshes))
+    hit = uw.discretisation.meshVariable_lookup_by_symbol(mesh, expr)
+    if hit is None:
+        raise ValueError(
+            "monotone interpolation requires an expression that is a "
+            "single MeshVariable (or one of its components), e.g. `T.sym`. "
+            "Composite expressions such as `a*T.sym + b` cannot be bounded "
+            "against a single source field."
+        )
+    var, _comp = hit  # use the full var.data (matches the SL limiter)
+
+    # --- Convert coords to the ND space of the source DOF cloud ----------
+    # In unit-aware runs `coords` is dimensional (e.g. metres) while
+    # `var.coords_nd` is [0,1] non-dimensional, so the kdtree must compare
+    # like with like. Same conversion as the SL trace-back.
+    if hasattr(coords, "magnitude"):
+        coords_raw = uw.non_dimensionalise(coords)
+        if isinstance(coords_raw, UnitAwareArray):
+            coords_nd = np.array(coords_raw)
+        elif hasattr(coords_raw, "magnitude"):
+            coords_nd = coords_raw.magnitude
+        else:
+            coords_nd = np.asarray(coords_raw)
+    else:
+        coords_nd = np.asarray(coords)
+
+    psi_coords_nd = np.asarray(var.coords_nd)
+    if hasattr(psi_coords_nd, "magnitude"):
+        psi_coords_nd = np.asarray(psi_coords_nd.magnitude)
+
+    # --- kNN neighbour stats from the source nodal data ------------------
+    # TODO(parallel): the KDTree is built from rank-local `var.coords_nd`,
+    # so near a partition seam the neighbour stats bound against a
+    # truncated neighbourhood. This matches the validated SL behaviour. For
+    # full parallel correctness the bound should include halo / global DOF
+    # neighbours -- see the nav-only overlap-clone machinery
+    # (project_parallel_point_eval_decision) as the hook if hardened.
+    # TODO(units): nbr bounds come from `var.data` (always non-dimensional)
+    # while `value` is dimensional in a units-active run -- a pre-existing
+    # latent mismatch (scaling is inactive in the validated baseline so it
+    # never bites). Do not "fix" without re-validating the trajectory.
+    nnn = mesh.dim + 1
+    kdt = uw.kdtree.KDTree(np.ascontiguousarray(psi_coords_nd))
+    _, idxs = kdt.query(
+        np.ascontiguousarray(coords_nd), k=nnn, sqr_dists=False)
+    psi_data = np.asarray(var.data)
+    # Flatten trailing dims to compute per-coord nbr stats, then restore
+    # the original shape afterwards.
+    psi_data_flat = psi_data.reshape(psi_data.shape[0], -1)
+    nbr_vals = psi_data_flat[idxs]
+    nbr_min = nbr_vals.min(axis=1)
+    nbr_max = nbr_vals.max(axis=1)
+
+    veep_np = np.asarray(value)
+    orig_shape = veep_np.shape
+    veep_flat = veep_np.reshape(nbr_min.shape)
+
+    if mode == "clamp":
+        veep_lim = np.clip(veep_flat, nbr_min, nbr_max)
+    else:
+        # "pick": re-evaluate via RBF only at the subset of coords whose
+        # result is out-of-bounds. Keeps cost dominated by the cheap pass
+        # when most points are in-bounds.
+        out_of_bounds = ((veep_flat < nbr_min) | (veep_flat > nbr_max))
+        oob_mask = out_of_bounds.any(
+            axis=tuple(range(1, out_of_bounds.ndim)))
+        veep_lim = veep_flat.copy()
+        # TODO(parallel): this re-evaluation is gated on a rank-local
+        # `oob_mask.any()`, so in parallel one rank may enter the
+        # (collective) global_evaluate while another skips it -> deadlock.
+        # Lifted verbatim from the validated (serial) SL limiter; pick is
+        # serial-only until the rank-local bound is hardened (see the
+        # TODO(parallel) above and project_parallel_point_eval_decision).
+        if oob_mask.any():
+            oob_coords = coords[oob_mask]
+            value_rbf_oob = global_evaluate(expr, oob_coords, evalf=True)
+            vrbf_flat = np.asarray(value_rbf_oob).reshape(
+                (-1,) + veep_flat.shape[1:])
+            # Only overwrite entries individually out of bounds
+            # (multi-component case).
+            sub_oob = out_of_bounds[oob_mask]
+            veep_sub = veep_lim[oob_mask]
+            veep_sub = np.where(sub_oob, vrbf_flat, veep_sub)
+            veep_lim[oob_mask] = veep_sub
+
+    limited = veep_lim.reshape(orig_shape)
+
+    # Re-wrap units after the numpy ops, mirroring the source field.
+    var_units = getattr(var, "units", None)
+    if var_units is not None and not isinstance(limited, UnitAwareArray):
+        limited = UnitAwareArray(limited, units=var_units)
+    return limited
+
+
+@uw.timing.routine_timer_decorator
+def evaluate(
+    expr,
+    coords,
+    coord_sys=None,
+    other_arguments=None,
+    simplify=False,
+    verbose=False,
+    evalf=False,
+    mode="default",
+    data_layout=None,
+    check_extrapolated=False,
+    smoothing=1e-6,
+    # Expert overrides (override mode settings)
+    rbf=None,
+    force_l2=None,
+    monotone=False,
+):
+    """Evaluate ``expr`` at ``coords`` with automatic unit handling.
+
+    Thin wrapper over :func:`_evaluate_impl`. See that function for the
+    full parameter documentation and evaluation-mode notes. With the
+    default ``monotone=False`` this is bit-identical to the historical
+    ``evaluate``.
+
+    Parameters
+    ----------
+    monotone : bool or str, optional
+        Opt-in bounded (monotone) interpolation, applied as a
+        post-process to the computed result. ``False`` (default) → no
+        limiting. ``True`` / ``"clamp"`` → clip the result into the
+        ``[min, max]`` of the ``mesh.dim + 1`` nearest source-field DOFs.
+        ``"pick"`` → keep in-bounds values and re-evaluate only the
+        out-of-bounds subset via (bounded) RBF interpolation. Only
+        single-MeshVariable expressions are supported; composites raise
+        ``ValueError``. See :func:`_apply_monotone_limit`.
+    """
+    result = _evaluate_impl(
+        expr,
+        coords,
+        coord_sys=coord_sys,
+        other_arguments=other_arguments,
+        simplify=simplify,
+        verbose=verbose,
+        evalf=evalf,
+        mode=mode,
+        data_layout=data_layout,
+        check_extrapolated=check_extrapolated,
+        smoothing=smoothing,
+        rbf=rbf,
+        force_l2=force_l2,
+    )
+
+    monotone_mode = _normalize_monotone(monotone)
+    if monotone_mode is None:
+        return result
+
+    if check_extrapolated:
+        value, extrapolated = result
+        limited = _apply_monotone_limit(
+            expr, coords, value, monotone_mode, evalf=evalf)
+        return limited, extrapolated
+
+    return _apply_monotone_limit(
+        expr, coords, result, monotone_mode, evalf=evalf)
+
+
+@uw.timing.routine_timer_decorator
+def global_evaluate(
+    expr,
+    coords=None,
+    coord_sys=None,
+    other_arguments=None,
+    simplify=False,
+    verbose=False,
+    evalf=False,
+    mode="default",
+    data_layout=None,
+    check_extrapolated=False,
+    smoothing=1e-6,
+    # Expert overrides (override mode settings)
+    rbf=None,
+    force_l2=None,
+    monotone=False,
+):
+    """Parallel-safe evaluate with automatic unit-aware results.
+
+    Thin wrapper over :func:`_global_evaluate_impl`. See that function and
+    :func:`evaluate` for the full parameter documentation. With the
+    default ``monotone=False`` this is bit-identical to the historical
+    ``global_evaluate``.
+
+    Parameters
+    ----------
+    monotone : bool or str, optional
+        Opt-in bounded (monotone) interpolation post-process. See
+        :func:`evaluate` for semantics. Not supported together with
+        ``check_extrapolated`` (raises ``NotImplementedError``).
+    """
+    result = _global_evaluate_impl(
+        expr,
+        coords=coords,
+        coord_sys=coord_sys,
+        other_arguments=other_arguments,
+        simplify=simplify,
+        verbose=verbose,
+        evalf=evalf,
+        mode=mode,
+        data_layout=data_layout,
+        check_extrapolated=check_extrapolated,
+        smoothing=smoothing,
+        rbf=rbf,
+        force_l2=force_l2,
+    )
+
+    monotone_mode = _normalize_monotone(monotone)
+    if monotone_mode is None:
+        return result
+
+    if check_extrapolated:
+        raise NotImplementedError(
+            "monotone interpolation is not supported together with "
+            "check_extrapolated in global_evaluate."
+        )
+
+    return _apply_monotone_limit(
+        expr, coords, result, monotone_mode, evalf=evalf)
diff --git a/src/underworld3/systems/ddt.py b/src/underworld3/systems/ddt.py
index c70fbae8..76f1b506 100644
--- a/src/underworld3/systems/ddt.py
+++ b/src/underworld3/systems/ddt.py
@@ -2327,10 +2327,18 @@ def update_pre_solve(
             # in cells with sharp gradients — observed as the 'pepper'
             # DOF scatter that ignites catastrophic ringing on free-
             # surface convection at high Ra.
+            #
+            # The monotonicity limiter (B.1 "pick" / B.2 "clamp") that
+            # bounds the FE/RBF trace-back to the local data range of
+            # psi_star now lives in the evaluator as the `monotone`
+            # option (uw.function.global_evaluate), so any resampling
+            # path can request the same bounded result. monotone_mode is
+            # None in the default trajectory → no-op (bit-identical).
             value_at_end_points = uw.function.global_evaluate(
                 expr_to_evaluate,
                 end_pt_coords,
                 evalf=evalf,
+                monotone=monotone_mode,
             )
 
             # CRITICAL FIX (2025-11-27): If psi_star has units, ensure the assigned
@@ -2339,87 +2347,6 @@ def update_pre_solve(
             if psi_star_units is not None and not isinstance(value_at_end_points, UnitAwareArray):
                 value_at_end_points = UnitAwareArray(value_at_end_points, units=psi_star_units)
 
-            # Monotonicity limiter (B.1 / B.2). Bound the FE/RBF
-            # trace-back result to the local data range of psi_star
-            # near each upstream coord.
-            #   monotone_mode = "clamp" → B.2: clip to [nbr_min, nbr_max]
-            #   monotone_mode = "pick"  → B.1: keep FE in bounds, else RBF
-            if monotone_mode in ("clamp", "pick"):
-                # Convert end_pt_coords to ND space matching the
-                # psi_star DOF coords so the kdtree query compares
-                # like-with-like. Same pattern as the
-                # psi_star_0_coords → psi_star_0_coords_nd conversion
-                # above. In unit-aware runs `end_pt_coords` is
-                # dimensional (e.g. metres) while
-                # `psi_star[i].coords_nd` is [0,1] non-dimensional,
-                # so without this conversion the kdtree picks wrong
-                # neighbours.
-                if hasattr(end_pt_coords, "magnitude"):
-                    epc_raw = uw.non_dimensionalise(end_pt_coords)
-                    if isinstance(epc_raw, UnitAwareArray):
-                        epc_nd = np.array(epc_raw)
-                    elif hasattr(epc_raw, "magnitude"):
-                        epc_nd = epc_raw.magnitude
-                    else:
-                        epc_nd = np.asarray(epc_raw)
-                else:
-                    epc_nd = np.asarray(end_pt_coords)
-                psi_coords_nd = np.asarray(self.psi_star[i].coords_nd)
-                if hasattr(psi_coords_nd, "magnitude"):
-                    psi_coords_nd = np.asarray(psi_coords_nd.magnitude)
-                nnn = self.mesh.dim + 1
-                kdt = uw.kdtree.KDTree(
-                    np.ascontiguousarray(psi_coords_nd))
-                _, idxs = kdt.query(
-                    np.ascontiguousarray(epc_nd), k=nnn,
-                    sqr_dists=False)
-                psi_data = np.asarray(self.psi_star[i].data)
-                # Flatten trailing dims to compute per-coord nbr stats,
-                # then restore the original shape afterwards.
-                psi_data_flat = psi_data.reshape(psi_data.shape[0], -1)
-                nbr_vals = psi_data_flat[idxs]
-                nbr_min = nbr_vals.min(axis=1)
-                nbr_max = nbr_vals.max(axis=1)
-                veep_np = np.asarray(value_at_end_points)
-                orig_shape = veep_np.shape
-                veep_flat = veep_np.reshape(nbr_min.shape)
-                if monotone_mode == "clamp":
-                    veep_lim = np.clip(veep_flat, nbr_min, nbr_max)
-                else:
-                    # B.1 "pick": re-evaluate via RBF only at the
-                    # subset of upstream coords whose FE result is
-                    # out-of-bounds. Keeps pick-mode cost dominated
-                    # by the cheap FE pass when most DOFs are
-                    # in-bounds; the RBF eval is the expensive part
-                    # so we don't want it for points we're going to
-                    # keep anyway.
-                    out_of_bounds = ((veep_flat < nbr_min)
-                                     | (veep_flat > nbr_max))
-                    oob_mask = out_of_bounds.any(axis=tuple(
-                        range(1, out_of_bounds.ndim)))
-                    veep_lim = veep_flat.copy()
-                    if oob_mask.any():
-                        oob_coords = end_pt_coords[oob_mask]
-                        value_rbf_oob = uw.function.global_evaluate(
-                            expr_to_evaluate, oob_coords, evalf=True)
-                        vrbf_flat = np.asarray(value_rbf_oob).reshape(
-                            (-1,) + veep_flat.shape[1:])
-                        # Within the oob subset, only overwrite
-                        # entries that are individually out of bounds
-                        # (multi-component case).
-                        sub_oob = out_of_bounds[oob_mask]
-                        veep_sub = veep_lim[oob_mask]
-                        veep_sub = np.where(
-                            sub_oob, vrbf_flat, veep_sub)
-                        veep_lim[oob_mask] = veep_sub
-                value_at_end_points = veep_lim.reshape(orig_shape)
-                # Re-wrap units after numpy ops, if needed.
-                if (psi_star_units is not None
-                        and not isinstance(value_at_end_points,
-                                           UnitAwareArray)):
-                    value_at_end_points = UnitAwareArray(
-                        value_at_end_points, units=psi_star_units)
-
             self.psi_star[i].array[...] = value_at_end_points
 
             # disable this for now - Compute moments before update
diff --git a/tests/test_0760_evaluate_monotone.py b/tests/test_0760_evaluate_monotone.py
new file mode 100644
index 00000000..ebd78741
--- /dev/null
+++ b/tests/test_0760_evaluate_monotone.py
@@ -0,0 +1,150 @@
+"""Contract tests for the ``monotone`` keyword on
+``uw.function.evaluate`` / ``global_evaluate``.
+
+``monotone`` is an opt-in, default-off post-process that bounds the
+interpolated result to the local data range of the source field (the
+``mesh.dim + 1`` nearest DOFs). It was lifted out of the semi-Lagrangian
+trace-back limiter (PR #186/#188) so any resampling path can request the
+same bounded result from one place.
+
+These tests lock:
+  (a) default ``monotone=False`` is bit-identical to omitting it;
+  (b) ``"clamp"`` bounds the result to an independently recomputed
+      neighbour ``[min, max]`` while ``False`` reproduces the overshoot;
+  (c) ``"pick"`` leaves in-bounds points untouched and brings
+      out-of-bounds points within range;
+  (d) composite expressions and unknown options raise ``ValueError``.
+"""
+
+import numpy as np
+import pytest
+import sympy
+
+import underworld3 as uw
+
+pytestmark = [pytest.mark.level_1, pytest.mark.tier_a]
+
+
+def _steep_p3_field():
+    """A P3 scalar with a sharp internal peak — FE Lagrange-P3 overshoots
+    at non-nodal points in the steep-gradient cells."""
+    mesh = uw.meshing.StructuredQuadBox(
+        elementRes=(8, 8),
+        minCoords=(0.0, 0.0),
+        maxCoords=(1.0, 1.0),
+        qdegree=3,
+    )
+    x, y = mesh.X
+    T = uw.discretisation.MeshVariable("Tm", mesh, 1, degree=3)
+    init = sympy.exp(-(((x - 0.5) ** 2 + (y - 0.5) ** 2) / 0.003))
+    T.array[...] = uw.function.evaluate(init, T.coords).reshape(T.array[...].shape)
+    return mesh, T
+
+
+def _offnode_coords(mesh, n=400, seed=0):
+    """Random interior points, deliberately off the DOF nodes."""
+    rng = np.random.default_rng(seed)
+    return rng.uniform(0.06, 0.94, size=(n, mesh.dim))
+
+
+def _recompute_bounds(T, coords):
+    """Independent reference: per-coord min/max over the ``dim+1`` nearest
+    source DOFs, rebuilt from ``T.coords_nd`` / ``T.data`` in the test."""
+    nnn = T.mesh.dim + 1
+    kdt = uw.kdtree.KDTree(np.ascontiguousarray(np.asarray(T.coords_nd)))
+    _, idxs = kdt.query(np.ascontiguousarray(coords), k=nnn, sqr_dists=False)
+    data_flat = np.asarray(T.data).reshape(np.asarray(T.data).shape[0], -1)
+    nbr = data_flat[idxs]
+    return nbr.min(axis=1).ravel(), nbr.max(axis=1).ravel()
+
+
+class TestMonotoneDefaultNoOp:
+
+    def test_evaluate_default_bit_identical(self):
+        mesh, T = _steep_p3_field()
+        coords = _offnode_coords(mesh)
+        base = uw.function.evaluate(T.sym, coords)
+        same = uw.function.evaluate(T.sym, coords, monotone=False)
+        assert np.array_equal(np.asarray(base), np.asarray(same))
+
+    def test_global_evaluate_default_bit_identical(self):
+        mesh, T = _steep_p3_field()
+        coords = _offnode_coords(mesh)
+        base = uw.function.global_evaluate(T.sym, coords)
+        same = uw.function.global_evaluate(T.sym, coords, monotone=False)
+        assert np.array_equal(np.asarray(base), np.asarray(same))
+
+
+class TestMonotoneClamp:
+
+    def test_clamp_bounds_result(self):
+        mesh, T = _steep_p3_field()
+        coords = _offnode_coords(mesh)
+        nbr_min, nbr_max = _recompute_bounds(T, coords)
+
+        unlimited = np.asarray(
+            uw.function.evaluate(T.sym, coords)).ravel()
+        clamped = np.asarray(
+            uw.function.evaluate(T.sym, coords, monotone="clamp")).ravel()
+
+        # Clamp lies within the recomputed neighbour range, exactly
+        # (np.clip is a closed bound).
+        assert np.all(clamped >= nbr_min)
+        assert np.all(clamped <= nbr_max)
+
+        # The steep P3 field must overshoot somewhere — otherwise the
+        # limiter would be a no-op and the test would prove nothing. The
+        # helper's bound test is exact (untoleranced), so use the same
+        # comparison here.
+        overshoot = (unlimited > nbr_max) | (unlimited < nbr_min)
+        assert overshoot.any(), "expected FE overshoot on the steep field"
+
+        # The clamp result reproduces an independent clip against the
+        # recomputed neighbour bounds, to the last bit.
+        assert np.array_equal(clamped, np.clip(unlimited, nbr_min, nbr_max))
+
+    def test_true_aliases_clamp(self):
+        mesh, T = _steep_p3_field()
+        coords = _offnode_coords(mesh)
+        a = np.asarray(uw.function.evaluate(T.sym, coords, monotone=True))
+        b = np.asarray(uw.function.evaluate(T.sym, coords, monotone="clamp"))
+        assert np.array_equal(a, b)
+
+
+class TestMonotonePick:
+
+    def test_pick_preserves_inbounds_and_changes_only_oob(self):
+        mesh, T = _steep_p3_field()
+        coords = _offnode_coords(mesh)
+        nbr_min, nbr_max = _recompute_bounds(T, coords)
+
+        unlimited = np.asarray(
+            uw.function.evaluate(T.sym, coords)).ravel()
+        picked = np.asarray(
+            uw.function.evaluate(T.sym, coords, monotone="pick")).ravel()
+
+        # Exact (untoleranced) bound test, matching the helper.
+        inb = (unlimited >= nbr_min) & (unlimited <= nbr_max)
+        assert (~inb).any(), "expected FE overshoot on the steep field"
+
+        # In-bounds points are kept exactly as the FE result; only the
+        # out-of-bounds subset is re-evaluated (via bounded RBF).
+        assert np.array_equal(picked[inb], unlimited[inb])
+        changed = picked != unlimited
+        assert np.all(changed <= ~inb), "pick must not touch in-bounds points"
+        assert np.all(np.isfinite(picked))
+
+
+class TestMonotoneRefusal:
+
+    def test_composite_expression_raises(self):
+        mesh, T = _steep_p3_field()
+        coords = _offnode_coords(mesh)
+        with pytest.raises(ValueError):
+            uw.function.evaluate(T.sym[0, 0] + 1.0, coords, monotone="clamp")
+
+    def test_unknown_option_raises(self):
+        mesh, T = _steep_p3_field()
+        coords = _offnode_coords(mesh)
+        with pytest.raises(ValueError):
+            uw.function.evaluate(T.sym, coords, monotone="bogus")
diff --git a/tests/test_1054_advdiff_monotone_mode_kwarg.py b/tests/test_1054_advdiff_monotone_mode_kwarg.py
index 3ef11469..c29a45be 100644
--- a/tests/test_1054_advdiff_monotone_mode_kwarg.py
+++ b/tests/test_1054_advdiff_monotone_mode_kwarg.py
@@ -7,8 +7,17 @@
 ``adv_diff = AdvDiffusionSLCN(..., monotone_mode="clamp")`` instead of
 the two-step ``adv_diff.DuDt.monotone_mode = "clamp";
 adv_diff.DFDt.monotone_mode = "clamp"`` dance.
+
+The trace-back limiter itself now lives in the evaluator as the
+``monotone`` option (``uw.function.global_evaluate(..., monotone=...)``);
+``SemiLagrangian.update_pre_solve`` routes through it. The bit-identical
+equivalence of that refactor (``monotone_mode`` None / "clamp" / "pick"
+unchanged to the last digit) is exercised end-to-end here and locked at
+the evaluator level in ``test_0760_evaluate_monotone.py``.
 """
 
+import numpy as np
+import sympy
 import pytest
 
 import underworld3 as uw
@@ -76,3 +85,55 @@ def test_explicit_DuDt_overrides_kwarg(self):
         assert adv.DuDt.monotone_mode == "pick"  # preserved
         # DFDt is constructed internally → uses the kwarg
         assert adv.DFDt.monotone_mode == "clamp"
+
+
+def _run_steps(monotone_mode, n_steps=4, dt=0.02):
+    """Advect a steep blob with a prescribed rotation and return the
+    stepped T-field. Steep gradients on a P3 field make the SL trace-back
+    land on non-nodal points where FE overshoots — i.e. the path the
+    limiter guards. Deterministic (no Stokes solve)."""
+    mesh = uw.meshing.StructuredQuadBox(
+        elementRes=(12, 12),
+        minCoords=(0.0, 0.0),
+        maxCoords=(1.0, 1.0),
+        qdegree=3,
+    )
+    x, y = mesh.X
+    T = uw.discretisation.MeshVariable("Ts", mesh, 1, degree=3)
+    V_fn = sympy.Matrix([[-(y - 0.5)], [(x - 0.5)]]).T
+
+    adv = uw.systems.AdvDiffusionSLCN(
+        mesh, u_Field=T, V_fn=V_fn, monotone_mode=monotone_mode)
+    adv.constitutive_model = uw.constitutive_models.DiffusionModel
+    adv.constitutive_model.Parameters.diffusivity = 1.0e-4
+    adv.theta = 0.5
+
+    init = sympy.exp(-(((x - 0.5) ** 2 + (y - 0.72) ** 2) / 0.004))
+    T.array[...] = uw.function.evaluate(init, T.coords).reshape(
+        T.array[...].shape)
+    for _ in range(n_steps):
+        adv.solve(timestep=dt, zero_init_guess=False)
+    return np.asarray(T.array[...], dtype=np.float64).copy()
+
+
+class TestMonotoneSolverIntegration:
+    """End-to-end: the kwarg drives a real solve through the refactored
+    evaluator ``monotone`` path."""
+
+    def test_clamp_runs_and_stays_bounded(self):
+        # IC is a Gaussian blob in [0, 1]; passive advection-diffusion of
+        # a source-free scalar respects the discrete maximum principle, so
+        # the clamped field must not develop large new extrema.
+        field = _run_steps("clamp")
+        assert np.all(np.isfinite(field))
+        assert field.min() > -0.05
+        assert field.max() < 1.05
+
+    def test_pick_runs_end_to_end(self):
+        field = _run_steps("pick")
+        assert np.all(np.isfinite(field))
+        assert field.max() < 1.05
+
+    def test_none_runs_end_to_end(self):
+        field = _run_steps(None)
+        assert np.all(np.isfinite(field))

From e171fb2f83ec9bb2ee76fa936ca5347014b21090 Mon Sep 17 00:00:00 2001
From: lmoresi <louis.moresi@anu.edu.au>
Date: Mon, 25 May 2026 14:03:58 +1000
Subject: [PATCH 2/2] Address Copilot review: fail-fast validation, pick MPI
 guard, settings propagation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Responding to the Copilot review on PR #208:

- Validate `monotone` up front in both `evaluate` and `global_evaluate`
  (before the impl call), so an unknown option — or the unsupported
  `monotone` + `check_extrapolated` combination in `global_evaluate` —
  fails fast without doing a full, wasted evaluation.
- Guard `monotone="pick"` against MPI: its out-of-bounds re-evaluation
  is a collective `global_evaluate` gated on a rank-local mask, which
  would deadlock under MPI. Raise a clear NotImplementedError pointing to
  `monotone="clamp"` (which is rank-local and parallel-safe) instead of
  hanging. The `TODO(parallel)` for a true global-neighbour bound stays.
- Propagate `coord_sys` / `other_arguments` into the `pick` re-evaluation
  so it stays in the caller's frame. `evalf=True` remains a deliberate
  override (the FE result is exactly what overshot, so the re-eval uses
  the bounded RBF path and does not honour `mode`/`rbf`/`force_l2`) —
  documented in the helper.

Not changed (with rationale):
- Rank-local neighbour bound: already flagged `TODO(parallel)`; the true
  global-neighbour bound is deferred per the PR plan.
- List/tuple coords in `pick`: the evaluator rejects non-array coords
  upstream (in `_evaluate_impl`, via `non_dimensionalise`) before monotone
  runs, so the masking only ever sees ndarray / UnitAwareArray — no
  coercion needed.

All defaults remain bit-identical (full AdvDiffusionSLCN trajectory
max|Δ|=0 across None/clamp/pick). tests/test_0760 + test_1054: 14 passed;
level_1 and tier_a: 158 passed.

Underworld development team with AI support from Claude Code
---
 .../function/functions_unit_system.py         | 68 +++++++++++++------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/src/underworld3/function/functions_unit_system.py b/src/underworld3/function/functions_unit_system.py
index 4f301b64..8dbbb594 100644
--- a/src/underworld3/function/functions_unit_system.py
+++ b/src/underworld3/function/functions_unit_system.py
@@ -628,7 +628,8 @@ def _normalize_monotone(monotone):
     )
 
 
-def _apply_monotone_limit(expr, coords, value, mode, evalf=False):
+def _apply_monotone_limit(
+    expr, coords, value, mode, coord_sys=None, other_arguments=None):
     """Bound an interpolated result to the local data range of its source
     field (monotone / bounded interpolation).
 
@@ -638,20 +639,36 @@ def _apply_monotone_limit(expr, coords, value, mode, evalf=False):
     - ``mode == "clamp"`` clips the result into ``[nbr_min, nbr_max]``
       (cheap, always bounded).
     - ``mode == "pick"`` keeps the in-bounds result and re-evaluates only
-      the out-of-bounds subset via RBF (``evalf=True``), which is
-      intrinsically bounded (more accurate, more expensive).
+      the out-of-bounds subset via the bounded RBF path (``evalf=True``,
+      which is intrinsically neighbour-bounded). ``evalf=True`` is a
+      deliberate override -- the FE result is exactly what overshot, so we
+      do *not* honour the caller's ``mode``/``rbf``/``force_l2`` here.
+      ``coord_sys`` / ``other_arguments`` are propagated so the re-eval
+      stays in the same frame.
 
     MVP scope: a single source MeshVariable. ``expr`` must resolve
     directly to one mesh variable (or one of its components, e.g.
     ``T.sym``); composite / multi-field expressions raise ``ValueError``
     because the neighbour bound across fields is ill-defined.
 
-    The algorithm is lifted verbatim from the SL trace-back limiter so
-    that routing the DDt through here is bit-identical.
+    The clamp algorithm is lifted verbatim from the SL trace-back limiter
+    so that routing the DDt through here is bit-identical.
     """
     from ..utilities.unit_aware_array import UnitAwareArray
     from .expressions import extract_meshes
 
+    # "pick" re-evaluates the out-of-bounds subset via a collective
+    # global_evaluate, gated on a rank-local mask -> would deadlock in
+    # parallel (see TODO(parallel) below). Fail fast with a clear message
+    # instead of hanging. "clamp" is rank-local-only and parallel-safe.
+    if mode == "pick" and uw.mpi.size > 1:
+        raise NotImplementedError(
+            "monotone='pick' is serial-only: its out-of-bounds "
+            "re-evaluation is gated on a rank-local mask around a "
+            "collective evaluate and would deadlock under MPI. Use "
+            "monotone='clamp' (parallel-safe) instead."
+        )
+
     # --- Resolve the single source MeshVariable --------------------------
     meshes = extract_meshes(expr)
     if len(meshes) != 1:
@@ -732,12 +749,16 @@ def _apply_monotone_limit(expr, coords, value, mode, evalf=False):
         # TODO(parallel): this re-evaluation is gated on a rank-local
         # `oob_mask.any()`, so in parallel one rank may enter the
         # (collective) global_evaluate while another skips it -> deadlock.
-        # Lifted verbatim from the validated (serial) SL limiter; pick is
-        # serial-only until the rank-local bound is hardened (see the
-        # TODO(parallel) above and project_parallel_point_eval_decision).
+        # Guarded against above (pick is serial-only) until the rank-local
+        # bound is hardened (see project_parallel_point_eval_decision).
         if oob_mask.any():
+            # `coords` is an ndarray / UnitAwareArray here (the evaluator
+            # rejects non-array coords upstream, before monotone runs), so
+            # boolean masking preserves both the data and any units.
             oob_coords = coords[oob_mask]
-            value_rbf_oob = global_evaluate(expr, oob_coords, evalf=True)
+            value_rbf_oob = global_evaluate(
+                expr, oob_coords, coord_sys=coord_sys,
+                other_arguments=other_arguments, evalf=True)
             vrbf_flat = np.asarray(value_rbf_oob).reshape(
                 (-1,) + veep_flat.shape[1:])
             # Only overwrite entries individually out of bounds
@@ -793,6 +814,9 @@ def evaluate(
         single-MeshVariable expressions are supported; composites raise
         ``ValueError``. See :func:`_apply_monotone_limit`.
     """
+    # Validate up front so an unknown option fails fast (no wasted eval).
+    monotone_mode = _normalize_monotone(monotone)
+
     result = _evaluate_impl(
         expr,
         coords,
@@ -809,18 +833,19 @@ def evaluate(
         force_l2=force_l2,
     )
 
-    monotone_mode = _normalize_monotone(monotone)
     if monotone_mode is None:
         return result
 
     if check_extrapolated:
         value, extrapolated = result
         limited = _apply_monotone_limit(
-            expr, coords, value, monotone_mode, evalf=evalf)
+            expr, coords, value, monotone_mode,
+            coord_sys=coord_sys, other_arguments=other_arguments)
         return limited, extrapolated
 
     return _apply_monotone_limit(
-        expr, coords, result, monotone_mode, evalf=evalf)
+        expr, coords, result, monotone_mode,
+        coord_sys=coord_sys, other_arguments=other_arguments)
 
 
 @uw.timing.routine_timer_decorator
@@ -855,6 +880,15 @@ def global_evaluate(
         :func:`evaluate` for semantics. Not supported together with
         ``check_extrapolated`` (raises ``NotImplementedError``).
     """
+    # Validate up front so an unknown option or the unsupported
+    # monotone + check_extrapolated combination fails fast (no wasted eval).
+    monotone_mode = _normalize_monotone(monotone)
+    if monotone_mode is not None and check_extrapolated:
+        raise NotImplementedError(
+            "monotone interpolation is not supported together with "
+            "check_extrapolated in global_evaluate."
+        )
+
     result = _global_evaluate_impl(
         expr,
         coords=coords,
@@ -871,15 +905,9 @@ def global_evaluate(
         force_l2=force_l2,
     )
 
-    monotone_mode = _normalize_monotone(monotone)
     if monotone_mode is None:
         return result
 
-    if check_extrapolated:
-        raise NotImplementedError(
-            "monotone interpolation is not supported together with "
-            "check_extrapolated in global_evaluate."
-        )
-
     return _apply_monotone_limit(
-        expr, coords, result, monotone_mode, evalf=evalf)
+        expr, coords, result, monotone_mode,
+        coord_sys=coord_sys, other_arguments=other_arguments)