From e7b825ea0925268921b8a5715c29c5beb67c36e2 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 19 Jun 2025 11:49:59 +0200
Subject: [PATCH 1/8] Delete the extension

---
 openml/extensions/sklearn/__init__.py  |   43 -
 openml/extensions/sklearn/extension.py | 2270 ------------------------
 2 files changed, 2313 deletions(-)
 delete mode 100644 openml/extensions/sklearn/__init__.py
 delete mode 100644 openml/extensions/sklearn/extension.py

diff --git a/openml/extensions/sklearn/__init__.py b/openml/extensions/sklearn/__init__.py
deleted file mode 100644
index 9c1c6cba6..000000000
--- a/openml/extensions/sklearn/__init__.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# License: BSD 3-Clause
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from openml.extensions import register_extension
-
-from .extension import SklearnExtension
-
-if TYPE_CHECKING:
-    import pandas as pd
-
-__all__ = ["SklearnExtension"]
-
-register_extension(SklearnExtension)
-
-
-def cont(X: pd.DataFrame) -> pd.Series:
-    """Returns True for all non-categorical columns, False for the rest.
-
-    This is a helper function for OpenML datasets encoded as DataFrames simplifying the handling
-    of mixed data types. To build sklearn models on mixed data types, a ColumnTransformer is
-    required to process each type of columns separately.
-    This function allows transformations meant for continuous/numeric columns to access the
-    continuous/numeric columns given the dataset as DataFrame.
-
-
-def cat(X: pd.DataFrame) -> pd.Series:
-    """Returns True for all categorical columns, False for the rest.
-
-    This is a helper function for OpenML datasets encoded as DataFrames simplifying the handling
-    of mixed data types. To build sklearn models on mixed data types, a ColumnTransformer is
-    required to process each type of columns separately.
-    This function allows transformations meant for categorical columns to access the
-    categorical columns given the dataset as DataFrame.
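[Editor's note: a minimal usage sketch of the two helpers deleted above, as they worked before removal. The toy DataFrame and pipeline are illustrative only; ColumnTransformer accepts callables such as cont/cat as column selectors.]

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier

from openml.extensions.sklearn import cat, cont  # pre-deletion import path

X = pd.DataFrame({"age": [23.0, 45.0], "job": pd.Categorical(["a", "b"])})
preprocess = ColumnTransformer([
    ("num", StandardScaler(), cont),  # cont(X) -> boolean mask of numeric columns
    ("nom", OneHotEncoder(), cat),    # cat(X) -> boolean mask of categorical columns
])
clf = make_pipeline(preprocess, DecisionTreeClassifier())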
- """ - if not hasattr(X, "dtypes"): - raise AttributeError("Not a Pandas DataFrame with 'dtypes' as attribute!") - return X.dtypes == "category" diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py deleted file mode 100644 index 0c7588cdd..000000000 --- a/openml/extensions/sklearn/extension.py +++ /dev/null @@ -1,2270 +0,0 @@ -# License: BSD 3-Clause -from __future__ import annotations - -import contextlib -import copy -import importlib -import inspect -import json -import logging -import re -import sys -import time -import traceback -import warnings -from collections import OrderedDict -from json.decoder import JSONDecodeError -from re import IGNORECASE -from typing import Any, Callable, List, Sized, cast - -import numpy as np -import pandas as pd -import scipy.sparse -import scipy.stats -import sklearn.base -import sklearn.model_selection -import sklearn.pipeline -from packaging.version import Version - -import openml -from openml.exceptions import PyOpenMLError -from openml.extensions import Extension -from openml.flows import OpenMLFlow -from openml.runs.trace import PREFIX, OpenMLRunTrace, OpenMLTraceIteration -from openml.tasks import ( - OpenMLClassificationTask, - OpenMLClusteringTask, - OpenMLLearningCurveTask, - OpenMLRegressionTask, - OpenMLSupervisedTask, - OpenMLTask, -) - -logger = logging.getLogger(__name__) - - -DEPENDENCIES_PATTERN = re.compile( - r"^(?P[\w\-]+)((?P==|>=|>)" - r"(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$", -) - -# NOTE(eddiebergman): This was imported before but became deprecated, -# as a result I just enumerated them manually by copy-ing and pasting, -# recommended solution in Numpy 2.0 guide was to explicitly list them. -SIMPLE_NUMPY_TYPES = [ - np.int8, - np.int16, - np.int32, - np.int64, - np.longlong, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.ulonglong, - np.float16, - np.float32, - np.float64, - np.longdouble, - np.complex64, - np.complex128, - np.clongdouble, -] -SIMPLE_TYPES = (bool, int, float, str, *SIMPLE_NUMPY_TYPES) - -SKLEARN_PIPELINE_STRING_COMPONENTS = ("drop", "passthrough") -COMPONENT_REFERENCE = "component_reference" -COMPOSITION_STEP_CONSTANT = "composition_step_constant" - - -class SklearnExtension(Extension): - """Connect scikit-learn to OpenML-Python. - The estimators which use this extension must be scikit-learn compatible, - i.e needs to be a subclass of sklearn.base.BaseEstimator". - """ - - ################################################################################################ - # General setup - - @classmethod - def can_handle_flow(cls, flow: OpenMLFlow) -> bool: - """Check whether a given describes a scikit-learn estimator. - - This is done by parsing the ``external_version`` field. - - Parameters - ---------- - flow : OpenMLFlow - - Returns - ------- - bool - """ - return cls._is_sklearn_flow(flow) - - @classmethod - def can_handle_model(cls, model: Any) -> bool: - """Check whether a model is an instance of ``sklearn.base.BaseEstimator``. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - return isinstance(model, sklearn.base.BaseEstimator) - - @classmethod - def trim_flow_name( # noqa: C901 - cls, - long_name: str, - extra_trim_length: int = 100, - _outer: bool = True, # noqa: FBT001, FBT002 - ) -> str: - """Shorten generated sklearn flow name to at most ``max_length`` characters. - - Flows are assumed to have the following naming structure: - ``(model_selection)? (pipeline)? 
(steps)+`` - and will be shortened to: - ``sklearn.(selection.)?(pipeline.)?(steps)+`` - e.g. (white spaces and newlines added for readability) - - .. code :: - - sklearn.pipeline.Pipeline( - columntransformer=sklearn.compose._column_transformer.ColumnTransformer( - numeric=sklearn.pipeline.Pipeline( - imputer=sklearn.preprocessing.imputation.Imputer, - standardscaler=sklearn.preprocessing.data.StandardScaler), - nominal=sklearn.pipeline.Pipeline( - simpleimputer=sklearn.impute.SimpleImputer, - onehotencoder=sklearn.preprocessing._encoders.OneHotEncoder)), - variancethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, - svc=sklearn.svm.classes.SVC) - - -> - ``sklearn.Pipeline(ColumnTransformer,VarianceThreshold,SVC)`` - - Parameters - ---------- - long_name : str - The full flow name generated by the scikit-learn extension. - extra_trim_length: int (default=100) - If the trimmed name would exceed `extra_trim_length` characters, additional trimming - of the short name is performed. This reduces the produced short name length. - There is no guarantee the end result will not exceed `extra_trim_length`. - _outer : bool (default=True) - For internal use only. Specifies if the function is called recursively. - - Returns - ------- - str - - """ - - def remove_all_in_parentheses(string: str) -> str: - string, removals = re.subn(r"\([^()]*\)", "", string) - while removals > 0: - string, removals = re.subn(r"\([^()]*\)", "", string) - return string - - # Generally, we want to trim all hyperparameters, the exception to that is for model - # selection, as the `estimator` hyperparameter is very indicative of what is in the flow. - # So we first trim name of the `estimator` specified in mode selection. For reference, in - # the example below, we want to trim `sklearn.tree.tree.DecisionTreeClassifier`, and - # keep it in the final trimmed flow name: - # sklearn.pipeline.Pipeline(Imputer=sklearn.preprocessing.imputation.Imputer, - # VarianceThreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, # noqa: ERA001, E501 - # Estimator=sklearn.model_selection._search.RandomizedSearchCV(estimator= - # sklearn.tree.tree.DecisionTreeClassifier)) - if "sklearn.model_selection" in long_name: - start_index = long_name.index("sklearn.model_selection") - estimator_start = ( - start_index + long_name[start_index:].index("estimator=") + len("estimator=") - ) - - model_select_boilerplate = long_name[start_index:estimator_start] - # above is .g. 
"sklearn.model_selection._search.RandomizedSearchCV(estimator=" - model_selection_class = model_select_boilerplate.split("(")[0].split(".")[-1] - - # Now we want to also find and parse the `estimator`, for this we find the closing - # parenthesis to the model selection technique: - closing_parenthesis_expected = 1 - for char in long_name[estimator_start:]: - if char == "(": - closing_parenthesis_expected += 1 - if char == ")": - closing_parenthesis_expected -= 1 - if closing_parenthesis_expected == 0: - break - - _end: int = estimator_start + len(long_name[estimator_start:]) - 1 - model_select_pipeline = long_name[estimator_start:_end] - - trimmed_pipeline = cls.trim_flow_name(model_select_pipeline, _outer=False) - _, trimmed_pipeline = trimmed_pipeline.split(".", maxsplit=1) # trim module prefix - model_select_short = f"sklearn.{model_selection_class}[{trimmed_pipeline}]" - name = long_name[:start_index] + model_select_short + long_name[_end + 1 :] - else: - name = long_name - - module_name = long_name.split(".")[0] - short_name = module_name + ".{}" - - if name.startswith("sklearn.pipeline"): - full_pipeline_class, pipeline = name[:-1].split("(", maxsplit=1) - pipeline_class = full_pipeline_class.split(".")[-1] - # We don't want nested pipelines in the short name, so we trim all complicated - # subcomponents, i.e. those with parentheses: - pipeline = remove_all_in_parentheses(pipeline) - - # then the pipeline steps are formatted e.g.: - # step1name=sklearn.submodule.ClassName,step2name... - components = [component.split(".")[-1] for component in pipeline.split(",")] - pipeline = f"{pipeline_class}({','.join(components)})" - if len(short_name.format(pipeline)) > extra_trim_length: - pipeline = f"{pipeline_class}(...,{components[-1]})" - else: - # Just a simple component: e.g. sklearn.tree.DecisionTreeClassifier - pipeline = remove_all_in_parentheses(name).split(".")[-1] - - if not _outer: - # Anything from parenthesis in inner calls should not be culled, so we use brackets - pipeline = pipeline.replace("(", "[").replace(")", "]") - else: - # Square brackets may be introduced with nested model_selection - pipeline = pipeline.replace("[", "(").replace("]", ")") - - return short_name.format(pipeline) - - @classmethod - def _min_dependency_str(cls, sklearn_version: str) -> str: - """Returns a string containing the minimum dependencies for the sklearn version passed. 
- - Parameters - ---------- - sklearn_version : str - A version string of the xx.xx.xx - - Returns - ------- - str - """ - # This explicit check is necessary to support existing entities on the OpenML servers - # that used the fixed dependency string (in the else block) - if Version(openml.__version__) > Version("0.11"): - # OpenML v0.11 onwards supports sklearn>=0.24 - # assumption: 0.24 onwards sklearn should contain a _min_dependencies.py file with - # variables declared for extracting minimum dependency for that version - if Version(sklearn_version) >= Version("0.24"): - from sklearn import _min_dependencies as _mindep - - dependency_list = { - "numpy": f"{_mindep.NUMPY_MIN_VERSION}", - "scipy": f"{_mindep.SCIPY_MIN_VERSION}", - "joblib": f"{_mindep.JOBLIB_MIN_VERSION}", - "threadpoolctl": f"{_mindep.THREADPOOLCTL_MIN_VERSION}", - } - elif Version(sklearn_version) >= Version("0.23"): - dependency_list = { - "numpy": "1.13.3", - "scipy": "0.19.1", - "joblib": "0.11", - "threadpoolctl": "2.0.0", - } - if Version(sklearn_version).micro == 0: - dependency_list.pop("threadpoolctl") - elif Version(sklearn_version) >= Version("0.21"): - dependency_list = {"numpy": "1.11.0", "scipy": "0.17.0", "joblib": "0.11"} - elif Version(sklearn_version) >= Version("0.19"): - dependency_list = {"numpy": "1.8.2", "scipy": "0.13.3"} - else: - dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} - else: - # this is INCORRECT for sklearn versions >= 0.19 and < 0.24 - # given that OpenML has existing flows uploaded with such dependency information, - # we change no behaviour for older sklearn version, however from 0.24 onwards - # the dependency list will be accurately updated for any flow uploaded to OpenML - dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} - - sklearn_dep = f"sklearn=={sklearn_version}" - dep_str = "\n".join([f"{k}>={v}" for k, v in dependency_list.items()]) - return "\n".join([sklearn_dep, dep_str]) - - ################################################################################################ - # Methods for flow serialization and de-serialization - - def flow_to_model( - self, - flow: OpenMLFlow, - initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 - strict_version: bool = True, # noqa: FBT001, FBT002 - ) -> Any: - """Initializes a sklearn model based on a flow. - - Parameters - ---------- - flow : mixed - the object to deserialize (can be flow object, or any serialized - parameter value that is accepted by) - - initialize_with_defaults : bool, optional (default=False) - If this flag is set, the hyperparameter values of flows will be - ignored and a flow with its defaults is returned. - - strict_version : bool, default=True - Whether to fail if version requirements are not fulfilled. - - Returns - ------- - mixed - """ - return self._deserialize_sklearn( - flow, - initialize_with_defaults=initialize_with_defaults, - strict_version=strict_version, - ) - - def _deserialize_sklearn( # noqa: PLR0915, C901, PLR0912 - self, - o: Any, - components: dict | None = None, - initialize_with_defaults: bool = False, # noqa: FBT001, FBT002 - recursion_depth: int = 0, - strict_version: bool = True, # noqa: FBT002, FBT001 - ) -> Any: - """Recursive function to deserialize a scikit-learn flow. - - This function inspects an object to deserialize and decides how to do so. This function - delegates all work to the respective functions to deserialize special data structures etc. 
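[Editor's note: a sketch of the string _min_dependency_str returns for an older sklearn release, assuming a recent openml-python (>0.11) so the explicit table above applies.]

SklearnExtension._min_dependency_str("0.21.3")
# -> "sklearn==0.21.3\nnumpy>=1.11.0\nscipy>=0.17.0\njoblib>=0.11"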
- This function works on everything that has been serialized to OpenML: OpenMLFlow, - components (which are flows themselves), functions, hyperparameter distributions (for - random search) and the actual hyperparameter values themselves. - - Parameters - ---------- - o : mixed - the object to deserialize (can be flow object, or any serialized - parameter value that is accepted by) - - components : Optional[dict] - Components of the current flow being de-serialized. These will not be used when - de-serializing the actual flow, but when de-serializing a component reference. - - initialize_with_defaults : bool, optional (default=False) - If this flag is set, the hyperparameter values of flows will be - ignored and a flow with its defaults is returned. - - recursion_depth : int - The depth at which this flow is called, mostly for debugging - purposes - - strict_version : bool, default=True - Whether to fail if version requirements are not fulfilled. - - Returns - ------- - mixed - """ - logger.info( - "-{} flow_to_sklearn START o={}, components={}, init_defaults={}".format( - "-" * recursion_depth, o, components, initialize_with_defaults - ), - ) - depth_pp = recursion_depth + 1 # shortcut var, depth plus plus - - # First, we need to check whether the presented object is a json string. - # JSON strings are used to encoder parameter values. By passing around - # json strings for parameters, we make sure that we can flow_to_sklearn - # the parameter values to the correct type. - - if isinstance(o, str): - with contextlib.suppress(JSONDecodeError): - o = json.loads(o) - - if isinstance(o, dict): - # Check if the dict encodes a 'special' object, which could not - # easily converted into a string, but rather the information to - # re-create the object were stored in a dictionary. - if "oml-python:serialized_object" in o: - serialized_type = o["oml-python:serialized_object"] - value = o["value"] - if serialized_type == "type": - rval = self._deserialize_type(value) - elif serialized_type == "rv_frozen": - rval = self._deserialize_rv_frozen(value) - elif serialized_type == "function": - rval = self._deserialize_function(value) - elif serialized_type in (COMPOSITION_STEP_CONSTANT, COMPONENT_REFERENCE): - if serialized_type == COMPOSITION_STEP_CONSTANT: - pass - elif serialized_type == COMPONENT_REFERENCE: - value = self._deserialize_sklearn( - value, - recursion_depth=depth_pp, - strict_version=strict_version, - ) - else: - raise NotImplementedError(serialized_type) - assert components is not None # Necessary for mypy - step_name = value["step_name"] - key = value["key"] - component = self._deserialize_sklearn( - components[key], - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ) - # The component is now added to where it should be used - # later. It should not be passed to the constructor of the - # main flow object. 
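[Editor's note: for orientation while reading the branch above, the approximate shape of a serialized component reference; field names are the ones the code reads, values are illustrative.]

component_reference = {
    "oml-python:serialized_object": "component_reference",
    "value": {
        "key": "estimator",        # which entry of `components` to resolve
        "step_name": "estimator",  # None -> bare component, else a (name, component) tuple
        # an optional "argument_1" carries e.g. ColumnTransformer column selections
    },
}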
- del components[key] - if step_name is None: - rval = component - elif "argument_1" not in value: - rval = (step_name, component) - else: - rval = (step_name, component, value["argument_1"]) - elif serialized_type == "cv_object": - rval = self._deserialize_cross_validator( - value, - recursion_depth=recursion_depth, - strict_version=strict_version, - ) - else: - raise ValueError(f"Cannot flow_to_sklearn {serialized_type}") - - else: - rval = OrderedDict( - ( - self._deserialize_sklearn( - o=key, - components=components, - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ), - self._deserialize_sklearn( - o=value, - components=components, - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ), - ) - for key, value in sorted(o.items()) - ) - elif isinstance(o, (list, tuple)): - rval = [ - self._deserialize_sklearn( - o=element, - components=components, - initialize_with_defaults=initialize_with_defaults, - recursion_depth=depth_pp, - strict_version=strict_version, - ) - for element in o - ] - if isinstance(o, tuple): - rval = tuple(rval) - elif isinstance(o, (bool, int, float, str)) or o is None: - rval = o - elif isinstance(o, OpenMLFlow): - if not self._is_sklearn_flow(o): - raise ValueError("Only sklearn flows can be reinstantiated") - rval = self._deserialize_model( - flow=o, - keep_defaults=initialize_with_defaults, - recursion_depth=recursion_depth, - strict_version=strict_version, - ) - else: - raise TypeError(o) - logger.info(f"-{'-' * recursion_depth} flow_to_sklearn END o={o}, rval={rval}") - return rval - - def model_to_flow(self, model: Any) -> OpenMLFlow: - """Transform a scikit-learn model to a flow for uploading it to OpenML. 
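[Editor's note: hyperparameter values travel as JSON strings; a hedged sketch of one such value and how the dispatcher above treats it.]

import json

stored = json.dumps({"oml-python:serialized_object": "type", "value": "np.float64"})
# _deserialize_sklearn first json.loads() the string, sees the marker,
# and hands the payload to _deserialize_type, yielding numpy.float64.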
- - Parameters - ---------- - model : Any - - Returns - ------- - OpenMLFlow - """ - # Necessary to make pypy not complain about all the different possible return types - return self._serialize_sklearn(model) - - def _serialize_sklearn(self, o: Any, parent_model: Any | None = None) -> Any: # noqa: PLR0912, C901 - rval = None # type: Any - - # TODO: assert that only on first recursion lvl `parent_model` can be None - if self.is_estimator(o): - # is the main model or a submodel - rval = self._serialize_model(o) - elif ( - isinstance(o, (list, tuple)) - and len(o) == 2 - and o[1] in SKLEARN_PIPELINE_STRING_COMPONENTS - and isinstance(parent_model, sklearn.pipeline._BaseComposition) - ): - rval = o - elif isinstance(o, (list, tuple)): - # TODO: explain what type of parameter is here - rval = [self._serialize_sklearn(element, parent_model) for element in o] - if isinstance(o, tuple): - rval = tuple(rval) - elif isinstance(o, SIMPLE_TYPES) or o is None: - if isinstance(o, tuple(SIMPLE_NUMPY_TYPES)): - o = o.item() # type: ignore - # base parameter values - rval = o - elif isinstance(o, dict): - # TODO: explain what type of parameter is here - if not isinstance(o, OrderedDict): - o = OrderedDict(sorted(o.items())) - - rval = OrderedDict() - for key, value in o.items(): - if not isinstance(key, str): - raise TypeError( - "Can only use string as keys, you passed " - f"type {type(key)} for value {key!s}.", - ) - _key = self._serialize_sklearn(key, parent_model) - rval[_key] = self._serialize_sklearn(value, parent_model) - elif isinstance(o, type): - # TODO: explain what type of parameter is here - rval = self._serialize_type(o) - elif isinstance(o, scipy.stats.distributions.rv_frozen): - rval = self._serialize_rv_frozen(o) - # This only works for user-defined functions (and not even partial). - # I think this is exactly what we want here as there shouldn't be any - # built-in or functool.partials in a pipeline - elif inspect.isfunction(o): - # TODO: explain what type of parameter is here - rval = self._serialize_function(o) - elif self._is_cross_validator(o): - # TODO: explain what type of parameter is here - rval = self._serialize_cross_validator(o) - else: - raise TypeError(o, type(o)) - - return rval - - def get_version_information(self) -> list[str]: - """List versions of libraries required by the flow. - - Libraries listed are ``Python``, ``scikit-learn``, ``numpy`` and ``scipy``. - - Returns - ------- - List - """ - # This can possibly be done by a package such as pyxb, but I could not get - # it to work properly. - import numpy - import scipy - import sklearn - - major, minor, micro, _, _ = sys.version_info - python_version = f"Python_{'.'.join([str(major), str(minor), str(micro)])}." - sklearn_version = f"Sklearn_{sklearn.__version__}." - numpy_version = f"NumPy_{numpy.__version__}." # type: ignore - scipy_version = f"SciPy_{scipy.__version__}." - - return [python_version, sklearn_version, numpy_version, scipy_version] - - def create_setup_string(self, model: Any) -> str: # noqa: ARG002 - """Create a string which can be used to reinstantiate the given model. 
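[Editor's note: a round-trip sketch of the two public entry points defined above; pre-deletion import path, arbitrary estimator choice.]

from sklearn.tree import DecisionTreeClassifier

from openml.extensions.sklearn import SklearnExtension

ext = SklearnExtension()
flow = ext.model_to_flow(DecisionTreeClassifier(max_depth=3))
restored = ext.flow_to_model(flow)  # fresh, unfitted clone with max_depth=3
defaults = ext.flow_to_model(flow, initialize_with_defaults=True)  # defaults only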
- - Parameters - ---------- - model : Any - - Returns - ------- - str - """ - return " ".join(self.get_version_information()) - - def _is_cross_validator(self, o: Any) -> bool: - return isinstance(o, sklearn.model_selection.BaseCrossValidator) - - @classmethod - def _is_sklearn_flow(cls, flow: OpenMLFlow) -> bool: - sklearn_dependency = isinstance(flow.dependencies, str) and "sklearn" in flow.dependencies - sklearn_as_external = isinstance(flow.external_version, str) and ( - flow.external_version.startswith("sklearn==") or ",sklearn==" in flow.external_version - ) - return sklearn_dependency or sklearn_as_external - - def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: - r"""Fetches the sklearn function docstring for the flow description - - Retrieves the sklearn docstring available and does the following: - * If length of docstring <= char_lim, then returns the complete docstring - * Else, trims the docstring till it encounters a 'Read more in the :ref:' - * Or till it encounters a 'Parameters\n----------\n' - The final string returned is at most of length char_lim with leading and - trailing whitespaces removed. - - Parameters - ---------- - model : sklearn model - char_lim : int - Specifying the max length of the returned string. - OpenML servers have a constraint of 1024 characters for the 'description' field. - - Returns - ------- - str - """ - - def match_format(s): - return f"{s}\n{len(s) * '-'}\n" - - s = inspect.getdoc(model) - if s is None: - return "" - try: - # trim till 'Read more' - pattern = "Read more in the :ref:" - index = s.index(pattern) - s = s[:index] - # trimming docstring to be within char_lim - if len(s) > char_lim: - s = f"{s[: char_lim - 3]}..." - return s.strip() - except ValueError: - logger.warning( - "'Read more' not found in descriptions. " - "Trying to trim till 'Parameters' if available in docstring.", - ) - try: - # if 'Read more' doesn't exist, trim till 'Parameters' - pattern = "Parameters" - index = s.index(match_format(pattern)) - except ValueError: - # returning full docstring - logger.warning("'Parameters' not found in docstring. Omitting docstring trimming.") - index = len(s) - s = s[:index] - # trimming docstring to be within char_lim - if len(s) > char_lim: - s = f"{s[: char_lim - 3]}..." - return s.strip() - - def _extract_sklearn_parameter_docstring(self, model) -> None | str: - """Extracts the part of sklearn docstring containing parameter information - - Fetches the entire docstring and trims just the Parameter section. - The assumption is that 'Parameters' is the first section in sklearn docstrings, - followed by other sections titled 'Attributes', 'See also', 'Note', 'References', - appearing in that order if defined. - Returns a None if no section with 'Parameters' can be found in the docstring. 
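[Editor's note: the description and parameter parsers above assume a numpydoc layout; a sketch of the shape they expect, with the trim points marked.]

doc = (
    "Toy estimator.\n"
    "\n"
    "Read more in the :ref:`User Guide <toy>`.\n"  # description is trimmed here
    "\n"
    "Parameters\n"
    "----------\n"
    "alpha : float\n"
    "    Regularization strength.\n"
    "\n"
    "Attributes\n"  # the parameter section is assumed to end at such a heading
    "----------\n"
    "coef_ : ndarray\n"
)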
- - Parameters - ---------- - model : sklearn model - - Returns - ------- - str, or None - """ - - def match_format(s): - return f"{s}\n{len(s) * '-'}\n" - - s = inspect.getdoc(model) - if s is None: - return None - try: - index1 = s.index(match_format("Parameters")) - except ValueError as e: - # when sklearn docstring has no 'Parameters' section - logger.warning(f"{match_format('Parameters')} {e}") - return None - - headings = ["Attributes", "Notes", "See also", "Note", "References"] - for h in headings: - try: - # to find end of Parameters section - index2 = s.index(match_format(h)) - break - except ValueError: - logger.warning(f"{h} not available in docstring") - continue - else: - # in the case only 'Parameters' exist, trim till end of docstring - index2 = len(s) - s = s[index1:index2] - return s.strip() - - def _extract_sklearn_param_info(self, model, char_lim=1024) -> None | dict: - """Parses parameter type and description from sklearn dosctring - - Parameters - ---------- - model : sklearn model - char_lim : int - Specifying the max length of the returned string. - OpenML servers have a constraint of 1024 characters string fields. - - Returns - ------- - Dict, or None - """ - docstring = self._extract_sklearn_parameter_docstring(model) - if docstring is None: - # when sklearn docstring has no 'Parameters' section - return None - - n = re.compile("[.]*\n", flags=IGNORECASE) - lines = n.split(docstring) - p = re.compile("[a-z0-9_ ]+ : [a-z0-9_']+[a-z0-9_ ]*", flags=IGNORECASE) - # The above regular expression is designed to detect sklearn parameter names and type - # in the format of [variable_name][space]:[space][type] - # The expectation is that the parameter description for this detected parameter will - # be all the lines in the docstring till the regex finds another parameter match - - # collecting parameters and their descriptions - description = [] # type: List - for s in lines: - param = p.findall(s) - if param != []: - # a parameter definition is found by regex - # creating placeholder when parameter found which will be a list of strings - # string descriptions will be appended in subsequent iterations - # till another parameter is found and a new placeholder is created - placeholder = [""] # type: List[str] - description.append(placeholder) - elif len(description) > 0: # description=[] means no parameters found yet - # appending strings to the placeholder created when parameter found - description[-1].append(s) - for i in range(len(description)): - # concatenating parameter description strings - description[i] = "\n".join(description[i]).strip() - # limiting all parameter descriptions to accepted OpenML string length - if len(description[i]) > char_lim: - description[i] = f"{description[i][: char_lim - 3]}..." - - # collecting parameters and their types - parameter_docs = OrderedDict() - matches = p.findall(docstring) - for i, param in enumerate(matches): - key, value = str(param).split(":") - parameter_docs[key.strip()] = [value.strip(), description[i]] - - # to avoid KeyError for missing parameters - param_list_true = list(model.get_params().keys()) - param_list_found = list(parameter_docs.keys()) - for param in list(set(param_list_true) - set(param_list_found)): - parameter_docs[param] = [None, None] - - return parameter_docs - - def _serialize_model(self, model: Any) -> OpenMLFlow: - """Create an OpenMLFlow. - - Calls `sklearn_to_flow` recursively to properly serialize the - parameters to strings and the components (other models) to OpenMLFlows. 
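[Editor's note: approximate output of _extract_sklearn_param_info for a hypothetical estimator `model` carrying a docstring like the sketch above; exact strings depend on the regex matches.]

info = SklearnExtension()._extract_sklearn_param_info(model)
# info["alpha"] == ["float", "Regularization strength."]   (approximately)
# parameters present in model.get_params() but absent from the docstring
# are filled in as [None, None]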
- - Parameters - ---------- - model : sklearn estimator - - Returns - ------- - OpenMLFlow - - """ - # Get all necessary information about the model objects itself - ( - parameters, - parameters_meta_info, - subcomponents, - subcomponents_explicit, - ) = self._extract_information_from_model(model) - - # Check that a component does not occur multiple times in a flow as this - # is not supported by OpenML - self._check_multiple_occurence_of_component_in_flow(model, subcomponents) - - # Create a flow name, which contains all components in brackets, e.g.: - # RandomizedSearchCV(Pipeline(StandardScaler,AdaBoostClassifier(DecisionTreeClassifier)), - # StandardScaler,AdaBoostClassifier(DecisionTreeClassifier)) - class_name = model.__module__ + "." + model.__class__.__name__ - - # will be part of the name (in brackets) - sub_components_names = "" - for key in subcomponents: - name_thing = subcomponents[key] - if isinstance(name_thing, OpenMLFlow): - name = name_thing.name - elif ( - isinstance(name_thing, str) - and subcomponents[key] in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - name = name_thing - else: - raise TypeError(type(subcomponents[key])) - - if key in subcomponents_explicit: - sub_components_names += "," + key + "=" + name - else: - sub_components_names += "," + name - - # slice operation on string in order to get rid of leading comma - name = f"{class_name}({sub_components_names[1:]})" if sub_components_names else class_name - short_name = SklearnExtension.trim_flow_name(name) - - # Get the external versions of all sub-components - external_version = self._get_external_version_string(model, subcomponents) - dependencies = self._get_dependencies() - tags = self._get_tags() - - sklearn_description = self._get_sklearn_description(model) - return OpenMLFlow( - name=name, - class_name=class_name, - custom_name=short_name, - description=sklearn_description, - model=model, - components=subcomponents, - parameters=parameters, - parameters_meta_info=parameters_meta_info, - external_version=external_version, - tags=tags, - extension=self, - language="English", - dependencies=dependencies, - ) - - def _get_dependencies(self) -> str: - return self._min_dependency_str(sklearn.__version__) # type: ignore - - def _get_tags(self) -> list[str]: - sklearn_version = self._format_external_version("sklearn", sklearn.__version__) # type: ignore - sklearn_version_formatted = sklearn_version.replace("==", "_") - return [ - "openml-python", - "sklearn", - "scikit-learn", - "python", - sklearn_version_formatted, - # TODO: add more tags based on the scikit-learn - # module a flow is in? For example automatically - # annotate a class of sklearn.svm.SVC() with the - # tag svm? - ] - - def _get_external_version_string( - self, - model: Any, - sub_components: dict[str, OpenMLFlow], - ) -> str: - # Create external version string for a flow, given the model and the - # already parsed dictionary of sub_components. Retrieves the external - # version of all subcomponents, which themselves already contain all - # requirements for their subcomponents. The external version string is a - # sorted concatenation of all modules which are present in this run. 
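[Editor's note: a hedged sketch of the flow _serialize_model produces for a boosted tree; module paths follow recent sklearn, version strings are illustrative.]

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

flow = SklearnExtension().model_to_flow(
    AdaBoostClassifier(estimator=DecisionTreeClassifier())
)
# flow.name        ~ "sklearn.ensemble._weight_boosting.AdaBoostClassifier("
#                    "estimator=sklearn.tree._classes.DecisionTreeClassifier)"
# flow.custom_name ~ "sklearn.AdaBoostClassifier"  (hyperparameters trimmed)
# flow.external_version ~ "openml==0.15.1,sklearn==1.4.2"  (sorted, comma-joined)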
- - external_versions = set() - - # The model is None if the flow is a placeholder flow such as 'passthrough' or 'drop' - if model is not None: - model_package_name = model.__module__.split(".")[0] - module = importlib.import_module(model_package_name) - model_package_version_number = module.__version__ # type: ignore - external_version = self._format_external_version( - model_package_name, - model_package_version_number, - ) - external_versions.add(external_version) - - openml_version = self._format_external_version("openml", openml.__version__) - sklearn_version = self._format_external_version("sklearn", sklearn.__version__) # type: ignore - external_versions.add(openml_version) - external_versions.add(sklearn_version) - for visitee in sub_components.values(): - if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: - continue - for external_version in visitee.external_version.split(","): - external_versions.add(external_version) - return ",".join(sorted(external_versions)) - - def _check_multiple_occurence_of_component_in_flow( - self, - model: Any, - sub_components: dict[str, OpenMLFlow], - ) -> None: - to_visit_stack: list[OpenMLFlow] = [] - to_visit_stack.extend(sub_components.values()) - known_sub_components: set[str] = set() - - while len(to_visit_stack) > 0: - visitee = to_visit_stack.pop() - if isinstance(visitee, str) and visitee in SKLEARN_PIPELINE_STRING_COMPONENTS: - known_sub_components.add(visitee) - elif visitee.name in known_sub_components: - raise ValueError( - f"Found a second occurence of component {visitee.name} when " - f"trying to serialize {model}.", - ) - else: - known_sub_components.add(visitee.name) - to_visit_stack.extend(visitee.components.values()) - - def _extract_information_from_model( # noqa: PLR0915, C901, PLR0912 - self, - model: Any, - ) -> tuple[ - OrderedDict[str, str | None], - OrderedDict[str, dict | None], - OrderedDict[str, OpenMLFlow], - set, - ]: - # This function contains four "global" states and is quite long and - # complicated. If it gets to complicated to ensure it's correctness, - # it would be best to make it a class with the four "global" states being - # the class attributes and the if/elif/else in the for-loop calls to - # separate class methods - - # stores all entities that should become subcomponents - sub_components = OrderedDict() # type: OrderedDict[str, OpenMLFlow] - # stores the keys of all subcomponents that should become - sub_components_explicit = set() - parameters: OrderedDict[str, str | None] = OrderedDict() - parameters_meta_info: OrderedDict[str, dict | None] = OrderedDict() - parameters_docs = self._extract_sklearn_param_info(model) - - model_parameters = model.get_params(deep=False) - for k, v in sorted(model_parameters.items(), key=lambda t: t[0]): - rval = self._serialize_sklearn(v, model) - - def flatten_all(list_): - """Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]).""" - for el in list_: - if isinstance(el, (list, tuple)) and len(el) > 0: - yield from flatten_all(el) - else: - yield el - - # In case rval is a list of lists (or tuples), we need to identify two situations: - # - sklearn pipeline steps, feature union or base classifiers in voting classifier. - # They look like e.g. [("imputer", Imputer()), ("classifier", SVC())] - # - a list of lists with simple types (e.g. 
int or str), such as for an OrdinalEncoder - # where all possible values for each feature are described: [[0,1,2], [1,2,5]] - is_non_empty_list_of_lists_with_same_type = ( - isinstance(rval, (list, tuple)) - and len(rval) > 0 - and isinstance(rval[0], (list, tuple)) - and all(isinstance(rval_i, type(rval[0])) for rval_i in rval) - ) - - # Check that all list elements are of simple types. - nested_list_of_simple_types = ( - is_non_empty_list_of_lists_with_same_type - and all(isinstance(el, SIMPLE_TYPES) for el in flatten_all(rval)) - and all( - len(rv) in (2, 3) and rv[1] not in SKLEARN_PIPELINE_STRING_COMPONENTS - for rv in rval - ) - ) - - if is_non_empty_list_of_lists_with_same_type and not nested_list_of_simple_types: - # If a list of lists is identified that include 'non-simple' types (e.g. objects), - # we assume they are steps in a pipeline, feature union, or base classifiers in - # a voting classifier. - parameter_value = [] # type: List - reserved_keywords = set(model.get_params(deep=False).keys()) - - for sub_component_tuple in rval: - identifier = sub_component_tuple[0] - sub_component = sub_component_tuple[1] - sub_component_type = type(sub_component_tuple) - if not 2 <= len(sub_component_tuple) <= 3: - # length 2 is for {VotingClassifier.estimators, - # Pipeline.steps, FeatureUnion.transformer_list} - # length 3 is for ColumnTransformer - raise ValueError( - f"Length of tuple of type {sub_component_type}" - " does not match assumptions" - ) - - if isinstance(sub_component, str): - if sub_component not in SKLEARN_PIPELINE_STRING_COMPONENTS: - msg = ( - "Second item of tuple does not match assumptions. " - "If string, can be only 'drop' or 'passthrough' but" - f"got {sub_component}" - ) - raise ValueError(msg) - elif sub_component is None: - msg = ( - "Cannot serialize objects of None type. Please use a valid " - "placeholder for None. Note that empty sklearn estimators can be " - "replaced with 'drop' or 'passthrough'." - ) - raise ValueError(msg) - elif not isinstance(sub_component, OpenMLFlow): - msg = ( - "Second item of tuple does not match assumptions. 
" - f"Expected OpenMLFlow, got {type(sub_component)}" - ) - raise TypeError(msg) - - if identifier in reserved_keywords: - parent_model = f"{model.__module__}.{model.__class__.__name__}" - msg = ( - "Found element shadowing official " - f"parameter for {parent_model}: {identifier}" - ) - raise PyOpenMLError(msg) - - # when deserializing the parameter - sub_components_explicit.add(identifier) - if isinstance(sub_component, str): - external_version = self._get_external_version_string(None, {}) - dependencies = self._get_dependencies() - tags = self._get_tags() - - sub_components[identifier] = OpenMLFlow( - name=sub_component, - description="Placeholder flow for scikit-learn's string pipeline " - "members", - components=OrderedDict(), - parameters=OrderedDict(), - parameters_meta_info=OrderedDict(), - external_version=external_version, - tags=tags, - language="English", - dependencies=dependencies, - model=None, - ) - component_reference: OrderedDict[str, str | dict] = OrderedDict() - component_reference["oml-python:serialized_object"] = ( - COMPOSITION_STEP_CONSTANT - ) - cr_value: dict[str, Any] = OrderedDict() - cr_value["key"] = identifier - cr_value["step_name"] = identifier - if len(sub_component_tuple) == 3: - cr_value["argument_1"] = sub_component_tuple[2] - component_reference["value"] = cr_value - else: - sub_components[identifier] = sub_component - component_reference = OrderedDict() - component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE - cr_value = OrderedDict() - cr_value["key"] = identifier - cr_value["step_name"] = identifier - if len(sub_component_tuple) == 3: - cr_value["argument_1"] = sub_component_tuple[2] - component_reference["value"] = cr_value - parameter_value.append(component_reference) - - # Here (and in the elif and else branch below) are the only - # places where we encode a value as json to make sure that all - # parameter values still have the same type after - # deserialization - if isinstance(rval, tuple): - parameter_json = json.dumps(tuple(parameter_value)) - else: - parameter_json = json.dumps(parameter_value) - parameters[k] = parameter_json - - elif isinstance(rval, OpenMLFlow): - # A subcomponent, for example the base model in - # AdaBoostClassifier - sub_components[k] = rval - sub_components_explicit.add(k) - component_reference = OrderedDict() - component_reference["oml-python:serialized_object"] = COMPONENT_REFERENCE - cr_value = OrderedDict() - cr_value["key"] = k - cr_value["step_name"] = None - component_reference["value"] = cr_value - cr = self._serialize_sklearn(component_reference, model) - parameters[k] = json.dumps(cr) - - elif not (hasattr(rval, "__len__") and len(rval) == 0): - rval = json.dumps(rval) - parameters[k] = rval - # a regular hyperparameter - else: - parameters[k] = None - - if parameters_docs is not None: - data_type, description = parameters_docs[k] - parameters_meta_info[k] = OrderedDict( - (("description", description), ("data_type", data_type)), - ) - else: - parameters_meta_info[k] = OrderedDict((("description", None), ("data_type", None))) - - return parameters, parameters_meta_info, sub_components, sub_components_explicit - - def _get_fn_arguments_with_defaults(self, fn_name: Callable) -> tuple[dict, set]: - """ - Returns - ------- - i) a dict with all parameter names that have a default value, and - ii) a set with all parameter names that do not have a default - - Parameters - ---------- - fn_name : callable - The function of which we want to obtain the defaults - - Returns - ------- - 
params_with_defaults: dict - a dict mapping parameter name to the default value - params_without_defaults: set - a set with all parameters that do not have a default value - """ - # parameters with defaults are optional, all others are required. - parameters = inspect.signature(fn_name).parameters - required_params = set() - optional_params = {} - for param in parameters: - parameter = parameters.get(param) - default_val = parameter.default # type: ignore - if default_val is inspect.Signature.empty: - required_params.add(param) - else: - optional_params[param] = default_val - return optional_params, required_params - - def _deserialize_model( # noqa: C901 - self, - flow: OpenMLFlow, - keep_defaults: bool, # noqa: FBT001 - recursion_depth: int, - strict_version: bool = True, # noqa: FBT002, FBT001 - ) -> Any: - logger.info(f"-{'-' * recursion_depth} deserialize {flow.name}") - model_name = flow.class_name - self._check_dependencies(flow.dependencies, strict_version=strict_version) - - parameters = flow.parameters - components = flow.components - parameter_dict: dict[str, Any] = OrderedDict() - - # Do a shallow copy of the components dictionary so we can remove the - # components from this copy once we added them into the pipeline. This - # allows us to not consider them any more when looping over the - # components, but keeping the dictionary of components untouched in the - # original components dictionary. - components_ = copy.copy(components) - - for name in parameters: - value = parameters.get(name) - logger.info(f"--{'-' * recursion_depth} flow_parameter={name}, value={value}") - rval = self._deserialize_sklearn( - value, - components=components_, - initialize_with_defaults=keep_defaults, - recursion_depth=recursion_depth + 1, - strict_version=strict_version, - ) - parameter_dict[name] = rval - - for name in components: - if name in parameter_dict: - continue - if name not in components_: - continue - value = components[name] - logger.info(f"--{'-' * recursion_depth} flow_component={name}, value={value}") - rval = self._deserialize_sklearn( - value, - recursion_depth=recursion_depth + 1, - strict_version=strict_version, - ) - parameter_dict[name] = rval - - if model_name is None and flow.name in SKLEARN_PIPELINE_STRING_COMPONENTS: - return flow.name - - assert model_name is not None - module_name = model_name.rsplit(".", 1) - model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) - - if keep_defaults: - # obtain all params with a default - param_defaults, _ = self._get_fn_arguments_with_defaults(model_class.__init__) - - # delete the params that have a default from the dict, - # so they get initialized with their default value - # except [...] - for param in param_defaults: - # [...] the ones that also have a key in the components dict. - # As OpenML stores different flows for ensembles with different - # (base-)components, in OpenML terms, these are not considered - # hyperparameters but rather constants (i.e., changing them would - # result in a different flow) - if param not in components: - del parameter_dict[param] - - if not strict_version: - # Ignore incompatible parameters - allowed_parameter = list(inspect.signature(model_class.__init__).parameters) - for p in list(parameter_dict.keys()): - if p not in allowed_parameter: - warnings.warn( - f"While deserializing in a non-strict way, parameter {p} is not " - f"allowed for {model_class.__name__} likely due to a version mismatch. 
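[Editor's note: a behaviour sketch for the signature-inspection helper above.]

def toy(a, b, c=0.5):
    return a, b, c

optional, required = SklearnExtension()._get_fn_arguments_with_defaults(toy)
# optional == {"c": 0.5}; required == {"a", "b"}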
" - "We ignore the parameter.", - UserWarning, - stacklevel=2, - ) - del parameter_dict[p] - - return model_class(**parameter_dict) - - def _check_dependencies( - self, - dependencies: str, - strict_version: bool = True, # noqa: FBT001, FBT002 - ) -> None: - if not dependencies: - return - - dependencies_list = dependencies.split("\n") - for dependency_string in dependencies_list: - match = DEPENDENCIES_PATTERN.match(dependency_string) - if not match: - raise ValueError(f"Cannot parse dependency {dependency_string}") - - dependency_name = match.group("name") - operation = match.group("operation") - version = match.group("version") - - module = importlib.import_module(dependency_name) - required_version = Version(version) - installed_version = Version(module.__version__) # type: ignore - - if operation == "==": - check = required_version == installed_version - elif operation == ">": - check = installed_version > required_version - elif operation == ">=": - check = ( - installed_version > required_version or installed_version == required_version - ) - else: - raise NotImplementedError(f"operation '{operation}' is not supported") - message = ( - f"Trying to deserialize a model with dependency {dependency_string} not satisfied." - ) - if not check: - if strict_version: - raise ValueError(message) - - warnings.warn(message, category=UserWarning, stacklevel=2) - - def _serialize_type(self, o: Any) -> OrderedDict[str, str]: - mapping = { - float: "float", - np.float32: "np.float32", - np.float64: "np.float64", - int: "int", - np.int32: "np.int32", - np.int64: "np.int64", - } - if Version(np.__version__) < Version("1.24"): - mapping[float] = "np.float" - mapping[int] = "np.int" - - ret = OrderedDict() # type: 'OrderedDict[str, str]' - ret["oml-python:serialized_object"] = "type" - ret["value"] = mapping[o] - return ret - - def _deserialize_type(self, o: str) -> Any: - mapping = { - "float": float, - "np.float32": np.float32, - "np.float64": np.float64, - "int": int, - "np.int32": np.int32, - "np.int64": np.int64, - } - - # TODO(eddiebergman): Might be able to remove this - if Version(np.__version__) < Version("1.24"): - mapping["np.float"] = np.float # type: ignore # noqa: NPY001 - mapping["np.int"] = np.int # type: ignore # noqa: NPY001 - - return mapping[o] - - def _serialize_rv_frozen(self, o: Any) -> OrderedDict[str, str | dict]: - args = o.args - kwds = o.kwds - a = o.a - b = o.b - dist = o.dist.__class__.__module__ + "." + o.dist.__class__.__name__ - ret: OrderedDict[str, str | dict] = OrderedDict() - ret["oml-python:serialized_object"] = "rv_frozen" - ret["value"] = OrderedDict( - (("dist", dist), ("a", a), ("b", b), ("args", args), ("kwds", kwds)), - ) - return ret - - def _deserialize_rv_frozen(self, o: OrderedDict[str, str]) -> Any: - args = o["args"] - kwds = o["kwds"] - a = o["a"] - b = o["b"] - dist_name = o["dist"] - - module_name = dist_name.rsplit(".", 1) - try: - rv_class = getattr(importlib.import_module(module_name[0]), module_name[1]) - except AttributeError as e: - _tb = traceback.format_exc() - warnings.warn( - f"Cannot create model {dist_name} for flow. Reason is from error {type(e)}:{e}" - f"\nTraceback: {_tb}", - RuntimeWarning, - stacklevel=2, - ) - return None - - dist = scipy.stats.distributions.rv_frozen(rv_class(), *args, **kwds) # type: ignore - dist.a = a - dist.b = b - - return dist - - def _serialize_function(self, o: Callable) -> OrderedDict[str, str]: - name = o.__module__ + "." 
+ o.__name__ - ret = OrderedDict() # type: 'OrderedDict[str, str]' - ret["oml-python:serialized_object"] = "function" - ret["value"] = name - return ret - - def _deserialize_function(self, name: str) -> Callable: - module_name = name.rsplit(".", 1) - return getattr(importlib.import_module(module_name[0]), module_name[1]) - - def _serialize_cross_validator(self, o: Any) -> OrderedDict[str, str | dict]: - ret: OrderedDict[str, str | dict] = OrderedDict() - - parameters = OrderedDict() # type: 'OrderedDict[str, Any]' - - # XXX this is copied from sklearn.model_selection._split - cls = o.__class__ - init = getattr(cls.__init__, "deprecated_original", cls.__init__) - # Ignore varargs, kw and default values and pop self - init_signature = inspect.signature(init) # type: ignore - # Consider the constructor parameters excluding 'self' - if init is object.__init__: - args = [] # type: List - else: - args = sorted( - [ - p.name - for p in init_signature.parameters.values() - if p.name != "self" and p.kind != p.VAR_KEYWORD - ], - ) - - for key in args: - # We need deprecation warnings to always be on in order to - # catch deprecated param values. - # This is set in utils/__init__.py but it gets overwritten - # when running under python3 somehow. - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always", DeprecationWarning) - value = getattr(o, key, None) - if w is not None and len(w) and w[0].category is DeprecationWarning: - # if the parameter is deprecated, don't show it - continue - - if not (isinstance(value, Sized) and len(value) == 0): - value = json.dumps(value) - parameters[key] = value - else: - parameters[key] = None - - ret["oml-python:serialized_object"] = "cv_object" - name = o.__module__ + "." + o.__class__.__name__ - value = OrderedDict([("name", name), ("parameters", parameters)]) - ret["value"] = value - - return ret - - def _deserialize_cross_validator( - self, - value: OrderedDict[str, Any], - recursion_depth: int, - strict_version: bool = True, # noqa: FBT002, FBT001 - ) -> Any: - model_name = value["name"] - parameters = value["parameters"] - - module_name = model_name.rsplit(".", 1) - model_class = getattr(importlib.import_module(module_name[0]), module_name[1]) - for parameter in parameters: - parameters[parameter] = self._deserialize_sklearn( - parameters[parameter], - recursion_depth=recursion_depth + 1, - strict_version=strict_version, - ) - return model_class(**parameters) - - def _format_external_version( - self, - model_package_name: str, - model_package_version_number: str, - ) -> str: - return f"{model_package_name}=={model_package_version_number}" - - @staticmethod - def _get_parameter_values_recursive( - param_grid: dict | list[dict], - parameter_name: str, - ) -> list[Any]: - """ - Returns a list of values for a given hyperparameter, encountered - recursively throughout the flow. 
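[Editor's note: a hedged sketch of cross-validator serialization as implemented above; the module path is sklearn's private one for KFold.]

from sklearn.model_selection import KFold

serialized = SklearnExtension()._serialize_cross_validator(KFold(n_splits=3))
# serialized["oml-python:serialized_object"] == "cv_object"
# serialized["value"]["name"] == "sklearn.model_selection._split.KFold"
# serialized["value"]["parameters"] holds JSON-encoded values,
# e.g. {"n_splits": "3", "random_state": "null", "shuffle": "false"}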
(e.g., n_jobs can be defined - for various flows) - - Parameters - ---------- - param_grid: Union[Dict, List[Dict]] - Dict mapping from hyperparameter list to value, to a list of - such dicts - - parameter_name: str - The hyperparameter that needs to be inspected - - Returns - ------- - List - A list of all values of hyperparameters with this name - """ - if isinstance(param_grid, dict): - return [ - value - for param, value in param_grid.items() - if param.split("__")[-1] == parameter_name - ] - - if isinstance(param_grid, list): - result = [] - for sub_grid in param_grid: - result.extend( - SklearnExtension._get_parameter_values_recursive(sub_grid, parameter_name), - ) - return result - - raise ValueError("Param_grid should either be a dict or list of dicts") - - def _prevent_optimize_n_jobs(self, model): - """ - Ensures that HPO classes will not optimize the n_jobs hyperparameter - - Parameters - ---------- - model: - The model that will be fitted - """ - if self._is_hpo_class(model): - if isinstance(model, sklearn.model_selection.GridSearchCV): - param_distributions = model.param_grid - elif isinstance(model, sklearn.model_selection.RandomizedSearchCV): - param_distributions = model.param_distributions - else: - if hasattr(model, "param_distributions"): - param_distributions = model.param_distributions - else: - raise AttributeError( - "Using subclass BaseSearchCV other than " - "{GridSearchCV, RandomizedSearchCV}. " - "Could not find attribute " - "param_distributions.", - ) - logger.warning( - "Warning! Using subclass BaseSearchCV other than " - "{GridSearchCV, RandomizedSearchCV}. " - "Should implement param check. ", - ) - n_jobs_vals = SklearnExtension._get_parameter_values_recursive( - param_distributions, - "n_jobs", - ) - if len(n_jobs_vals) > 0: - raise PyOpenMLError( - "openml-python should not be used to optimize the n_jobs parameter.", - ) - - ################################################################################################ - # Methods for performing runs with extension modules - - def is_estimator(self, model: Any) -> bool: - """Check whether the given model is a scikit-learn estimator. - - This function is only required for backwards compatibility and will be removed in the - near future. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - o = model - return hasattr(o, "fit") and hasattr(o, "get_params") and hasattr(o, "set_params") - - def seed_model(self, model: Any, seed: int | None = None) -> Any: # noqa: C901 - """Set the random state of all the unseeded components of a model and return the seeded - model. - - Required so that all seed information can be uploaded to OpenML for reproducible results. - - Models that are already seeded will maintain the seed. In this case, - only integer seeds are allowed (An exception is raised when a RandomState was used as - seed). - - Parameters - ---------- - model : sklearn model - The model to be seeded - seed : int - The seed to initialize the RandomState with. Unseeded subcomponents - will be seeded with a random number from the RandomState. - - Returns - ------- - Any - """ - - def _seed_current_object(current_value): - if isinstance(current_value, int): # acceptable behaviour - return False - - if isinstance(current_value, np.random.RandomState): - raise ValueError( - "Models initialized with a RandomState object are not " - "supported. Please seed with an integer. 
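[Editor's note: a behaviour sketch for the recursive hyperparameter lookup above.]

grid = {"n_jobs": [2, 4], "estimator__n_jobs": [1]}
SklearnExtension._get_parameter_values_recursive(grid, "n_jobs")
# -> [[2, 4], [1]]; wrapping such a grid in GridSearchCV and running it
# would make _prevent_optimize_n_jobs raise PyOpenMLError.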
", - ) - - if current_value is not None: - raise ValueError( - "Models should be seeded with int or None (this should never happen). ", - ) - - return True - - rs = np.random.RandomState(seed) - model_params = model.get_params() - random_states = {} - for param_name in sorted(model_params): - if "random_state" in param_name: - current_value = model_params[param_name] - # important to draw the value at this point (and not in the if - # statement) this way we guarantee that if a different set of - # subflows is seeded, the same number of the random generator is - # used - new_value = rs.randint(0, 2**16) - if _seed_current_object(current_value): - random_states[param_name] = new_value - - # Also seed CV objects! - elif isinstance(model_params[param_name], sklearn.model_selection.BaseCrossValidator): - if not hasattr(model_params[param_name], "random_state"): - continue - - current_value = model_params[param_name].random_state - new_value = rs.randint(0, 2**16) - if _seed_current_object(current_value): - model_params[param_name].random_state = new_value - - model.set_params(**random_states) - return model - - def check_if_model_fitted(self, model: Any) -> bool: - """Returns True/False denoting if the model has already been fitted/trained - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - from sklearn.exceptions import NotFittedError - from sklearn.utils.validation import check_is_fitted - - try: - # check if model is fitted - check_is_fitted(model) - - # Creating random dummy data of arbitrary size - dummy_data = np.random.uniform(size=(10, 3)) # noqa: NPY002 - # Using 'predict' instead of 'sklearn.utils.validation.check_is_fitted' for a more - # robust check that works across sklearn versions and models. Internally, 'predict' - # should call 'check_is_fitted' for every concerned attribute, thus offering a more - # assured check than explicit calls to 'check_is_fitted' - model.predict(dummy_data) - # Will reach here if the model was fit on a dataset with 3 features - return True - except NotFittedError: # needs to be the first exception to be caught - # Model is not fitted, as is required - return False - except ValueError: - # Will reach here if the model was fit on a dataset with more or less than 3 features - return True - - def _run_model_on_fold( # noqa: PLR0915, PLR0913, C901, PLR0912 - self, - model: Any, - task: OpenMLTask, - X_train: np.ndarray | scipy.sparse.spmatrix | pd.DataFrame, - rep_no: int, - fold_no: int, - y_train: np.ndarray | None = None, - X_test: np.ndarray | scipy.sparse.spmatrix | pd.DataFrame | None = None, - ) -> tuple[ - np.ndarray, - pd.DataFrame | None, - OrderedDict[str, float], - OpenMLRunTrace | None, - ]: - """Run a model on a repeat,fold,subsample triplet of the task and return prediction - information. - - Furthermore, it will measure run time measures in case multi-core behaviour allows this. - * exact user cpu time will be measured if the number of cores is set (recursive throughout - the model) exactly to 1 - * wall clock time will be measured if the number of cores is set (recursive throughout the - model) to any given number (but not when it is set to -1) - - Returns the data that is necessary to construct the OpenML Run object. Is used by - run_task_get_arff_content. Do not use this function unless you know what you are doing. - - Parameters - ---------- - model : Any - The UNTRAINED model to run. The model instance will be copied and not altered. - task : OpenMLTask - The task to run the model on. 
-        X_train : array-like
-            Training data for the given repetition and fold.
-        rep_no : int
-            The repeat of the experiment (0-based; in case of 1 time CV, always 0)
-        fold_no : int
-            The fold number of the experiment (0-based; in case of holdout, always 0)
-        y_train : Optional[np.ndarray] (default=None)
-            Target attributes for supervised tasks. In case of classification, these are integer
-            indices to the potential classes specified by the dataset.
-        X_test : array-like, optional (default=None)
-            Test attributes to test for generalization in supervised tasks.
-
-        Returns
-        -------
-        pred_y : np.ndarray
-            Predictions on the training/test set, depending on the task type.
-            For supervised tasks, predictions are on the test set.
-            For unsupervised tasks, predictions are on the training set.
-        proba_y : pd.DataFrame, optional
-            Predicted probabilities for the test set.
-            None, if task is not Classification or Learning Curve prediction.
-        user_defined_measures : OrderedDict[str, float]
-            User defined measures that were generated on this fold
-        trace : OpenMLRunTrace, optional
-            ARFF trace object from a fitted model and the trace content obtained by
-            repeatedly calling ``run_model_on_task``
-        """
-
-        def _prediction_to_probabilities(
-            y: np.ndarray | list,
-            model_classes: list[Any],
-            class_labels: list[str] | None,
-        ) -> pd.DataFrame:
-            """Transforms predicted probabilities to match with OpenML class indices.
-
-            Parameters
-            ----------
-            y : np.ndarray
-                Predicted probabilities (possibly omitting classes if they were not present in the
-                training data).
-            model_classes : list
-                List of classes known/predicted by the model, ordered by their index.
-            class_labels : list
-                List of classes as stored in the task object fetched from the server.
-
-            Returns
-            -------
-            pd.DataFrame
-            """
-            if class_labels is None:
-                raise ValueError("The task has no class labels")
-
-            if isinstance(y_train, np.ndarray) and isinstance(class_labels[0], str):
-                # mapping (decoding) the predictions to the categories
-                # creating a separate copy to not change the expected pred_y type
-                y = [class_labels[pred] for pred in y]  # list or numpy array of predictions
-
-            # model_classes: sklearn classifier mapping from original array id to
-            # prediction index id
-            if not isinstance(model_classes, list):
-                raise ValueError("please convert model classes to list prior to calling this fn")
-
-            # DataFrame allows more accurate mapping of classes as column names
-            result = pd.DataFrame(
-                0,
-                index=np.arange(len(y)),
-                columns=model_classes,
-                dtype=np.float32,
-            )
-            for obs, prediction in enumerate(y):
-                result.loc[obs, prediction] = 1.0
-            return result
-
-        if isinstance(task, OpenMLSupervisedTask):
-            if y_train is None:
-                raise TypeError("argument y_train must not be of type None")
-            if X_test is None:
-                raise TypeError("argument X_test must not be of type None")
-
-        model_copy = sklearn.base.clone(model, safe=True)
-        # sanity check: prohibit users from optimizing n_jobs
-        self._prevent_optimize_n_jobs(model_copy)
-        # measures and stores runtimes
-        user_defined_measures = OrderedDict()  # type: 'OrderedDict[str, float]'
-        try:
-            # for measuring runtime. 
Only available since Python 3.3 - modelfit_start_cputime = time.process_time() - modelfit_start_walltime = time.time() - - if isinstance(task, OpenMLSupervisedTask): - model_copy.fit(X_train, y_train) # type: ignore - elif isinstance(task, OpenMLClusteringTask): - model_copy.fit(X_train) # type: ignore - - modelfit_dur_cputime = (time.process_time() - modelfit_start_cputime) * 1000 - modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000 - - user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime - refit_time = model_copy.refit_time_ * 1000 if hasattr(model_copy, "refit_time_") else 0 # type: ignore - user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime - - except AttributeError as e: - # typically happens when training a regressor on classification task - raise PyOpenMLError(str(e)) from e - - if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): - # search for model classes_ (might differ depending on modeltype) - # first, pipelines are a special case (these don't have a classes_ - # object, but rather borrows it from the last step. We do this manually, - # because of the BaseSearch check) - if isinstance(model_copy, sklearn.pipeline.Pipeline): - used_estimator = model_copy.steps[-1][-1] - else: - used_estimator = model_copy - - if self._is_hpo_class(used_estimator): - model_classes = used_estimator.best_estimator_.classes_ - else: - model_classes = used_estimator.classes_ - - if not isinstance(model_classes, list): - model_classes = model_classes.tolist() - - # to handle the case when dataset is numpy and categories are encoded - # however the class labels stored in task are still categories - if isinstance(y_train, np.ndarray) and isinstance( - cast("List", task.class_labels)[0], - str, - ): - model_classes = [cast("List[str]", task.class_labels)[i] for i in model_classes] - - modelpredict_start_cputime = time.process_time() - modelpredict_start_walltime = time.time() - - # In supervised learning this returns the predictions for Y, in clustering - # it returns the clusters - if isinstance(task, OpenMLSupervisedTask): - pred_y = model_copy.predict(X_test) - elif isinstance(task, OpenMLClusteringTask): - pred_y = model_copy.predict(X_train) - else: - raise ValueError(task) - - modelpredict_duration_cputime = (time.process_time() - modelpredict_start_cputime) * 1000 - user_defined_measures["usercpu_time_millis_testing"] = modelpredict_duration_cputime - user_defined_measures["usercpu_time_millis"] = ( - modelfit_dur_cputime + modelpredict_duration_cputime - ) - modelpredict_duration_walltime = (time.time() - modelpredict_start_walltime) * 1000 - user_defined_measures["wall_clock_time_millis_testing"] = modelpredict_duration_walltime - user_defined_measures["wall_clock_time_millis"] = ( - modelfit_dur_walltime + modelpredict_duration_walltime + refit_time - ) - - if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): - try: - proba_y = model_copy.predict_proba(X_test) - proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy - except AttributeError: # predict_proba is not available when probability=False - proba_y = _prediction_to_probabilities(pred_y, model_classes, task.class_labels) - - if task.class_labels is not None: - if proba_y.shape[1] != len(task.class_labels): - # Remap the probabilities in case there was a class missing - # at training time. By default, the classification targets - # are mapped to be zero-based indices to the actual classes. 
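# A standalone sketch (illustrative class names) of the probability handling
# above: the ``predict_proba`` fallback one-hot encodes hard predictions, a
# zero column is added for any task class the estimator never saw, and the
# columns are reordered to match the task's class labels.
import numpy as np
import pandas as pd

task_labels = ["cat", "dog", "fish", "horse"]
model_classes = ["cat", "dog", "fish"]  # "horse" was missing at training time
predictions = ["cat", "dog", "cat"]

proba_y = pd.DataFrame(0.0, index=np.arange(len(predictions)), columns=model_classes)
for obs, prediction in enumerate(predictions):
    proba_y.loc[obs, prediction] = 1.0  # one-hot fallback

for col in task_labels:  # pad classes that were missing during training
    if col not in proba_y.columns:
        proba_y[col] = 0
proba_y = proba_y[task_labels]  # restore the task's label order
assert list(proba_y.columns) == task_labels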
- # Therefore, the model_classes contain the correct indices to - # the correct probability array. Example: - # classes in the dataset: 0, 1, 2, 3, 4, 5 - # classes in the training set: 0, 1, 2, 4, 5 - # then we need to add a column full of zeros into the probabilities - # for class 3 because the rest of the library expects that the - # probabilities are ordered the same way as the classes are ordered). - message = ( - f"Estimator only predicted for {proba_y.shape[1]}/{len(task.class_labels)}" - " classes!" - ) - warnings.warn(message, stacklevel=2) - openml.config.logger.warning(message) - - for _i, col in enumerate(task.class_labels): - # adding missing columns with 0 probability - if col not in model_classes: - proba_y[col] = 0 - # We re-order the columns to move possibly added missing columns into place. - proba_y = proba_y[task.class_labels] - else: - raise ValueError("The task has no class labels") - - if not np.all(set(proba_y.columns) == set(task.class_labels)): - missing_cols = list(set(task.class_labels) - set(proba_y.columns)) - raise ValueError("Predicted probabilities missing for the columns: ", missing_cols) - - elif isinstance(task, (OpenMLRegressionTask, OpenMLClusteringTask)): - proba_y = None - else: - raise TypeError(type(task)) - - if self._is_hpo_class(model_copy): - trace_data = self._extract_trace_data(model_copy, rep_no, fold_no) - trace: OpenMLRunTrace | None = self._obtain_arff_trace( - model_copy, - trace_data, - ) - else: - trace = None - - return pred_y, proba_y, user_defined_measures, trace - - def obtain_parameter_values( # noqa: C901, PLR0915 - self, - flow: OpenMLFlow, - model: Any = None, - ) -> list[dict[str, Any]]: - """Extracts all parameter settings required for the flow from the model. - - If no explicit model is provided, the parameters will be extracted from `flow.model` - instead. - - Parameters - ---------- - flow : OpenMLFlow - OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server) - - model: Any, optional (default=None) - The model from which to obtain the parameter values. Must match the flow signature. - If None, use the model specified in ``OpenMLFlow.model``. - - Returns - ------- - list - A list of dicts, where each dict has the following entries: - - ``oml:name`` : str: The OpenML parameter name - - ``oml:value`` : mixed: A representation of the parameter value - - ``oml:component`` : int: flow id to which the parameter belongs - """ - openml.flows.functions._check_flow_for_server_id(flow) - - def get_flow_dict(_flow): - flow_map = {_flow.name: _flow.flow_id} - for subflow in _flow.components: - flow_map.update(get_flow_dict(_flow.components[subflow])) - return flow_map - - def extract_parameters( # noqa: PLR0915, PLR0912, C901 - _flow, - _flow_dict, - component_model, - _main_call=False, # noqa: FBT002 - main_id=None, - ): - def is_subcomponent_specification(values): - # checks whether the current value can be a specification of - # subcomponents, as for example the value for steps parameter - # (in Pipeline) or transformers parameter (in - # ColumnTransformer). - return ( - # Specification requires list/tuple of list/tuple with - # at least length 2. 
- isinstance(values, (tuple, list)) - and all(isinstance(item, (tuple, list)) and len(item) > 1 for item in values) - # And each component needs to be a flow or interpretable string - and all( - isinstance(item[1], openml.flows.OpenMLFlow) - or ( - isinstance(item[1], str) - and item[1] in SKLEARN_PIPELINE_STRING_COMPONENTS - ) - for item in values - ) - ) - - # _flow is openml flow object, _param dict maps from flow name to flow - # id for the main call, the param dict can be overridden (useful for - # unit tests / sentinels) this way, for flows without subflows we do - # not have to rely on _flow_dict - exp_parameters = set(_flow.parameters) - if ( - isinstance(component_model, str) - and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - model_parameters = set() - else: - model_parameters = set(component_model.get_params(deep=False)) - if len(exp_parameters.symmetric_difference(model_parameters)) != 0: - flow_params = sorted(exp_parameters) - model_params = sorted(model_parameters) - raise ValueError( - "Parameters of the model do not match the " - "parameters expected by the " - "flow:\nexpected flow parameters: " - f"{flow_params}\nmodel parameters: {model_params}", - ) - exp_components = set(_flow.components) - if ( - isinstance(component_model, str) - and component_model in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - model_components = set() - else: - _ = set(component_model.get_params(deep=False)) - model_components = { - mp - for mp in component_model.get_params(deep=True) - if "__" not in mp and mp not in _ - } - if len(exp_components.symmetric_difference(model_components)) != 0: - is_problem = True - if len(exp_components - model_components) > 0: - # If an expected component is not returned as a component by get_params(), - # this means that it is also a parameter -> we need to check that this is - # actually the case - difference = exp_components - model_components - component_in_model_parameters = [] - for component in difference: - if component in model_parameters: - component_in_model_parameters.append(True) - else: - component_in_model_parameters.append(False) - is_problem = not all(component_in_model_parameters) - if is_problem: - flow_components = sorted(exp_components) - model_components = sorted(model_components) - raise ValueError( - "Subcomponents of the model do not match the " - "parameters expected by the " - "flow:\nexpected flow subcomponents: " - f"{flow_components}\nmodel subcomponents: {model_components}", - ) - - _params = [] - for _param_name in _flow.parameters: - _current = OrderedDict() - _current["oml:name"] = _param_name - - current_param_values = self.model_to_flow(component_model.get_params()[_param_name]) - - # Try to filter out components (a.k.a. subflows) which are - # handled further down in the code (by recursively calling - # this function)! - if isinstance(current_param_values, openml.flows.OpenMLFlow): - continue - - if is_subcomponent_specification(current_param_values): - # complex parameter value, with subcomponents - parsed_values = [] - for subcomponent in current_param_values: - # scikit-learn stores usually tuples in the form - # (name (str), subcomponent (mixed), argument - # (mixed)). OpenML replaces the subcomponent by an - # OpenMLFlow object. - if len(subcomponent) < 2 or len(subcomponent) > 3: - raise ValueError("Component reference should be size {2,3}. 
") - - subcomponent_identifier = subcomponent[0] - subcomponent_flow = subcomponent[1] - if not isinstance(subcomponent_identifier, str): - raise TypeError( - "Subcomponent identifier should be of type string, " - f"but is {type(subcomponent_identifier)}", - ) - if not isinstance(subcomponent_flow, (openml.flows.OpenMLFlow, str)): - if ( - isinstance(subcomponent_flow, str) - and subcomponent_flow in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - pass - else: - raise TypeError( - "Subcomponent flow should be of type flow, but is" - f" {type(subcomponent_flow)}", - ) - - current = { - "oml-python:serialized_object": COMPONENT_REFERENCE, - "value": { - "key": subcomponent_identifier, - "step_name": subcomponent_identifier, - }, - } - if len(subcomponent) == 3: - if not isinstance(subcomponent[2], list) and not isinstance( - subcomponent[2], - OrderedDict, - ): - raise TypeError( - "Subcomponent argument should be list or OrderedDict", - ) - current["value"]["argument_1"] = subcomponent[2] - parsed_values.append(current) - parsed_values = json.dumps(parsed_values) - else: - # vanilla parameter value - parsed_values = json.dumps(current_param_values) - - _current["oml:value"] = parsed_values - if _main_call: - _current["oml:component"] = main_id - else: - _current["oml:component"] = _flow_dict[_flow.name] - _params.append(_current) - - for _identifier in _flow.components: - subcomponent_model = component_model.get_params()[_identifier] - _params.extend( - extract_parameters( - _flow.components[_identifier], - _flow_dict, - subcomponent_model, - ), - ) - return _params - - flow_dict = get_flow_dict(flow) - model = model if model is not None else flow.model - return extract_parameters(flow, flow_dict, model, _main_call=True, main_id=flow.flow_id) - - def _openml_param_name_to_sklearn( - self, - openml_parameter: openml.setups.OpenMLParameter, - flow: OpenMLFlow, - ) -> str: - """ - Converts the name of an OpenMLParameter into the sklean name, given a flow. - - Parameters - ---------- - openml_parameter: OpenMLParameter - The parameter under consideration - - flow: OpenMLFlow - The flow that provides context. - - Returns - ------- - sklearn_parameter_name: str - The name the parameter will have once used in scikit-learn - """ - if not isinstance(openml_parameter, openml.setups.OpenMLParameter): - raise ValueError("openml_parameter should be an instance of OpenMLParameter") - if not isinstance(flow, OpenMLFlow): - raise ValueError("flow should be an instance of OpenMLFlow") - - flow_structure = flow.get_structure("name") - if openml_parameter.flow_name not in flow_structure: - raise ValueError("Obtained OpenMLParameter and OpenMLFlow do not correspond. ") - name = openml_parameter.flow_name # for PEP8 - return "__".join(flow_structure[name] + [openml_parameter.parameter_name]) - - ################################################################################################ - # Methods for hyperparameter optimization - - def _is_hpo_class(self, model: Any) -> bool: - """Check whether the model performs hyperparameter optimization. - - Used to check whether an optimization trace can be extracted from the model after - running it. - - Parameters - ---------- - model : Any - - Returns - ------- - bool - """ - return isinstance(model, sklearn.model_selection._search.BaseSearchCV) - - def instantiate_model_from_hpo_class( - self, - model: Any, - trace_iteration: OpenMLTraceIteration, - ) -> Any: - """Instantiate a ``base_estimator`` which can be searched over by the hyperparameter - optimization model. 
- - Parameters - ---------- - model : Any - A hyperparameter optimization model which defines the model to be instantiated. - trace_iteration : OpenMLTraceIteration - Describing the hyperparameter settings to instantiate. - - Returns - ------- - Any - """ - if not self._is_hpo_class(model): - raise AssertionError( - f"Flow model {model} is not an instance of" - " sklearn.model_selection._search.BaseSearchCV", - ) - base_estimator = model.estimator - base_estimator.set_params(**trace_iteration.get_parameters()) - return base_estimator - - def _extract_trace_data(self, model, rep_no, fold_no): - """Extracts data from a machine learning model's cross-validation results - and creates an ARFF (Attribute-Relation File Format) trace. - - Parameters - ---------- - model : Any - A fitted hyperparameter optimization model. - rep_no : int - The repetition number. - fold_no : int - The fold number. - - Returns - ------- - A list of ARFF tracecontent. - """ - arff_tracecontent = [] - for itt_no in range(len(model.cv_results_["mean_test_score"])): - # we use the string values for True and False, as it is defined in - # this way by the OpenML server - selected = "false" - if itt_no == model.best_index_: - selected = "true" - test_score = model.cv_results_["mean_test_score"][itt_no] - arff_line = [rep_no, fold_no, itt_no, test_score, selected] - for key in model.cv_results_: - if key.startswith("param_"): - value = model.cv_results_[key][itt_no] - # Built-in serializer does not convert all numpy types, - # these methods convert them to built-in types instead. - if isinstance(value, np.generic): - # For scalars it actually returns scalars, not a list - value = value.tolist() - serialized_value = json.dumps(value) if value is not np.ma.masked else np.nan - arff_line.append(serialized_value) - arff_tracecontent.append(arff_line) - return arff_tracecontent - - def _obtain_arff_trace( - self, - model: Any, - trace_content: list, - ) -> OpenMLRunTrace: - """Create arff trace object from a fitted model and the trace content obtained by - repeatedly calling ``run_model_on_task``. - - Parameters - ---------- - model : Any - A fitted hyperparameter optimization model. - - trace_content : List[List] - Trace content obtained by ``openml.runs.run_flow_on_task``. 
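# A hedged sketch of the ``cv_results_`` traversal performed by
# ``_extract_trace_data`` above (assumes scikit-learn is installed): every
# evaluated candidate becomes one trace line holding the repeat, fold,
# iteration, evaluation score, and whether it was the selected candidate.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
search = GridSearchCV(DecisionTreeClassifier(), {"max_depth": [1, 2, 3]})
search.fit(X, y)
for itt_no, score in enumerate(search.cv_results_["mean_test_score"]):
    selected = "true" if itt_no == search.best_index_ else "false"
    print([0, 0, itt_no, score, selected])  # rep_no = fold_no = 0 here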
- - Returns - ------- - OpenMLRunTrace - """ - if not self._is_hpo_class(model): - raise AssertionError( - f"Flow model {model} is not an instance of " - "sklearn.model_selection._search.BaseSearchCV", - ) - if not hasattr(model, "cv_results_"): - raise ValueError("model should contain `cv_results_`") - - # attributes that will be in trace arff, regardless of the model - trace_attributes = [ - ("repeat", "NUMERIC"), - ("fold", "NUMERIC"), - ("iteration", "NUMERIC"), - ("evaluation", "NUMERIC"), - ("selected", ["true", "false"]), - ] - - # model dependent attributes for trace arff - for key in model.cv_results_: - if key.startswith("param_"): - # supported types should include all types, including bool, - # int float - supported_basic_types = (bool, int, float, str) - for param_value in model.cv_results_[key]: - if isinstance(param_value, np.generic): - param_value = param_value.tolist() # noqa: PLW2901 - if ( - isinstance(param_value, supported_basic_types) - or param_value is None - or param_value is np.ma.masked - ): - # basic string values - type = "STRING" # noqa: A001 - elif isinstance(param_value, (list, tuple)) and all( - isinstance(i, int) for i in param_value - ): - # list of integers (usually for selecting features) - # hyperparameter layer_sizes of MLPClassifier - type = "STRING" # noqa: A001 - else: - raise TypeError(f"Unsupported param type in param grid: {key}") - - # renamed the attribute param to parameter, as this is a required - # OpenML convention - this also guards against name collisions - # with the required trace attributes - attribute = (PREFIX + key[6:], type) # type: ignore - trace_attributes.append(attribute) - - return OpenMLRunTrace.generate( - trace_attributes, - trace_content, - ) From 9d93485b78df7c9342349bfe06ba2bc20ce7f91f Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 19 Jun 2025 12:27:21 +0200 Subject: [PATCH 2/8] Remove scikit-learn extension submodule It will now be hosted in a separate repository --- .../test_sklearn_extension/__init__.py | 0 .../test_sklearn_extension.py | 2422 ----------------- 2 files changed, 2422 deletions(-) delete mode 100644 tests/test_extensions/test_sklearn_extension/__init__.py delete mode 100644 tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py diff --git a/tests/test_extensions/test_sklearn_extension/__init__.py b/tests/test_extensions/test_sklearn_extension/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py deleted file mode 100644 index 9913436e4..000000000 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ /dev/null @@ -1,2422 +0,0 @@ -# License: BSD 3-Clause -from __future__ import annotations - -import collections -import json -import os -import re -import sys -import unittest -import warnings -from collections import OrderedDict -from packaging.version import Version -from typing import Any -from unittest import mock - -import numpy as np -import pandas as pd -import pytest -import scipy.optimize -import scipy.stats -import sklearn.base -import sklearn.cluster -import sklearn.datasets -import sklearn.decomposition -import sklearn.dummy -import sklearn.ensemble -import sklearn.feature_selection -import sklearn.gaussian_process -import sklearn.linear_model -import sklearn.model_selection -import sklearn.naive_bayes -import sklearn.neural_network -import sklearn.pipeline -import 
sklearn.preprocessing -import sklearn.tree -from packaging import version -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OneHotEncoder, StandardScaler - -import openml -from openml.exceptions import PyOpenMLError -from openml.extensions.sklearn import SklearnExtension, cat, cont -from openml.flows import OpenMLFlow -from openml.flows.functions import assert_flows_equal -from openml.runs.trace import OpenMLRunTrace -from openml.testing import CustomImputer, SimpleImputer, TestBase - -this_directory = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(this_directory) - - -__version__ = 0.1 - - -class Model(sklearn.base.BaseEstimator): - def __init__(self, boolean, integer, floating_point_value): - self.boolean = boolean - self.integer = integer - self.floating_point_value = floating_point_value - - def fit(self, X, y): - pass - - -def _cat_col_selector(X): - return X.select_dtypes(include=["object", "category"]).columns - - -def _get_sklearn_preprocessing(): - from sklearn.compose import ColumnTransformer - - return [ - ( - "cat_handling", - ColumnTransformer( - transformers=[ - ( - "cat", - sklearn.pipeline.Pipeline( - [ - ( - "cat_si", - SimpleImputer( - strategy="constant", - fill_value="missing", - ), - ), - ("cat_ohe", OneHotEncoder(handle_unknown="ignore")), - ], - ), - _cat_col_selector, - ) - ], - remainder="passthrough", - ), - ), - ("imp", SimpleImputer()), - ] - - -class TestSklearnExtensionFlowFunctions(TestBase): - # Splitting not helpful, these test's don't rely on the server and take less - # than 1 seconds - - def setUp(self): - super().setUp(n_levels=2) - iris = sklearn.datasets.load_iris() - self.X = iris.data - self.y = iris.target - - self.extension = SklearnExtension() - - def _get_expected_pipeline_description(self, model: Any) -> str: - if version.parse(sklearn.__version__) >= version.parse("1.0"): - expected_fixture = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement `fit` and `transform` methods.\nThe final " - "estimator only needs to implement `fit`.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ". For this, it\nenables setting parameters of the various steps" - " using their names and the\nparameter name separated by a `'__'`," - " as in the example below. A step's\nestimator may be replaced " - "entirely by setting the parameter with its name\nto another " - "estimator, or a transformer removed by setting it to\n" - "`'passthrough'` or `None`." 
- ) - elif version.parse(sklearn.__version__) >= version.parse("0.21.0"): - expected_fixture = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement fit and transform methods.\nThe final " - "estimator only needs to implement fit.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ".\nFor this, it enables setting parameters of the various steps" - " using their\nnames and the parameter name separated by a '__'," - " as in the example below.\nA step's estimator may be replaced " - "entirely by setting the parameter\nwith its name to another " - "estimator, or a transformer removed by setting\nit to " - "'passthrough' or ``None``." - ) - else: - expected_fixture = self.extension._get_sklearn_description(model) - return expected_fixture - - def _serialization_test_helper( - self, - model, - X, - y, - subcomponent_parameters, - dependencies_mock_call_count=(1, 2), - ): - # Regex pattern for memory addresses of style 0x7f8e0f31ecf8 - pattern = re.compile("0x[0-9a-f]{12}") - - with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: - serialization = self.extension.model_to_flow(model) - - if X is not None: - model.fit(X, y) - - new_model = self.extension.flow_to_model(serialization) - # compares string representations of the dict, as it potentially - # contains complex objects that can not be compared with == op - assert re.sub(pattern, str(model.get_params()), "") == re.sub( - pattern, str(new_model.get_params()), "" - ) - - assert type(new_model) == type(model) - assert new_model is not model - - if X is not None: - new_model.fit(self.X, self.y) - - assert check_dependencies_mock.call_count == dependencies_mock_call_count[0] - - xml = serialization._to_dict() - new_model2 = self.extension.flow_to_model(OpenMLFlow._from_dict(xml)) - assert re.sub(pattern, str(model.get_params()), "") == re.sub( - pattern, str(new_model2.get_params()), "" - ) - - assert type(new_model2) == type(model) - assert new_model2 is not model - - if X is not None: - new_model2.fit(self.X, self.y) - - assert check_dependencies_mock.call_count == dependencies_mock_call_count[1] - - if subcomponent_parameters: - for nm in (new_model, new_model2): - new_model_params = nm.get_params() - model_params = model.get_params() - for subcomponent_parameter in subcomponent_parameters: - assert type(new_model_params[subcomponent_parameter]) == type( - model_params[subcomponent_parameter] - ) - assert ( - new_model_params[subcomponent_parameter] - is not model_params[subcomponent_parameter] - ) - del new_model_params[subcomponent_parameter] - del model_params[subcomponent_parameter] - assert new_model_params == model_params - - return serialization, new_model - - @pytest.mark.sklearn() - def test_serialize_model(self): - max_features = "auto" if Version(sklearn.__version__) < Version("1.3") else "sqrt" - model = sklearn.tree.DecisionTreeClassifier( - criterion="entropy", - max_features=max_features, - max_leaf_nodes=2000, - ) - - tree_name = "tree" if Version(sklearn.__version__) < Version("0.22") else "_classes" - fixture_name = f"sklearn.tree.{tree_name}.DecisionTreeClassifier" - fixture_short_name = "sklearn.DecisionTreeClassifier" - # str obtained from 
self.extension._get_sklearn_description(model) - fixture_description = "A decision tree classifier." - version_fixture = self.extension._min_dependency_str(sklearn.__version__) - - presort_val = "false" if Version(sklearn.__version__) < Version("0.22") else '"deprecated"' - # min_impurity_decrease has been introduced in 0.20 - # min_impurity_split has been deprecated in 0.20 - if Version(sklearn.__version__) < Version("0.19"): - fixture_parameters = OrderedDict( - ( - ("class_weight", "null"), - ("criterion", '"entropy"'), - ("max_depth", "null"), - ("max_features", '"auto"'), - ("max_leaf_nodes", "2000"), - ("min_impurity_split", "1e-07"), - ("min_samples_leaf", "1"), - ("min_samples_split", "2"), - ("min_weight_fraction_leaf", "0.0"), - ("presort", "false"), - ("random_state", "null"), - ("splitter", '"best"'), - ), - ) - elif Version(sklearn.__version__) < Version("1.0"): - fixture_parameters = OrderedDict( - ( - ("class_weight", "null"), - ("criterion", '"entropy"'), - ("max_depth", "null"), - ("max_features", '"auto"'), - ("max_leaf_nodes", "2000"), - ("min_impurity_decrease", "0.0"), - ("min_impurity_split", "null"), - ("min_samples_leaf", "1"), - ("min_samples_split", "2"), - ("min_weight_fraction_leaf", "0.0"), - ("presort", presort_val), - ("random_state", "null"), - ("splitter", '"best"'), - ), - ) - elif Version(sklearn.__version__) < Version("1.4"): - fixture_parameters = OrderedDict( - ( - ("class_weight", "null"), - ("criterion", '"entropy"'), - ("max_depth", "null"), - ("max_features", f'"{max_features}"'), - ("max_leaf_nodes", "2000"), - ("min_impurity_decrease", "0.0"), - ("min_samples_leaf", "1"), - ("min_samples_split", "2"), - ("min_weight_fraction_leaf", "0.0"), - ("presort", presort_val), - ("random_state", "null"), - ("splitter", '"best"'), - ), - ) - else: - fixture_parameters = OrderedDict( - ( - ("class_weight", "null"), - ("criterion", '"entropy"'), - ("max_depth", "null"), - ("max_features", f'"{max_features}"'), - ("max_leaf_nodes", "2000"), - ("min_impurity_decrease", "0.0"), - ("min_samples_leaf", "1"), - ("min_samples_split", "2"), - ("min_weight_fraction_leaf", "0.0"), - ("presort", presort_val), - ("monotonic_cst", "null"), - ("random_state", "null"), - ("splitter", '"best"'), - ), - ) - - if Version(sklearn.__version__) >= Version("0.22"): - fixture_parameters.update({"ccp_alpha": "0.0"}) - fixture_parameters.move_to_end("ccp_alpha", last=False) - if Version(sklearn.__version__) >= Version("0.24"): - del fixture_parameters["presort"] - - structure_fixture = {f"sklearn.tree.{tree_name}.DecisionTreeClassifier": []} - - serialization, _ = self._serialization_test_helper( - model, - X=self.X, - y=self.y, - subcomponent_parameters=None, - ) - structure = serialization.get_structure("name") - - assert serialization.name == fixture_name - assert serialization.class_name == fixture_name - assert serialization.custom_name == fixture_short_name - assert serialization.description == fixture_description - assert serialization.parameters == fixture_parameters - assert serialization.dependencies == version_fixture - self.assertDictEqual(structure, structure_fixture) - - @pytest.mark.sklearn() - @pytest.mark.production() - def test_can_handle_flow(self): - openml.config.server = self.production_server - - R_flow = openml.flows.get_flow(6794) - assert not self.extension.can_handle_flow(R_flow) - old_3rd_party_flow = openml.flows.get_flow(7660) - assert self.extension.can_handle_flow(old_3rd_party_flow) - - openml.config.server = self.test_server - - 
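# A minimal round-trip sketch of the serialization these tests exercise
# (assumes a scikit-learn environment with this extension installed): a model
# becomes an OpenMLFlow and back, yielding an equivalent but distinct object.
import sklearn.tree

from openml.extensions.sklearn import SklearnExtension

extension = SklearnExtension()
model = sklearn.tree.DecisionTreeClassifier(criterion="entropy")
flow = extension.model_to_flow(model)
restored = extension.flow_to_model(flow)
assert type(restored) is type(model)
assert restored is not model
assert restored.get_params() == model.get_params()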
@pytest.mark.sklearn() - def test_serialize_model_clustering(self): - model = sklearn.cluster.KMeans() - - sklearn_version = Version(sklearn.__version__) - cluster_name = "k_means_" if sklearn_version < Version("0.22") else "_kmeans" - fixture_name = f"sklearn.cluster.{cluster_name}.KMeans" - fixture_short_name = "sklearn.KMeans" - # str obtained from self.extension._get_sklearn_description(model) - fixture_description = "K-Means clustering{}".format( - "" if sklearn_version < Version("0.22") else ".", - ) - version_fixture = self.extension._min_dependency_str(sklearn.__version__) - - n_jobs_val = "1" - if sklearn_version >= Version("0.20"): - n_jobs_val = "null" - if sklearn_version >= Version("0.23"): - n_jobs_val = '"deprecated"' - - precomp_val = '"auto"' if sklearn_version < Version("0.23") else '"deprecated"' - n_init = "10" - if sklearn_version >= Version("1.2"): - n_init = '"warn"' - if sklearn_version >= Version("1.4"): - n_init = '"auto"' - - algorithm = '"auto"' if sklearn_version < Version("1.1") else '"lloyd"' - fixture_parameters = OrderedDict( - [ - ("algorithm", algorithm), - ("copy_x", "true"), - ("init", '"k-means++"'), - ("max_iter", "300"), - ("n_clusters", "8"), - ("n_init", n_init), - ("n_jobs", n_jobs_val), - ("precompute_distances", precomp_val), - ("random_state", "null"), - ("tol", "0.0001"), - ("verbose", "0"), - ] - ) - - if sklearn_version >= Version("1.0"): - fixture_parameters.pop("n_jobs") - fixture_parameters.pop("precompute_distances") - - fixture_structure = {f"sklearn.cluster.{cluster_name}.KMeans": []} - - serialization, _ = self._serialization_test_helper( - model, - X=None, - y=None, - subcomponent_parameters=None, - ) - structure = serialization.get_structure("name") - - assert serialization.name == fixture_name - assert serialization.class_name == fixture_name - assert serialization.custom_name == fixture_short_name - assert serialization.description == fixture_description - assert serialization.parameters == fixture_parameters - assert serialization.dependencies == version_fixture - assert structure == fixture_structure - - @pytest.mark.sklearn() - def test_serialize_model_with_subcomponent(self): - estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" - ) - estimator_param = {estimator_name: sklearn.tree.DecisionTreeClassifier()} - model = sklearn.ensemble.AdaBoostClassifier( - n_estimators=100, - **estimator_param, - ) - - weight_name = "{}weight_boosting".format( - "" if Version(sklearn.__version__) < Version("0.22") else "_", - ) - tree_name = "tree" if Version(sklearn.__version__) < Version("0.22") else "_classes" - fixture_name = ( - f"sklearn.ensemble.{weight_name}.AdaBoostClassifier" - f"({estimator_name}=sklearn.tree.{tree_name}.DecisionTreeClassifier)" - ) - fixture_class_name = f"sklearn.ensemble.{weight_name}.AdaBoostClassifier" - fixture_short_name = "sklearn.AdaBoostClassifier" - # str obtained from self.extension._get_sklearn_description(model) - fixture_description = ( - "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a " - "meta-estimator that begins by fitting a\nclassifier on the original" - " dataset and then fits additional copies of the\nclassifier on the " - "same dataset but where the weights of incorrectly\nclassified " - "instances are adjusted such that subsequent classifiers focus\nmore" - " on difficult cases.\n\nThis class implements the algorithm known " - "as AdaBoost-SAMME [2]." 
- ) - fixture_subcomponent_name = f"sklearn.tree.{tree_name}.DecisionTreeClassifier" - fixture_subcomponent_class_name = f"sklearn.tree.{tree_name}.DecisionTreeClassifier" - # str obtained from self.extension._get_sklearn_description(model.base_estimator) - fixture_subcomponent_description = "A decision tree classifier." - fixture_structure = { - fixture_name: [], - f"sklearn.tree.{tree_name}.DecisionTreeClassifier": [estimator_name], - } - - serialization, _ = self._serialization_test_helper( - model, - X=self.X, - y=self.y, - subcomponent_parameters=[estimator_name], - dependencies_mock_call_count=(2, 4), - ) - structure = serialization.get_structure("name") - - assert serialization.name == fixture_name - assert serialization.class_name == fixture_class_name - assert serialization.custom_name == fixture_short_name - if Version(sklearn.__version__) < Version("1.4"): - assert serialization.description == fixture_description - assert serialization.parameters["algorithm"] == '"SAMME.R"' - assert isinstance(serialization.parameters[estimator_name], str) - assert serialization.parameters["learning_rate"] == "1.0" - assert serialization.parameters["n_estimators"] == "100" - assert serialization.components[estimator_name].name == fixture_subcomponent_name - assert ( - serialization.components[estimator_name].class_name == fixture_subcomponent_class_name - ) - assert ( - serialization.components[estimator_name].description == fixture_subcomponent_description - ) - self.assertDictEqual(structure, fixture_structure) - - @pytest.mark.sklearn() - def test_serialize_pipeline(self): - scaler = sklearn.preprocessing.StandardScaler(with_mean=False) - dummy = sklearn.dummy.DummyClassifier(strategy="prior") - model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("dummy", dummy)]) - - scaler_name = "data" if Version(sklearn.__version__) < Version("0.22") else "_data" - fixture_name = ( - "sklearn.pipeline.Pipeline(" - f"scaler=sklearn.preprocessing.{scaler_name}.StandardScaler," - "dummy=sklearn.dummy.DummyClassifier)" - ) - fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)" - fixture_description = self._get_expected_pipeline_description(model) - fixture_structure = { - fixture_name: [], - f"sklearn.preprocessing.{scaler_name}.StandardScaler": ["scaler"], - "sklearn.dummy.DummyClassifier": ["dummy"], - } - - serialization, new_model = self._serialization_test_helper( - model, - X=self.X, - y=self.y, - subcomponent_parameters=["scaler", "dummy", "steps"], - dependencies_mock_call_count=(3, 6), - ) - structure = serialization.get_structure("name") - - assert serialization.name == fixture_name - assert serialization.custom_name == fixture_short_name - if Version(sklearn.__version__) < Version("1.3"): - # Newer versions of scikit-learn have update docstrings - assert serialization.description == fixture_description - self.assertDictEqual(structure, fixture_structure) - - # Comparing the pipeline - # The parameters only have the name of base objects(not the whole flow) - # as value - # memory parameter has been added in 0.19, verbose in 0.21 - if Version(sklearn.__version__) < Version("0.19"): - assert len(serialization.parameters) == 1 - elif Version(sklearn.__version__) < Version("0.21"): - assert len(serialization.parameters) == 2 - else: - assert len(serialization.parameters) == 3 - - # Hard to compare two representations of a dict due to possibly - # different sorting. 
Making a json makes it easier - assert json.loads(serialization.parameters["steps"]) == [ - { - "oml-python:serialized_object": "component_reference", - "value": {"key": "scaler", "step_name": "scaler"}, - }, - { - "oml-python:serialized_object": "component_reference", - "value": {"key": "dummy", "step_name": "dummy"}, - }, - ] - - # Checking the sub-component - assert len(serialization.components) == 2 - assert isinstance(serialization.components["scaler"], OpenMLFlow) - assert isinstance(serialization.components["dummy"], OpenMLFlow) - - assert [step[0] for step in new_model.steps] == [step[0] for step in model.steps] - assert new_model.steps[0][1] is not model.steps[0][1] - assert new_model.steps[1][1] is not model.steps[1][1] - - @pytest.mark.sklearn() - def test_serialize_pipeline_clustering(self): - scaler = sklearn.preprocessing.StandardScaler(with_mean=False) - km = sklearn.cluster.KMeans() - model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("clusterer", km)]) - - scaler_name = "data" if Version(sklearn.__version__) < Version("0.22") else "_data" - cluster_name = "k_means_" if Version(sklearn.__version__) < Version("0.22") else "_kmeans" - fixture_name = ( - "sklearn.pipeline.Pipeline(" - f"scaler=sklearn.preprocessing.{scaler_name}.StandardScaler," - f"clusterer=sklearn.cluster.{cluster_name}.KMeans)" - ) - fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)" - fixture_description = self._get_expected_pipeline_description(model) - fixture_structure = { - fixture_name: [], - f"sklearn.preprocessing.{scaler_name}.StandardScaler": ["scaler"], - f"sklearn.cluster.{cluster_name}.KMeans": ["clusterer"], - } - serialization, new_model = self._serialization_test_helper( - model, - X=None, - y=None, - subcomponent_parameters=["scaler", "steps", "clusterer"], - dependencies_mock_call_count=(3, 6), - ) - structure = serialization.get_structure("name") - - assert serialization.name == fixture_name - assert serialization.custom_name == fixture_short_name - if Version(sklearn.__version__) < Version("1.3"): - # Newer versions of scikit-learn have update docstrings - assert serialization.description == fixture_description - self.assertDictEqual(structure, fixture_structure) - - # Comparing the pipeline - # The parameters only have the name of base objects(not the whole flow) - # as value - # memory parameter has been added in 0.19 - if Version(sklearn.__version__) < Version("0.19"): - assert len(serialization.parameters) == 1 - elif Version(sklearn.__version__) < Version("0.21"): - assert len(serialization.parameters) == 2 - else: - assert len(serialization.parameters) == 3 - # Hard to compare two representations of a dict due to possibly - # different sorting. 
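# A small sketch of the JSON-based comparison used in the assertion below:
# parsing the serialized "steps" parameter yields plain lists and dicts that
# compare structurally, independent of key order or string formatting.
import json

steps_json = (
    '[{"oml-python:serialized_object": "component_reference",'
    ' "value": {"key": "scaler", "step_name": "scaler"}}]'
)
assert json.loads(steps_json)[0]["value"]["key"] == "scaler"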
Making a json makes it easier - assert json.loads(serialization.parameters["steps"]) == [ - { - "oml-python:serialized_object": "component_reference", - "value": {"key": "scaler", "step_name": "scaler"}, - }, - { - "oml-python:serialized_object": "component_reference", - "value": {"key": "clusterer", "step_name": "clusterer"}, - }, - ] - - # Checking the sub-component - assert len(serialization.components) == 2 - assert isinstance(serialization.components["scaler"], OpenMLFlow) - assert isinstance(serialization.components["clusterer"], OpenMLFlow) - - assert [step[0] for step in new_model.steps] == [step[0] for step in model.steps] - assert new_model.steps[0][1] is not model.steps[0][1] - assert new_model.steps[1][1] is not model.steps[1][1] - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.20"), - reason="columntransformer introduction in 0.20.0", - ) - def test_serialize_column_transformer(self): - # temporary local import, dependend on version 0.20 - import sklearn.compose - - model = sklearn.compose.ColumnTransformer( - transformers=[ - ("numeric", sklearn.preprocessing.StandardScaler(), [0, 1, 2]), - ( - "nominal", - sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), - [3, 4, 5], - ), - ("drop", "drop", [6, 7, 8]), - ], - remainder="passthrough", - ) - - scaler_name = "data" if Version(sklearn.__version__) < Version("0.22") else "_data" - fixture = ( - "sklearn.compose._column_transformer.ColumnTransformer(" - f"numeric=sklearn.preprocessing.{scaler_name}.StandardScaler," - "nominal=sklearn.preprocessing._encoders.OneHotEncoder,drop=drop)" - ) - fixture_short_name = "sklearn.ColumnTransformer" - - if version.parse(sklearn.__version__) >= version.parse("0.21.0"): - # str obtained from self.extension._get_sklearn_description(model) - fixture_description = ( - "Applies transformers to columns of an array or pandas " - "DataFrame.\n\nThis estimator allows different columns or " - "column subsets of the input\nto be transformed separately and " - "the features generated by each transformer\nwill be " - "concatenated to form a single feature space.\nThis is useful " - "for heterogeneous or columnar data, to combine several\nfeature" - " extraction mechanisms or transformations into a single " - "transformer." 
- ) - else: - fixture_description = self.extension._get_sklearn_description(model) - - fixture_structure = { - fixture: [], - f"sklearn.preprocessing.{scaler_name}.StandardScaler": ["numeric"], - "sklearn.preprocessing._encoders.OneHotEncoder": ["nominal"], - "drop": ["drop"], - } - - serialization = self.extension.model_to_flow(model) - structure = serialization.get_structure("name") - assert serialization.name == fixture - assert serialization.custom_name == fixture_short_name - assert serialization.description == fixture_description - self.assertDictEqual(structure, fixture_structure) - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.20"), - reason="columntransformer introduction in 0.20.0", - ) - def test_serialize_column_transformer_pipeline(self): - # temporary local import, dependend on version 0.20 - import sklearn.compose - - inner = sklearn.compose.ColumnTransformer( - transformers=[ - ("numeric", sklearn.preprocessing.StandardScaler(), [0, 1, 2]), - ( - "nominal", - sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore"), - [3, 4, 5], - ), - ], - remainder="passthrough", - ) - model = sklearn.pipeline.Pipeline( - steps=[("transformer", inner), ("classifier", sklearn.tree.DecisionTreeClassifier())], - ) - scaler_name = "data" if Version(sklearn.__version__) < Version("0.22") else "_data" - tree_name = "tree" if Version(sklearn.__version__) < Version("0.22") else "_classes" - fixture_name = ( - "sklearn.pipeline.Pipeline(" - "transformer=sklearn.compose._column_transformer." - "ColumnTransformer(" - f"numeric=sklearn.preprocessing.{scaler_name}.StandardScaler," - "nominal=sklearn.preprocessing._encoders.OneHotEncoder)," - f"classifier=sklearn.tree.{tree_name}.DecisionTreeClassifier)" - ) - fixture_structure = { - f"sklearn.preprocessing.{scaler_name}.StandardScaler": [ - "transformer", - "numeric", - ], - "sklearn.preprocessing._encoders.OneHotEncoder": ["transformer", "nominal"], - "sklearn.compose._column_transformer.ColumnTransformer(numeric=" - f"sklearn.preprocessing.{scaler_name}.StandardScaler,nominal=sklearn." 
- "preprocessing._encoders.OneHotEncoder)": ["transformer"], - f"sklearn.tree.{tree_name}.DecisionTreeClassifier": ["classifier"], - fixture_name: [], - } - - fixture_description = self._get_expected_pipeline_description(model) - serialization, new_model = self._serialization_test_helper( - model, - X=None, - y=None, - subcomponent_parameters=( - "transformer", - "classifier", - "transformer__transformers", - "steps", - "transformer__nominal", - "transformer__numeric", - ), - dependencies_mock_call_count=(5, 10), - ) - structure = serialization.get_structure("name") - assert serialization.name == fixture_name - if Version(sklearn.__version__) < Version("1.3"): # Not yet up-to-date for later versions - assert serialization.description == fixture_description - self.assertDictEqual(structure, fixture_structure) - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.20"), - reason="Pipeline processing behaviour updated", - ) - def test_serialize_feature_union(self): - sparse_parameter = ( - "sparse" if Version(sklearn.__version__) < Version("1.4") else "sparse_output" - ) - ohe_params = {sparse_parameter: False} - if Version(sklearn.__version__) >= Version("0.20"): - ohe_params["categories"] = "auto" - ohe = sklearn.preprocessing.OneHotEncoder(**ohe_params) - scaler = sklearn.preprocessing.StandardScaler() - - fu = sklearn.pipeline.FeatureUnion(transformer_list=[("ohe", ohe), ("scaler", scaler)]) - serialization, new_model = self._serialization_test_helper( - fu, - X=self.X, - y=self.y, - subcomponent_parameters=("ohe", "scaler", "transformer_list"), - dependencies_mock_call_count=(3, 6), - ) - structure = serialization.get_structure("name") - # OneHotEncoder was moved to _encoders module in 0.20 - module_name_encoder = ( - "_encoders" if Version(sklearn.__version__) >= Version("0.20") else "data" - ) - scaler_name = "data" if Version(sklearn.__version__) < Version("0.22") else "_data" - fixture_name = ( - "sklearn.pipeline.FeatureUnion(" - f"ohe=sklearn.preprocessing.{module_name_encoder}.OneHotEncoder," - f"scaler=sklearn.preprocessing.{scaler_name}.StandardScaler)" - ) - fixture_structure = { - fixture_name: [], - f"sklearn.preprocessing.{module_name_encoder}.OneHotEncoder": ["ohe"], - f"sklearn.preprocessing.{scaler_name}.StandardScaler": ["scaler"], - } - assert serialization.name == fixture_name - self.assertDictEqual(structure, fixture_structure) - assert new_model.transformer_list[0][0] == fu.transformer_list[0][0] - assert ( - new_model.transformer_list[0][1].get_params() == fu.transformer_list[0][1].get_params() - ) - assert new_model.transformer_list[1][0] == fu.transformer_list[1][0] - assert ( - new_model.transformer_list[1][1].get_params() == fu.transformer_list[1][1].get_params() - ) - - assert [step[0] for step in new_model.transformer_list] == [ - step[0] for step in fu.transformer_list - ] - assert new_model.transformer_list[0][1] is not fu.transformer_list[0][1] - assert new_model.transformer_list[1][1] is not fu.transformer_list[1][1] - - fu.set_params(scaler="drop") - serialization, new_model = self._serialization_test_helper( - fu, - X=self.X, - y=self.y, - subcomponent_parameters=("ohe", "transformer_list"), - dependencies_mock_call_count=(3, 6), - ) - assert ( - serialization.name == "sklearn.pipeline.FeatureUnion(" - f"ohe=sklearn.preprocessing.{module_name_encoder}.OneHotEncoder," - "scaler=drop)" - ) - assert new_model.transformer_list[1][1] == "drop" - - @pytest.mark.sklearn() - def 
test_serialize_feature_union_switched_names(self): - ohe_params = ( - {"categories": "auto"} if Version(sklearn.__version__) >= Version("0.20") else {} - ) - ohe = sklearn.preprocessing.OneHotEncoder(**ohe_params) - scaler = sklearn.preprocessing.StandardScaler() - fu1 = sklearn.pipeline.FeatureUnion(transformer_list=[("ohe", ohe), ("scaler", scaler)]) - fu2 = sklearn.pipeline.FeatureUnion(transformer_list=[("scaler", ohe), ("ohe", scaler)]) - - fu1_serialization, _ = self._serialization_test_helper( - fu1, - X=None, - y=None, - subcomponent_parameters=(), - dependencies_mock_call_count=(3, 6), - ) - fu2_serialization, _ = self._serialization_test_helper( - fu2, - X=None, - y=None, - subcomponent_parameters=(), - dependencies_mock_call_count=(3, 6), - ) - - # OneHotEncoder was moved to _encoders module in 0.20 - module_name_encoder = ( - "_encoders" if Version(sklearn.__version__) >= Version("0.20") else "data" - ) - scaler_name = "data" if Version(sklearn.__version__) < Version("0.22") else "_data" - assert ( - fu1_serialization.name == "sklearn.pipeline.FeatureUnion(" - f"ohe=sklearn.preprocessing.{module_name_encoder}.OneHotEncoder," - f"scaler=sklearn.preprocessing.{scaler_name}.StandardScaler)" - ) - assert ( - fu2_serialization.name == "sklearn.pipeline.FeatureUnion(" - f"scaler=sklearn.preprocessing.{module_name_encoder}.OneHotEncoder," - f"ohe=sklearn.preprocessing.{scaler_name}.StandardScaler)" - ) - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) >= Version("1.4"), - "AdaBoost parameter name changed as did the way its forwarded to GridSearchCV", - ) - def test_serialize_complex_flow(self): - ohe = sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore") - scaler = sklearn.preprocessing.StandardScaler(with_mean=False) - boosting = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier(), - ) - model = sklearn.pipeline.Pipeline( - steps=[("ohe", ohe), ("scaler", scaler), ("boosting", boosting)], - ) - parameter_grid = { - "boosting__base_estimator__max_depth": scipy.stats.randint(1, 10), - "boosting__learning_rate": scipy.stats.uniform(0.01, 0.99), - "boosting__n_estimators": [1, 5, 10, 100], - } - # convert to ordered dict, sorted by keys) due to param grid check - parameter_grid = OrderedDict(sorted(parameter_grid.items())) - cv = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=True) - rs = sklearn.model_selection.RandomizedSearchCV( - estimator=model, - param_distributions=parameter_grid, - cv=cv, - ) - serialized, new_model = self._serialization_test_helper( - rs, - X=self.X, - y=self.y, - subcomponent_parameters=(), - dependencies_mock_call_count=(6, 12), - ) - structure = serialized.get_structure("name") - # OneHotEncoder was moved to _encoders module in 0.20 - module_name_encoder = ( - "_encoders" if Version(sklearn.__version__) >= Version("0.20") else "data" - ) - ohe_name = f"sklearn.preprocessing.{module_name_encoder}.OneHotEncoder" - scaler_name = "sklearn.preprocessing.{}.StandardScaler".format( - "data" if Version(sklearn.__version__) < Version("0.22") else "_data", - ) - tree_name = "sklearn.tree.{}.DecisionTreeClassifier".format( - "tree" if Version(sklearn.__version__) < Version("0.22") else "_classes", - ) - weight_name = "weight" if Version(sklearn.__version__) < Version("0.22") else "_weight" - boosting_name = "sklearn.ensemble.{}_boosting.AdaBoostClassifier(base_estimator={})".format( - weight_name, - tree_name, - ) - pipeline_name = 
"sklearn.pipeline.Pipeline(ohe={},scaler={},boosting={})".format( - ohe_name, - scaler_name, - boosting_name, - ) - fixture_name = ( - f"sklearn.model_selection._search.RandomizedSearchCV(estimator={pipeline_name})" - ) - fixture_structure = { - ohe_name: ["estimator", "ohe"], - scaler_name: ["estimator", "scaler"], - tree_name: ["estimator", "boosting", "base_estimator"], - boosting_name: ["estimator", "boosting"], - pipeline_name: ["estimator"], - fixture_name: [], - } - assert serialized.name == fixture_name - assert structure == fixture_structure - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.21"), - reason="Pipeline till 0.20 doesn't support 'passthrough'", - ) - def test_serialize_strings_as_pipeline_steps(self): - import sklearn.compose - - # First check: test whether a passthrough in a pipeline is serialized correctly - model = sklearn.pipeline.Pipeline(steps=[("transformer", "passthrough")]) - serialized = self.extension.model_to_flow(model) - assert isinstance(serialized, OpenMLFlow) - assert len(serialized.components) == 1 - assert serialized.components["transformer"].name == "passthrough" - serialized = self.extension._serialize_sklearn( - ("transformer", "passthrough"), - parent_model=model, - ) - assert serialized == ("transformer", "passthrough") - extracted_info = self.extension._extract_information_from_model(model) - assert len(extracted_info[2]) == 1 - assert isinstance(extracted_info[2]["transformer"], OpenMLFlow) - assert extracted_info[2]["transformer"].name == "passthrough" - - # Second check: test whether a lone passthrough in a column transformer is serialized - # correctly - model = sklearn.compose.ColumnTransformer([("passthrough", "passthrough", (0,))]) - serialized = self.extension.model_to_flow(model) - assert isinstance(serialized, OpenMLFlow) - assert len(serialized.components) == 1 - assert serialized.components["passthrough"].name == "passthrough" - serialized = self.extension._serialize_sklearn( - ("passthrough", "passthrough"), - parent_model=model, - ) - assert serialized == ("passthrough", "passthrough") - extracted_info = self.extension._extract_information_from_model(model) - assert len(extracted_info[2]) == 1 - assert isinstance(extracted_info[2]["passthrough"], OpenMLFlow) - assert extracted_info[2]["passthrough"].name == "passthrough" - - # Third check: passthrough and drop in a column transformer - model = sklearn.compose.ColumnTransformer( - [("passthrough", "passthrough", (0,)), ("drop", "drop", (1,))], - ) - serialized = self.extension.model_to_flow(model) - assert isinstance(serialized, OpenMLFlow) - assert len(serialized.components) == 2 - assert serialized.components["passthrough"].name == "passthrough" - assert serialized.components["drop"].name == "drop" - serialized = self.extension._serialize_sklearn( - ("passthrough", "passthrough"), - parent_model=model, - ) - assert serialized == ("passthrough", "passthrough") - extracted_info = self.extension._extract_information_from_model(model) - assert len(extracted_info[2]) == 2 - assert isinstance(extracted_info[2]["passthrough"], OpenMLFlow) - assert isinstance(extracted_info[2]["drop"], OpenMLFlow) - assert extracted_info[2]["passthrough"].name == "passthrough" - assert extracted_info[2]["drop"].name == "drop" - - # Fourth check: having an actual preprocessor in the column transformer, too - model = sklearn.compose.ColumnTransformer( - [ - ("passthrough", "passthrough", (0,)), - ("drop", "drop", (1,)), - ("test", 
sklearn.preprocessing.StandardScaler(), (2,)), - ], - ) - serialized = self.extension.model_to_flow(model) - assert isinstance(serialized, OpenMLFlow) - assert len(serialized.components) == 3 - assert serialized.components["passthrough"].name == "passthrough" - assert serialized.components["drop"].name == "drop" - serialized = self.extension._serialize_sklearn( - ("passthrough", "passthrough"), - parent_model=model, - ) - assert serialized == ("passthrough", "passthrough") - extracted_info = self.extension._extract_information_from_model(model) - assert len(extracted_info[2]) == 3 - assert isinstance(extracted_info[2]["passthrough"], OpenMLFlow) - assert isinstance(extracted_info[2]["drop"], OpenMLFlow) - assert extracted_info[2]["passthrough"].name == "passthrough" - assert extracted_info[2]["drop"].name == "drop" - - # Fifth check: test whether a lone drop in a feature union is serialized correctly - model = sklearn.pipeline.FeatureUnion([("drop", "drop")]) - serialized = self.extension.model_to_flow(model) - assert isinstance(serialized, OpenMLFlow) - assert len(serialized.components) == 1 - assert serialized.components["drop"].name == "drop" - serialized = self.extension._serialize_sklearn(("drop", "drop"), parent_model=model) - assert serialized == ("drop", "drop") - extracted_info = self.extension._extract_information_from_model(model) - assert len(extracted_info[2]) == 1 - assert isinstance(extracted_info[2]["drop"], OpenMLFlow) - assert extracted_info[2]["drop"].name == "drop" - - @pytest.mark.sklearn() - def test_serialize_type(self): - supported_types = [float, np.float32, np.float64, int, np.int32, np.int64] - if Version(np.__version__) < Version("1.24"): - supported_types.append(float) - supported_types.append(int) - - for supported_type in supported_types: - serialized = self.extension.model_to_flow(supported_type) - deserialized = self.extension.flow_to_model(serialized) - assert deserialized == supported_type - - @pytest.mark.sklearn() - def test_serialize_rvs(self): - supported_rvs = [ - scipy.stats.norm(loc=1, scale=5), - scipy.stats.expon(loc=1, scale=5), - scipy.stats.randint(low=-3, high=15), - ] - - for supported_rv in supported_rvs: - serialized = self.extension.model_to_flow(supported_rv) - deserialized = self.extension.flow_to_model(serialized) - assert type(deserialized.dist) == type(supported_rv.dist) - del deserialized.dist - del supported_rv.dist - assert deserialized.__dict__ == supported_rv.__dict__ - - @pytest.mark.sklearn() - def test_serialize_function(self): - serialized = self.extension.model_to_flow(sklearn.feature_selection.chi2) - deserialized = self.extension.flow_to_model(serialized) - assert deserialized == sklearn.feature_selection.chi2 - - @pytest.mark.sklearn() - def test_serialize_cvobject(self): - methods = [sklearn.model_selection.KFold(3), sklearn.model_selection.LeaveOneOut()] - fixtures = [ - OrderedDict( - [ - ("oml-python:serialized_object", "cv_object"), - ( - "value", - OrderedDict( - [ - ("name", "sklearn.model_selection._split.KFold"), - ( - "parameters", - OrderedDict( - [ - ("n_splits", "3"), - ("random_state", "null"), - ("shuffle", "false"), - ], - ), - ), - ], - ), - ), - ], - ), - OrderedDict( - [ - ("oml-python:serialized_object", "cv_object"), - ( - "value", - OrderedDict( - [ - ("name", "sklearn.model_selection._split.LeaveOneOut"), - ("parameters", OrderedDict()), - ], - ), - ), - ], - ), - ] - for method, fixture in zip(methods, fixtures): - m = self.extension.model_to_flow(method) - assert m == fixture - - m_new = 
self.extension.flow_to_model(m) - assert m_new is not m - assert isinstance(m_new, type(method)) - - @pytest.mark.sklearn() - def test_serialize_simple_parameter_grid(self): - # We cannot easily test for scipy random variables in here, but they - # should be covered - - # Examples from the scikit-learn documentation - models = [sklearn.svm.SVC(), sklearn.ensemble.RandomForestClassifier()] - grids = [ - [ - OrderedDict([("C", [1, 10, 100, 1000]), ("kernel", ["linear"])]), - OrderedDict( - [("C", [1, 10, 100, 1000]), ("gamma", [0.001, 0.0001]), ("kernel", ["rbf"])], - ), - ], - OrderedDict( - [ - ("bootstrap", [True, False]), - ("criterion", ["gini", "entropy"]), - ("max_depth", [3, None]), - ("max_features", [1, 3, 10]), - ("min_samples_leaf", [1, 3, 10]), - ("min_samples_split", [1, 3, 10]), - ], - ), - ] - - for grid, model in zip(grids, models): - serialized = self.extension.model_to_flow(grid) - deserialized = self.extension.flow_to_model(serialized) - - assert deserialized == grid - assert deserialized is not grid - # providing error_score because nan != nan - hpo = sklearn.model_selection.GridSearchCV( - param_grid=grid, - estimator=model, - error_score=-1000, - ) - - serialized = self.extension.model_to_flow(hpo) - deserialized = self.extension.flow_to_model(serialized) - assert hpo.param_grid == deserialized.param_grid - assert hpo.estimator.get_params() == deserialized.estimator.get_params() - hpo_params = hpo.get_params(deep=False) - deserialized_params = deserialized.get_params(deep=False) - del hpo_params["estimator"] - del deserialized_params["estimator"] - assert hpo_params == deserialized_params - - @pytest.mark.sklearn() - @unittest.skip( - "This feature needs further reworking. If we allow several " - "components, we need to register them all in the downstream " - "flows. 
This is so far not implemented.", - ) - def test_serialize_advanced_grid(self): - # TODO: a GridSearchCV object should be serialized instead - - # This needs to be in its own function because we cannot simply check - # for the equality of the grid, because scikit-learn objects don't - # really support the equality operator - # This will only work with sklearn==0.18 - N_FEATURES_OPTIONS = [2, 4, 8] - C_OPTIONS = [1, 10, 100, 1000] - grid = [ - { - "reduce_dim": [ - sklearn.decomposition.PCA(iterated_power=7), - sklearn.decomposition.NMF(), - ], - "reduce_dim__n_components": N_FEATURES_OPTIONS, - "classify__C": C_OPTIONS, - }, - { - "reduce_dim": [ - sklearn.feature_selection.SelectKBest(sklearn.feature_selection.chi2), - ], - "reduce_dim__k": N_FEATURES_OPTIONS, - "classify__C": C_OPTIONS, - }, - ] - - serialized = self.extension.model_to_flow(grid) - deserialized = self.extension.flow_to_model(serialized) - - assert ( - grid[0]["reduce_dim"][0].get_params() == deserialized[0]["reduce_dim"][0].get_params() - ) - assert grid[0]["reduce_dim"][0] is not deserialized[0]["reduce_dim"][0] - assert ( - grid[0]["reduce_dim"][1].get_params() == deserialized[0]["reduce_dim"][1].get_params() - ) - assert grid[0]["reduce_dim"][1] is not deserialized[0]["reduce_dim"][1] - assert grid[0]["reduce_dim__n_components"] == deserialized[0]["reduce_dim__n_components"] - assert grid[0]["classify__C"] == deserialized[0]["classify__C"] - assert ( - grid[1]["reduce_dim"][0].get_params() == deserialized[1]["reduce_dim"][0].get_params() - ) - assert grid[1]["reduce_dim"][0] is not deserialized[1]["reduce_dim"][0] - assert grid[1]["reduce_dim__k"] == deserialized[1]["reduce_dim__k"] - assert grid[1]["classify__C"] == deserialized[1]["classify__C"] - - @pytest.mark.sklearn() - def test_serialize_advanced_grid_fails(self): - # This unit test checks that the test we skip above would actually fail - - param_grid = { - "base_estimator": [ - sklearn.tree.DecisionTreeClassifier(), - sklearn.tree.ExtraTreeClassifier(), - ], - } - - clf = sklearn.model_selection.GridSearchCV( - sklearn.ensemble.BaggingClassifier(), - param_grid=param_grid, - ) - with pytest.raises( - TypeError, - match=re.compile(r".*OpenML.*Flow.*is not JSON serializable", flags=re.DOTALL), - ): - self.extension.model_to_flow(clf) - - @pytest.mark.sklearn() - def test_serialize_resampling(self): - kfold = sklearn.model_selection.StratifiedKFold(n_splits=4, shuffle=True) - serialized = self.extension.model_to_flow(kfold) - deserialized = self.extension.flow_to_model(serialized) - # Best approximation to get_params() - assert str(deserialized) == str(kfold) - assert deserialized is not kfold - - @pytest.mark.sklearn() - def test_hypothetical_parameter_values(self): - # The hypothetical parameter values of true, 1, 0.1 formatted as a - # string (and their correct serialization and deserialization) can only - # be checked inside a model - - model = Model("true", "1", "0.1") - - serialized = self.extension.model_to_flow(model) - serialized.external_version = "sklearn==test123" - deserialized = self.extension.flow_to_model(serialized) - assert deserialized.get_params() == model.get_params() - assert deserialized is not model - - @pytest.mark.sklearn() - def test_gaussian_process(self): - opt = scipy.optimize.fmin_l_bfgs_b - kernel = sklearn.gaussian_process.kernels.Matern() - gp = sklearn.gaussian_process.GaussianProcessClassifier(kernel=kernel, optimizer=opt) - with pytest.raises( - TypeError, - match=r"Matern\(length_scale=1, nu=1.5\), ", - ): -
self.extension.model_to_flow(gp) - - @pytest.mark.sklearn() - def test_error_on_adding_component_multiple_times_to_flow(self): - # this function implicitly checks - # - openml.flows._check_multiple_occurence_of_component_in_flow() - pca = sklearn.decomposition.PCA() - pca2 = sklearn.decomposition.PCA() - pipeline = sklearn.pipeline.Pipeline((("pca1", pca), ("pca2", pca2))) - fixture = "Found a second occurence of component .*.PCA when trying to serialize Pipeline" - with pytest.raises(ValueError, match=fixture): - self.extension.model_to_flow(pipeline) - - fu = sklearn.pipeline.FeatureUnion((("pca1", pca), ("pca2", pca2))) - fixture = ( - "Found a second occurence of component .*.PCA when trying to serialize FeatureUnion" - ) - with pytest.raises(ValueError, match=fixture): - self.extension.model_to_flow(fu) - - fs = sklearn.feature_selection.SelectKBest() - fu2 = sklearn.pipeline.FeatureUnion((("pca1", pca), ("fs", fs))) - pipeline2 = sklearn.pipeline.Pipeline((("fu", fu2), ("pca2", pca2))) - fixture = "Found a second occurence of component .*.PCA when trying to serialize Pipeline" - with pytest.raises(ValueError, match=fixture): - self.extension.model_to_flow(pipeline2) - - @pytest.mark.sklearn() - def test_subflow_version_propagated(self): - this_directory = os.path.dirname(os.path.abspath(__file__)) - tests_directory = os.path.abspath(os.path.join(this_directory, "..", "..")) - sys.path.append(tests_directory) - import tests.test_flows.dummy_learn.dummy_forest - - pca = sklearn.decomposition.PCA() - dummy = tests.test_flows.dummy_learn.dummy_forest.DummyRegressor() - pipeline = sklearn.pipeline.Pipeline((("pca", pca), ("dummy", dummy))) - flow = self.extension.model_to_flow(pipeline) - # In python2.7, the unit tests work differently on travis-ci; therefore, - # I put the alternative travis-ci answer here as well. 
While it has a - # different value, it is still correct as it is a propagation of the - # subclasses' module name - assert flow.external_version == "{},{},{}".format( - self.extension._format_external_version("openml", openml.__version__), - self.extension._format_external_version("sklearn", sklearn.__version__), - self.extension._format_external_version("tests", "0.1"), - ) - - @pytest.mark.sklearn() - @mock.patch("warnings.warn") - def test_check_dependencies(self, warnings_mock): - dependencies = ["sklearn==0.1", "sklearn>=99.99.99", "sklearn>99.99.99"] - for dependency in dependencies: - self.assertRaises(ValueError, self.extension._check_dependencies, dependency) - - @pytest.mark.sklearn() - def test_illegal_parameter_names(self): - # illegal name: estimators - clf1 = sklearn.ensemble.VotingClassifier( - estimators=[ - ("estimators", sklearn.ensemble.RandomForestClassifier()), - ("whatevs", sklearn.ensemble.ExtraTreesClassifier()), - ], - ) - clf2 = sklearn.ensemble.VotingClassifier( - estimators=[ - ("whatevs", sklearn.ensemble.RandomForestClassifier()), - ("estimators", sklearn.ensemble.ExtraTreesClassifier()), - ], - ) - cases = [clf1, clf2] - - for case in cases: - self.assertRaises(PyOpenMLError, self.extension.model_to_flow, case) - - @pytest.mark.sklearn() - def test_paralizable_check(self): - # using this model should pass the test (if param distribution is - # legal) - singlecore_bagging = sklearn.ensemble.BaggingClassifier() - # using this model should return false (if param distribution is legal) - multicore_bagging = sklearn.ensemble.BaggingClassifier(n_jobs=5) - # using this param distribution should raise an exception - illegal_param_dist = {"base__n_jobs": [-1, 0, 1]} - # using this param distribution should not raise an exception - legal_param_dist = {"n_estimators": [2, 3, 4]} - - estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" - ) - legal_models = [ - sklearn.ensemble.RandomForestClassifier(), - sklearn.ensemble.RandomForestClassifier(n_jobs=5), - sklearn.ensemble.RandomForestClassifier(n_jobs=-1), - sklearn.pipeline.Pipeline( - steps=[("bag", sklearn.ensemble.BaggingClassifier(n_jobs=1))], - ), - sklearn.pipeline.Pipeline( - steps=[("bag", sklearn.ensemble.BaggingClassifier(n_jobs=5))], - ), - sklearn.pipeline.Pipeline( - steps=[("bag", sklearn.ensemble.BaggingClassifier(n_jobs=-1))], - ), - sklearn.model_selection.GridSearchCV(singlecore_bagging, legal_param_dist), - sklearn.model_selection.GridSearchCV(multicore_bagging, legal_param_dist), - sklearn.ensemble.BaggingClassifier( - n_jobs=-1, - **{estimator_name: sklearn.ensemble.RandomForestClassifier(n_jobs=5)}, - ), - ] - illegal_models = [ - sklearn.model_selection.GridSearchCV(singlecore_bagging, illegal_param_dist), - sklearn.model_selection.GridSearchCV(multicore_bagging, illegal_param_dist), - ] - - if Version(sklearn.__version__) < Version("0.20"): - has_refit_time = [False, False, False, False, False, False, False, False, False] - else: - has_refit_time = [False, False, False, False, False, False, True, True, False] - - X, y = sklearn.datasets.load_iris(return_X_y=True) - for model, refit_time in zip(legal_models, has_refit_time): - model.fit(X, y) - assert refit_time == hasattr(model, "refit_time_") - - for model in illegal_models: - with pytest.raises(PyOpenMLError): - self.extension._prevent_optimize_n_jobs(model) - - @pytest.mark.sklearn() - def test__get_fn_arguments_with_defaults(self): - sklearn_version = Version(sklearn.__version__) - if 
sklearn_version < Version("0.19"): - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 15), - (sklearn.tree.DecisionTreeClassifier.__init__, 12), - (sklearn.pipeline.Pipeline.__init__, 0), - ] - elif sklearn_version < Version("0.21"): - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 16), - (sklearn.tree.DecisionTreeClassifier.__init__, 13), - (sklearn.pipeline.Pipeline.__init__, 1), - ] - elif sklearn_version < Version("0.22"): - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 16), - (sklearn.tree.DecisionTreeClassifier.__init__, 13), - (sklearn.pipeline.Pipeline.__init__, 2), - ] - elif sklearn_version < Version("0.23"): - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 18), - (sklearn.tree.DecisionTreeClassifier.__init__, 14), - (sklearn.pipeline.Pipeline.__init__, 2), - ] - elif sklearn_version < Version("0.24"): - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 18), - (sklearn.tree.DecisionTreeClassifier.__init__, 14), - (sklearn.pipeline.Pipeline.__init__, 2), - ] - elif sklearn_version < Version("1.0"): - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 18), - (sklearn.tree.DecisionTreeClassifier.__init__, 13), - (sklearn.pipeline.Pipeline.__init__, 2), - ] - elif sklearn_version < Version("1.4"): - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 17), - (sklearn.tree.DecisionTreeClassifier.__init__, 12), - (sklearn.pipeline.Pipeline.__init__, 2), - ] - else: - fns = [ - (sklearn.ensemble.RandomForestRegressor.__init__, 18), - (sklearn.tree.DecisionTreeClassifier.__init__, 13), - (sklearn.pipeline.Pipeline.__init__, 2), - ] - - for fn, num_params_with_defaults in fns: - defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn) - assert isinstance(defaults, dict) - assert isinstance(defaultless, set) - # check whether we have both defaults and defaultless params - assert len(defaults) == num_params_with_defaults - assert len(defaultless) > 0 - # check no overlap - self.assertSetEqual(set(defaults.keys()), set(defaults.keys()) - defaultless) - self.assertSetEqual(defaultless, defaultless - set(defaults.keys())) - - @pytest.mark.sklearn() - def test_deserialize_with_defaults(self): - # used the 'initialize_with_defaults' flag of the deserialization - # method to return a flow that contains default hyperparameter - # settings. - steps = [ - ("Imputer", SimpleImputer()), - ("OneHotEncoder", sklearn.preprocessing.OneHotEncoder()), - ("Estimator", sklearn.tree.DecisionTreeClassifier()), - ] - pipe_orig = sklearn.pipeline.Pipeline(steps=steps) - - pipe_adjusted = sklearn.clone(pipe_orig) - if Version(sklearn.__version__) < Version("0.23"): - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__min_samples_leaf": 42, - } - elif Version(sklearn.__version__) < Version("1.4"): - params = { - "Imputer__strategy": "mean", - "OneHotEncoder__sparse": True, - "Estimator__min_samples_leaf": 1, - } - else: - params = { - "Imputer__strategy": "mean", - "OneHotEncoder__sparse_output": True, - "Estimator__min_samples_leaf": 1, - } - pipe_adjusted.set_params(**params) - flow = self.extension.model_to_flow(pipe_adjusted) - pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) - - # we want to compare pipe_deserialized and pipe_orig. 
We use the flow - # equals function for this - assert_flows_equal( - self.extension.model_to_flow(pipe_orig), - self.extension.model_to_flow(pipe_deserialized), - ) - - @pytest.mark.sklearn() - def test_deserialize_adaboost_with_defaults(self): - # used the 'initialize_with_defaults' flag of the deserialization - # method to return a flow that contains default hyperparameter - # settings. - steps = [ - ("Imputer", SimpleImputer()), - ("OneHotEncoder", sklearn.preprocessing.OneHotEncoder()), - ( - "Estimator", - sklearn.ensemble.AdaBoostClassifier(sklearn.tree.DecisionTreeClassifier()), - ), - ] - pipe_orig = sklearn.pipeline.Pipeline(steps=steps) - - pipe_adjusted = sklearn.clone(pipe_orig) - if Version(sklearn.__version__) < Version("0.22"): - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - } - elif Version(sklearn.__version__) < Version("1.4"): - params = { - "Imputer__strategy": "mean", - "OneHotEncoder__sparse": True, - "Estimator__n_estimators": 50, - } - else: - params = { - "Imputer__strategy": "mean", - "OneHotEncoder__sparse_output": True, - "Estimator__n_estimators": 50, - } - pipe_adjusted.set_params(**params) - flow = self.extension.model_to_flow(pipe_adjusted) - pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) - - # we want to compare pipe_deserialized and pipe_orig. We use the flow - # equals function for this - assert_flows_equal( - self.extension.model_to_flow(pipe_orig), - self.extension.model_to_flow(pipe_deserialized), - ) - - @pytest.mark.sklearn() - def test_deserialize_complex_with_defaults(self): - # used the 'initialize_with_defaults' flag of the deserialization - # method to return a flow that contains default hyperparameter - # settings. - steps = [ - ("Imputer", SimpleImputer()), - ("OneHotEncoder", sklearn.preprocessing.OneHotEncoder()), - ( - "Estimator", - sklearn.ensemble.AdaBoostClassifier( - sklearn.ensemble.BaggingClassifier( - sklearn.ensemble.GradientBoostingClassifier(), - ), - ), - ), - ] - pipe_orig = sklearn.pipeline.Pipeline(steps=steps) - - pipe_adjusted = sklearn.clone(pipe_orig) - impute_strategy = "median" if Version(sklearn.__version__) < Version("0.23") else "mean" - sparse = Version(sklearn.__version__) >= Version("0.23") - sparse_parameter = ( - "sparse" if Version(sklearn.__version__) < Version("1.4") else "sparse_output" - ) - estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.2") else "estimator" - ) - params = { - "Imputer__strategy": impute_strategy, - f"OneHotEncoder__{sparse_parameter}": sparse, - "Estimator__n_estimators": 10, - f"Estimator__{estimator_name}__n_estimators": 10, - f"Estimator__{estimator_name}__{estimator_name}__learning_rate": 0.1, - } - - pipe_adjusted.set_params(**params) - flow = self.extension.model_to_flow(pipe_adjusted) - pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) - - # we want to compare pipe_deserialized and pipe_orig. 
We use the flow - # equals function for this - assert_flows_equal( - self.extension.model_to_flow(pipe_orig), - self.extension.model_to_flow(pipe_deserialized), - ) - - @pytest.mark.sklearn() - def test_openml_param_name_to_sklearn(self): - scaler = sklearn.preprocessing.StandardScaler(with_mean=False) - estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" - ) - boosting = sklearn.ensemble.AdaBoostClassifier( - **{estimator_name: sklearn.tree.DecisionTreeClassifier()}, - ) - model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("boosting", boosting)]) - flow = self.extension.model_to_flow(model) - task = openml.tasks.get_task(115) # diabetes; crossvalidation - run = openml.runs.run_flow_on_task(flow, task) - run = run.publish() - TestBase._mark_entity_for_removal("run", run.run_id) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}") - run = openml.runs.get_run(run.run_id) - setup = openml.setups.get_setup(run.setup_id) - - # make sure to test enough parameters - assert len(setup.parameters) > 15 - - for parameter in setup.parameters.values(): - sklearn_name = self.extension._openml_param_name_to_sklearn(parameter, flow) - - # test the inverse. Currently, OpenML stores the hyperparameter - # fullName as flow.name + flow.version + parameter.name on the - # server (but this behaviour is not documented and might or might - # not change in the future. Hence, we won't offer this - # transformation functionality in the main package yet.) - splitted = sklearn_name.split("__") - if len(splitted) > 1: # if len is 1, it is part of root flow - subflow = flow.get_subflow(splitted[0:-1]) - else: - subflow = flow - openml_name = f"{subflow.name}({subflow.version})_{splitted[-1]}" - assert parameter.full_name == openml_name - - @pytest.mark.sklearn() - def test_obtain_parameter_values_flow_not_from_server(self): - model = sklearn.linear_model.LogisticRegression(solver="lbfgs") - flow = self.extension.model_to_flow(model) - logistic_name = ( - "logistic" if Version(sklearn.__version__) < Version("0.22") else "_logistic" - ) - msg = f"Flow sklearn.linear_model.{logistic_name}.LogisticRegression has no flow_id!" 
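# A flow only receives a flow_id once the server knows about it: flow.publish()
# or openml.flows.get_flow() fill it in, while model_to_flow() alone leaves it
# as None; that missing id is exactly the failure mode exercised here. A
# minimal sketch of the happy path, assuming a hypothetical server-assigned
# id of 42:
#
#     flow.flow_id = 42  # normally filled in by flow.publish()
#     parameters = self.extension.obtain_parameter_values(flow)
#     # each returned parameter dict then carries an "oml:component" flow id
#
# For nested models every subcomponent needs its own id as well; the AdaBoost
# case below sets only the outer flow_id and therefore still raises.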
- - with pytest.raises(ValueError, match=msg): - self.extension.obtain_parameter_values(flow) - - estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" - ) - model = sklearn.ensemble.AdaBoostClassifier( - **{ - estimator_name: sklearn.linear_model.LogisticRegression( - solver="lbfgs", - ), - } - ) - flow = self.extension.model_to_flow(model) - flow.flow_id = 1 - with pytest.raises(ValueError, match=msg): - self.extension.obtain_parameter_values(flow) - - @pytest.mark.sklearn() - def test_obtain_parameter_values(self): - model = sklearn.model_selection.RandomizedSearchCV( - estimator=sklearn.ensemble.RandomForestClassifier(n_estimators=5), - param_distributions={ - "max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], - "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"], - }, - cv=sklearn.model_selection.StratifiedKFold(n_splits=2, random_state=1, shuffle=True), - n_iter=5, - ) - flow = self.extension.model_to_flow(model) - flow.flow_id = 1 - flow.components["estimator"].flow_id = 2 - parameters = self.extension.obtain_parameter_values(flow) - for parameter in parameters: - assert parameter["oml:component"] is not None, parameter - if parameter["oml:name"] == "n_estimators": - assert parameter["oml:value"] == "5" - assert parameter["oml:component"] == 2 - - @pytest.mark.sklearn() - def test_numpy_type_allowed_in_flow(self): - """Simple numpy types should be serializable.""" - dt = sklearn.tree.DecisionTreeClassifier( - max_depth=np.float64(3.0), - min_samples_leaf=np.int32(5), - ) - self.extension.model_to_flow(dt) - - @pytest.mark.sklearn() - def test_numpy_array_not_allowed_in_flow(self): - """Simple numpy arrays should not be serializable.""" - bin = sklearn.preprocessing.MultiLabelBinarizer(classes=np.asarray([1, 2, 3])) - with pytest.raises(TypeError): - self.extension.model_to_flow(bin) - - -class TestSklearnExtensionRunFunctions(TestBase): - _multiprocess_can_split_ = True - - def setUp(self): - super().setUp(n_levels=2) - self.extension = SklearnExtension() - - ################################################################################################ - # Test methods for performing runs with this extension module - - @pytest.mark.sklearn() - def test_run_model_on_task(self): - task = openml.tasks.get_task(1) # anneal; crossvalidation - # using most_frequent imputer since dataset has mixed types and to keep things simple - pipe = sklearn.pipeline.Pipeline( - [ - ("imp", SimpleImputer(strategy="most_frequent")), - ("dummy", sklearn.dummy.DummyClassifier()), - ], - ) - openml.runs.run_model_on_task(pipe, task) - - @pytest.mark.sklearn() - def test_seed_model(self): - # randomized models that are initialized without seeds, can be seeded - randomized_clfs = [ - sklearn.ensemble.BaggingClassifier(), - sklearn.model_selection.RandomizedSearchCV( - sklearn.ensemble.RandomForestClassifier(), - { - "max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"], - "random_state": [-1, 0, 1, 2], - }, - cv=sklearn.model_selection.StratifiedKFold(n_splits=2, shuffle=True), - ), - sklearn.dummy.DummyClassifier(), - ] - - for idx, clf in enumerate(randomized_clfs): - const_probe = 42 - all_params = clf.get_params() - params = [key for key in all_params if key.endswith("random_state")] - assert len(params) > 0 - - # before param value is None - for param in params: - 
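# Note that get_params() is deep by default, so `params` can also contain
# nested keys such as "estimator__random_state" (e.g. for the
# RandomizedSearchCV above); the check below therefore covers
# sub-estimators as well as the top-level model.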
assert all_params[param] is None - - # now seed the params - clf_seeded = self.extension.seed_model(clf, const_probe) - new_params = clf_seeded.get_params() - - randstate_params = [key for key in new_params if key.endswith("random_state")] - - # after seeding, every random_state value is set to an int - for param in randstate_params: - assert isinstance(new_params[param], int) - assert new_params[param] is not None - - if idx == 1: - assert clf.cv.random_state == 56422 - - @pytest.mark.sklearn() - def test_seed_model_raises(self): - # _set_model_seed_where_none should raise an exception if random_state is - # anything other than an int - randomized_clfs = [ - sklearn.ensemble.BaggingClassifier(random_state=np.random.RandomState(42)), - sklearn.dummy.DummyClassifier(random_state="OpenMLIsGreat"), - ] - - for clf in randomized_clfs: - with pytest.raises(ValueError): - self.extension.seed_model(model=clf, seed=42) - - @pytest.mark.sklearn() - def test_run_model_on_fold_classification_1_array(self): - task = openml.tasks.get_task(1) # anneal; crossvalidation - - X, y = task.get_X_and_y() - train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) - X_train = X.iloc[train_indices] - y_train = y.iloc[train_indices] - X_test = X.iloc[test_indices] - y_test = y.iloc[test_indices] - - pipeline = sklearn.pipeline.Pipeline( - steps=[*_get_sklearn_preprocessing(), ("clf", sklearn.tree.DecisionTreeClassifier())], - ) - # TODO add some mocking here to actually test the innards of this function, too! - res = self.extension._run_model_on_fold( - model=pipeline, - task=task, - fold_no=0, - rep_no=0, - X_train=X_train, - y_train=y_train, - X_test=X_test, - ) - - y_hat, y_hat_proba, user_defined_measures, trace = res - - # predictions - assert isinstance(y_hat, np.ndarray) - assert y_hat.shape == y_test.shape - assert isinstance(y_hat_proba, pd.DataFrame) - assert y_hat_proba.shape == (y_test.shape[0], 6) - np.testing.assert_array_almost_equal(np.sum(y_hat_proba, axis=1), np.ones(y_test.shape)) - # The class '4' (at index 3) is not present in the training data. We check that the - # predicted probabilities for that class are zero! - np.testing.assert_array_almost_equal( - y_hat_proba.iloc[:, 3].to_numpy(), - np.zeros(y_test.shape), - ) - for i in (0, 1, 2, 4, 5): - assert np.any(y_hat_proba.iloc[:, i].to_numpy() != np.zeros(y_test.shape)) - - # check user defined measures - fold_evaluations: dict[str, dict[int, dict[int, float]]] = collections.defaultdict( - lambda: collections.defaultdict(dict) - ) - for measure in user_defined_measures: - fold_evaluations[measure][0][0] = user_defined_measures[measure] - - # trace.
SGD does not produce any - assert trace is None - - self._check_fold_timing_evaluations( - fold_evaluations, - num_repeats=1, - num_folds=1, - task_type=task.task_type_id, - check_scores=False, - ) - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.21"), - reason="SimpleImputer, ColumnTransformer available only after 0.19 and " - "Pipeline till 0.20 doesn't support indexing and 'passthrough'", - ) - def test_run_model_on_fold_classification_1_dataframe(self): - from sklearn.compose import ColumnTransformer - - task = openml.tasks.get_task(1) # anneal; crossvalidation - - # diff test_run_model_on_fold_classification_1_array() - X, y = task.get_X_and_y() - train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) - X_train = X.iloc[train_indices] - y_train = y.iloc[train_indices] - X_test = X.iloc[test_indices] - y_test = y.iloc[test_indices] - - # Helper functions to return required columns for ColumnTransformer - sparse = { - "sparse" if Version(sklearn.__version__) < Version("1.4") else "sparse_output": False - } - cat_imp = make_pipeline( - SimpleImputer(strategy="most_frequent"), - OneHotEncoder(handle_unknown="ignore", **sparse), - ) - cont_imp = make_pipeline(CustomImputer(strategy="mean"), StandardScaler()) - ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) - pipeline = sklearn.pipeline.Pipeline( - steps=[("transform", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())], - ) - # TODO add some mocking here to actually test the innards of this function, too! - res = self.extension._run_model_on_fold( - model=pipeline, - task=task, - fold_no=0, - rep_no=0, - X_train=X_train, - y_train=y_train, - X_test=X_test, - ) - - y_hat, y_hat_proba, user_defined_measures, trace = res - - # predictions - assert isinstance(y_hat, np.ndarray) - assert y_hat.shape == y_test.shape - assert isinstance(y_hat_proba, pd.DataFrame) - assert y_hat_proba.shape == (y_test.shape[0], 6) - np.testing.assert_array_almost_equal(np.sum(y_hat_proba, axis=1), np.ones(y_test.shape)) - # The class '4' (at index 3) is not present in the training data. We check that the - # predicted probabilities for that class are zero! - np.testing.assert_array_almost_equal( - y_hat_proba.iloc[:, 3].to_numpy(), - np.zeros(y_test.shape), - ) - for i in (0, 1, 2, 4, 5): - assert np.any(y_hat_proba.iloc[:, i].to_numpy() != np.zeros(y_test.shape)) - - # check user defined measures - fold_evaluations: dict[str, dict[int, dict[int, float]]] = collections.defaultdict( - lambda: collections.defaultdict(dict) - ) - for measure in user_defined_measures: - fold_evaluations[measure][0][0] = user_defined_measures[measure] - - # trace. 
SGD does not produce any - assert trace is None - - self._check_fold_timing_evaluations( - fold_evaluations, - num_repeats=1, - num_folds=1, - task_type=task.task_type_id, - check_scores=False, - ) - - @pytest.mark.sklearn() - def test_run_model_on_fold_classification_2(self): - task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation - - X, y = task.get_X_and_y() - train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) - X_train = X.iloc[train_indices] - y_train = y.iloc[train_indices] - X_test = X.iloc[test_indices] - y_test = y.iloc[test_indices] - - pipeline = sklearn.model_selection.GridSearchCV( - sklearn.pipeline.Pipeline( - steps=[ - *_get_sklearn_preprocessing(), - ("clf", sklearn.tree.DecisionTreeClassifier()), - ], - ), - {"clf__max_depth": [1, 2]}, - ) - # TODO add some mocking here to actually test the innards of this function, too! - res = self.extension._run_model_on_fold( - model=pipeline, - task=task, - fold_no=0, - rep_no=0, - X_train=X_train, - y_train=y_train, - X_test=X_test, - ) - - y_hat, y_hat_proba, user_defined_measures, trace = res - - # predictions - assert isinstance(y_hat, np.ndarray) - assert y_hat.shape == y_test.shape - assert isinstance(y_hat_proba, pd.DataFrame) - assert y_hat_proba.shape == (y_test.shape[0], 2) - np.testing.assert_array_almost_equal(np.sum(y_hat_proba, axis=1), np.ones(y_test.shape)) - for i in (0, 1): - assert np.any(y_hat_proba.to_numpy()[:, i] != np.zeros(y_test.shape)) - - # check user defined measures - fold_evaluations: dict[str, dict[int, dict[int, float]]] = collections.defaultdict( - lambda: collections.defaultdict(dict) - ) - for measure in user_defined_measures: - fold_evaluations[measure][0][0] = user_defined_measures[measure] - - # check that it produced and returned a trace object of the correct length - assert isinstance(trace, OpenMLRunTrace) - assert len(trace.trace_iterations) == 2 - - self._check_fold_timing_evaluations( - fold_evaluations, - num_repeats=1, - num_folds=1, - task_type=task.task_type_id, - check_scores=False, - ) - - @pytest.mark.sklearn() - def test_run_model_on_fold_classification_3(self): - class HardNaiveBayes(sklearn.naive_bayes.GaussianNB): - # class for testing a naive bayes classifier that does not allow soft - # predictions - def predict_proba(*args, **kwargs): - raise AttributeError("predict_proba is not available when probability=False") - - # task 1 (test server) is important: it is a task with an unused class - tasks = [ - 1, # anneal; crossvalidation - 3, # anneal; crossvalidation - 115, # diabetes; crossvalidation - ] - flow = unittest.mock.Mock() - flow.name = "dummy" - - for task_id in tasks: - task = openml.tasks.get_task(task_id) - X, y = task.get_X_and_y() - train_indices, test_indices = task.get_train_test_split_indices( - repeat=0, - fold=0, - sample=0, - ) - X_train = X.iloc[train_indices] - y_train = y.iloc[train_indices] - X_test = X.iloc[test_indices] - clf1 = sklearn.pipeline.Pipeline( - steps=[ - *_get_sklearn_preprocessing(), - ("estimator", sklearn.naive_bayes.GaussianNB()), - ], - ) - clf2 = sklearn.pipeline.Pipeline( - steps=[*_get_sklearn_preprocessing(), ("estimator", HardNaiveBayes())], - ) - - pred_1, proba_1, _, _ = self.extension._run_model_on_fold( - model=clf1, - task=task, - X_train=X_train, - y_train=y_train, - X_test=X_test, - fold_no=0, - rep_no=0, - ) - pred_2, proba_2, _, _ = self.extension._run_model_on_fold( - model=clf2, - task=task, - X_train=X_train, - y_train=y_train, - X_test=X_test, - fold_no=0, - rep_no=0, - ) 
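# HardNaiveBayes.predict_proba raises an AttributeError, so the extension
# presumably falls back to predict() and derives a one-hot probability
# matrix from the hard labels. The assertions below check exactly that:
# identical label predictions for both models, smooth probabilities for
# clf1, and nothing but zeros and ones for clf2.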
- - # verifies that the predictions are identical - np.testing.assert_array_equal(pred_1, pred_2) - np.testing.assert_array_almost_equal(np.sum(proba_1, axis=1), np.ones(X_test.shape[0])) - # Test that there are predictions other than ones and zeros - assert np.sum(proba_1.to_numpy() == 0) + np.sum(proba_1.to_numpy() == 1) < X_test.shape[ - 0 - ] * len(task.class_labels) - - np.testing.assert_array_almost_equal(np.sum(proba_2, axis=1), np.ones(X_test.shape[0])) - # Test that there are only ones and zeros predicted - assert np.sum(proba_2.to_numpy() == 0) + np.sum( - proba_2.to_numpy() == 1 - ) == X_test.shape[0] * len(task.class_labels) - - @pytest.mark.sklearn() - @pytest.mark.production() - def test_run_model_on_fold_regression(self): - # There aren't any regression tasks on the test server - openml.config.server = self.production_server - task = openml.tasks.get_task(2999) - - X, y = task.get_X_and_y() - train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) - X_train = X.iloc[train_indices] - y_train = y.iloc[train_indices] - X_test = X.iloc[test_indices] - y_test = y.iloc[test_indices] - - pipeline = sklearn.pipeline.Pipeline( - steps=[("imp", SimpleImputer()), ("clf", sklearn.tree.DecisionTreeRegressor())], - ) - # TODO add some mocking here to actually test the innards of this function, too! - res = self.extension._run_model_on_fold( - model=pipeline, - task=task, - fold_no=0, - rep_no=0, - X_train=X_train, - y_train=y_train, - X_test=X_test, - ) - - y_hat, y_hat_proba, user_defined_measures, trace = res - - # predictions - assert isinstance(y_hat, np.ndarray) - assert y_hat.shape == y_test.shape - assert y_hat_proba is None - - # check user defined measures - fold_evaluations: dict[str, dict[int, dict[int, float]]] = collections.defaultdict( - lambda: collections.defaultdict(dict) - ) - for measure in user_defined_measures: - fold_evaluations[measure][0][0] = user_defined_measures[measure] - - # trace. SGD does not produce any - assert trace is None - - self._check_fold_timing_evaluations( - fold_evaluations, - num_repeats=1, - num_folds=1, - task_type=task.task_type_id, - check_scores=False, - ) - - @pytest.mark.sklearn() - @pytest.mark.production() - def test_run_model_on_fold_clustering(self): - # There aren't any regression tasks on the test server - openml.config.server = self.production_server - task = openml.tasks.get_task(126033) - - X = task.get_X() - - pipeline = sklearn.pipeline.Pipeline( - steps=[*_get_sklearn_preprocessing(), ("clf", sklearn.cluster.KMeans())], - ) - # TODO add some mocking here to actually test the innards of this function, too! - res = self.extension._run_model_on_fold( - model=pipeline, - task=task, - fold_no=0, - rep_no=0, - X_train=X, - ) - - y_hat, y_hat_proba, user_defined_measures, trace = res - - # predictions - assert isinstance(y_hat, np.ndarray) - assert y_hat.shape == (X.shape[0],) - assert y_hat_proba is None - - # check user defined measures - fold_evaluations: dict[str, dict[int, dict[int, float]]] = collections.defaultdict( - lambda: collections.defaultdict(dict) - ) - for measure in user_defined_measures: - fold_evaluations[measure][0][0] = user_defined_measures[measure] - - # trace. 
SGD does not produce any - assert trace is None - - self._check_fold_timing_evaluations( - fold_evaluations, - num_repeats=1, - num_folds=1, - task_type=task.task_type_id, - check_scores=False, - ) - - @pytest.mark.sklearn() - def test__extract_trace_data(self): - param_grid = { - "hidden_layer_sizes": [[5, 5], [10, 10], [20, 20]], - "activation": ["identity", "logistic", "tanh", "relu"], - "learning_rate_init": [0.1, 0.01, 0.001, 0.0001], - "max_iter": [10, 20, 40, 80], - } - num_iters = 10 - task = openml.tasks.get_task(20) # balance-scale; crossvalidation - clf = sklearn.model_selection.RandomizedSearchCV( - sklearn.neural_network.MLPClassifier(), - param_grid, - n_iter=num_iters, - ) - # just run the task on the model (without invoking any fancy extension & openml code) - train, _ = task.get_train_test_split_indices(0, 0) - X, y = task.get_X_and_y() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - clf.fit(X.iloc[train], y.iloc[train]) - - # check num layers of MLP - assert clf.best_estimator_.hidden_layer_sizes in param_grid["hidden_layer_sizes"] - - trace_list = self.extension._extract_trace_data(clf, rep_no=0, fold_no=0) - trace = self.extension._obtain_arff_trace(clf, trace_list) - - assert isinstance(trace, OpenMLRunTrace) - assert isinstance(trace_list, list) - assert len(trace_list) == num_iters - - for trace_iteration in iter(trace): - assert trace_iteration.repeat == 0 - assert trace_iteration.fold == 0 - assert trace_iteration.iteration >= 0 - assert trace_iteration.iteration <= num_iters - assert trace_iteration.setup_string is None - assert isinstance(trace_iteration.evaluation, float) - assert np.isfinite(trace_iteration.evaluation) - assert isinstance(trace_iteration.selected, bool) - - assert len(trace_iteration.parameters) == len(param_grid) - for param in param_grid: - # Prepend with the "parameter_" prefix - param_in_trace = f"parameter_{param}" - assert param_in_trace in trace_iteration.parameters - param_value = json.loads(trace_iteration.parameters[param_in_trace]) - assert param_value in param_grid[param] - - @pytest.mark.sklearn() - def test_trim_flow_name(self): - import re - - long = """sklearn.pipeline.Pipeline( - columntransformer=sklearn.compose._column_transformer.ColumnTransformer( - numeric=sklearn.pipeline.Pipeline( - SimpleImputer=sklearn.preprocessing.imputation.Imputer, - standardscaler=sklearn.preprocessing.data.StandardScaler), - nominal=sklearn.pipeline.Pipeline( - simpleimputer=sklearn.impute.SimpleImputer, - onehotencoder=sklearn.preprocessing._encoders.OneHotEncoder)), - variancethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, - svc=sklearn.svm.classes.SVC)""" - short = "sklearn.Pipeline(ColumnTransformer,VarianceThreshold,SVC)" - shorter = "sklearn.Pipeline(...,SVC)" - long_stripped, _ = re.subn(r"\s", "", long) - assert short == SklearnExtension.trim_flow_name(long_stripped) - assert shorter == SklearnExtension.trim_flow_name(long_stripped, extra_trim_length=50) - - long = """sklearn.pipeline.Pipeline( - imputation=openmlstudy14.preprocessing.ConditionalImputer, - hotencoding=sklearn.preprocessing.data.OneHotEncoder, - variencethreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, - classifier=sklearn.ensemble.forest.RandomForestClassifier)""" - short = "sklearn.Pipeline(ConditionalImputer,OneHotEncoder,VarianceThreshold,RandomForestClassifier)" # noqa: E501 - long_stripped, _ = re.subn(r"\s", "", long) - assert short == SklearnExtension.trim_flow_name(long_stripped) - - long = 
"""sklearn.pipeline.Pipeline( - SimpleImputer=sklearn.preprocessing.imputation.Imputer, - VarianceThreshold=sklearn.feature_selection.variance_threshold.VarianceThreshold, # noqa: E501 - Estimator=sklearn.model_selection._search.RandomizedSearchCV( - estimator=sklearn.tree.tree.DecisionTreeClassifier))""" - short = ( - "sklearn.Pipeline(Imputer,VarianceThreshold,RandomizedSearchCV(DecisionTreeClassifier))" - ) - long_stripped, _ = re.subn(r"\s", "", long) - assert short == SklearnExtension.trim_flow_name(long_stripped) - - long = """sklearn.model_selection._search.RandomizedSearchCV( - estimator=sklearn.pipeline.Pipeline( - SimpleImputer=sklearn.preprocessing.imputation.Imputer, - classifier=sklearn.ensemble.forest.RandomForestClassifier))""" - short = "sklearn.RandomizedSearchCV(Pipeline(Imputer,RandomForestClassifier))" - long_stripped, _ = re.subn(r"\s", "", long) - assert short == SklearnExtension.trim_flow_name(long_stripped) - - long = """sklearn.pipeline.FeatureUnion( - pca=sklearn.decomposition.pca.PCA, - svd=sklearn.decomposition.truncated_svd.TruncatedSVD)""" - short = "sklearn.FeatureUnion(PCA,TruncatedSVD)" - long_stripped, _ = re.subn(r"\s", "", long) - assert short == SklearnExtension.trim_flow_name(long_stripped) - - long = "sklearn.ensemble.forest.RandomForestClassifier" - short = "sklearn.RandomForestClassifier" - assert short == SklearnExtension.trim_flow_name(long) - - assert SklearnExtension.trim_flow_name("weka.IsolationForest") == "weka.IsolationForest" - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.21"), - reason="SimpleImputer, ColumnTransformer available only after 0.19 and " - "Pipeline till 0.20 doesn't support indexing and 'passthrough'", - ) - def test_run_on_model_with_empty_steps(self): - from sklearn.compose import ColumnTransformer - - # testing 'drop', 'passthrough', None as non-actionable sklearn estimators - dataset = openml.datasets.get_dataset(128) # iris - task = openml.tasks.get_task(59) # mfeat-pixel; crossvalidation - - X, y, categorical_ind, feature_names = dataset.get_data( - target=dataset.default_target_attribute, - ) - categorical_ind = np.array(categorical_ind) - (cat_idx,) = np.where(categorical_ind) - (cont_idx,) = np.where(~categorical_ind) - - clf = make_pipeline( - ColumnTransformer( - [ - ( - "cat", - make_pipeline(SimpleImputer(strategy="most_frequent"), OneHotEncoder()), - cat_idx.tolist(), - ), - ( - "cont", - make_pipeline(SimpleImputer(strategy="median"), StandardScaler()), - cont_idx.tolist(), - ), - ], - ), - ) - - clf = sklearn.pipeline.Pipeline( - [ - ("dummystep", "passthrough"), # adding 'passthrough' as an estimator - ("prep", clf), - ("classifier", sklearn.svm.SVC(gamma="auto")), - ], - ) - - # adding 'drop' to a ColumnTransformer - if not categorical_ind.any(): - clf[1][0].set_params(cat="drop") - if not (~categorical_ind).any(): - clf[1][0].set_params(cont="drop") - - # serializing model with non-actionable step - run, flow = openml.runs.run_model_on_task(model=clf, task=task, return_flow=True) - - assert len(flow.components) == 3 - assert isinstance(flow.components["dummystep"], OpenMLFlow) - assert flow.components["dummystep"].name == "passthrough" - assert isinstance(flow.components["classifier"], OpenMLFlow) - if Version(sklearn.__version__) < Version("0.22"): - assert flow.components["classifier"].name == "sklearn.svm.classes.SVC" - else: - assert flow.components["classifier"].name == "sklearn.svm._classes.SVC" - assert isinstance(flow.components["prep"], OpenMLFlow) - 
assert flow.components["prep"].class_name == "sklearn.pipeline.Pipeline" - assert isinstance(flow.components["prep"].components["columntransformer"], OpenMLFlow) - assert isinstance( - flow.components["prep"].components["columntransformer"].components["cat"], OpenMLFlow - ) - assert ( - flow.components["prep"].components["columntransformer"].components["cat"].name == "drop" - ) - - # de-serializing flow to a model with non-actionable step - model = self.extension.flow_to_model(flow) - model.fit(X, y) - assert type(model) == type(clf) - assert model != clf - assert len(model.named_steps) == 3 - assert model.named_steps["dummystep"] == "passthrough" - - xml = flow._to_dict() - new_model = self.extension.flow_to_model(OpenMLFlow._from_dict(xml)) - - new_model.fit(X, y) - assert type(new_model) == type(clf) - assert new_model != clf - assert len(new_model.named_steps) == 3 - assert new_model.named_steps["dummystep"] == "passthrough" - - @pytest.mark.sklearn() - def test_sklearn_serialization_with_none_step(self): - msg = ( - "Cannot serialize objects of None type. Please use a valid " - "placeholder for None. Note that empty sklearn estimators can be " - "replaced with 'drop' or 'passthrough'." - ) - clf = sklearn.pipeline.Pipeline( - [("dummystep", None), ("classifier", sklearn.svm.SVC(gamma="auto"))], - ) - with pytest.raises(ValueError, match=msg): - self.extension.model_to_flow(clf) - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.20"), - reason="columntransformer introduction in 0.20.0", - ) - def test_failed_serialization_of_custom_class(self): - """Check that a custom class inheriting from sklearn fails serialization as expected.""" - try: - from sklearn.impute import SimpleImputer - except ImportError: - # for lower versions - from sklearn.preprocessing import Imputer as SimpleImputer - - import sklearn.tree - from sklearn.compose import ColumnTransformer - from sklearn.pipeline import Pipeline, make_pipeline - from sklearn.preprocessing import OneHotEncoder, StandardScaler - - cat_imp = make_pipeline( - SimpleImputer(strategy="most_frequent"), - OneHotEncoder(handle_unknown="ignore"), - ) - cont_imp = make_pipeline(CustomImputer(), StandardScaler()) - ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) - clf = Pipeline( - steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())], - ) # build a sklearn classifier - - task = openml.tasks.get_task(253) # profb; crossvalidation - try: - _ = openml.runs.run_model_on_task(clf, task) - except AttributeError as e: - if e.args[0] == "module '__main__' has no attribute '__version__'": - raise AttributeError(e) - else: - raise Exception(e) - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.20"), - reason="columntransformer introduction in 0.20.0", - ) - def test_setupid_with_column_transformer(self): - """Check whether the inclusion of a ColumnTransformer in a pipeline is treated as a new - flow each time.
- """ - import sklearn.compose - from sklearn.svm import SVC - - def column_transformer_pipe(task_id): - task = openml.tasks.get_task(task_id) - # make columntransformer - preprocessor = sklearn.compose.ColumnTransformer( - transformers=[ - ("num", StandardScaler(), cont), - ("cat", OneHotEncoder(handle_unknown="ignore"), cat), - ], - ) - # make pipeline - clf = SVC(gamma="scale", random_state=1) - pipe = make_pipeline(preprocessor, clf) - # run task - run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False) - run.publish() - return openml.runs.get_run(run.run_id) - - run1 = column_transformer_pipe(11) # only categorical - TestBase._mark_entity_for_removal("run", run1.run_id) - run2 = column_transformer_pipe(23) # only numeric - TestBase._mark_entity_for_removal("run", run2.run_id) - assert run1.setup_id == run2.setup_id From 0425929c55cbb89c2932c0d0b943f477ba908e2f Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 19 Jun 2025 12:30:39 +0200 Subject: [PATCH 3/8] Do not load sklearn extension by default --- openml/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/openml/__init__.py b/openml/__init__.py index 48d301eec..c49505eb9 100644 --- a/openml/__init__.py +++ b/openml/__init__.py @@ -121,7 +121,3 @@ def populate_cache( "_api_calls", "__version__", ] - -# Load the scikit-learn extension by default -# TODO(eddiebergman): Not sure why this is at the bottom of the file -import openml.extensions.sklearn # noqa: E402, F401 From 23ada0ea44bec186200b50933674d191f1aba902 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 19 Jun 2025 12:42:00 +0200 Subject: [PATCH 4/8] Disable scikit-learn tests --- tests/test_flows/test_flow.py | 4 +- tests/test_flows/test_flow_functions.py | 4 +- tests/test_runs/test_run.py | 6 +- tests/test_runs/test_run_functions.py | 6 +- tests/test_setups/test_setup_functions.py | 4 +- tests/test_study/test_study_examples.py | 77 ----------------------- 6 files changed, 12 insertions(+), 89 deletions(-) delete mode 100644 tests/test_study/test_study_examples.py diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 4a5241b62..f725074b5 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -26,7 +26,7 @@ import openml import openml.exceptions -import openml.extensions.sklearn +#import openml.extensions.sklearn import openml.utils from openml._api_calls import _perform_api_call from openml.testing import SimpleImputer, TestBase @@ -37,7 +37,7 @@ class TestFlow(TestBase): def setUp(self): super().setUp() - self.extension = openml.extensions.sklearn.SklearnExtension() + self.extension = None #openml.extensions.sklearn.SklearnExtension() def tearDown(self): super().tearDown() diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 40c78c822..b78781a1c 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -18,7 +18,7 @@ from sklearn import ensemble import openml -import openml.extensions.sklearn +#import openml.extensions.sklearn from openml.exceptions import OpenMLNotAuthorizedError, OpenMLServerException from openml.testing import TestBase, create_request_response @@ -283,7 +283,7 @@ def test_sklearn_to_flow_list_of_lists(self): from sklearn.preprocessing import OrdinalEncoder ordinal_encoder = OrdinalEncoder(categories=[[0, 1], [0, 1]]) - extension = openml.extensions.sklearn.SklearnExtension() + extension = None # openml.extensions.sklearn.SklearnExtension() # Test serialization works flow = 
extension.model_to_flow(ordinal_encoder) diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index e58c72e2d..7e2117d79 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -16,7 +16,7 @@ from sklearn.tree import DecisionTreeClassifier import openml -import openml.extensions.sklearn +#import openml.extensions.sklearn from openml import OpenMLRun from openml.testing import SimpleImputer, TestBase @@ -299,7 +299,7 @@ def test_publish_with_local_loaded_flow(self): Publish a run tied to a local flow after it has first been saved to and loaded from disk. """ - extension = openml.extensions.sklearn.SklearnExtension() + extension = None # openml.extensions.sklearn.SklearnExtension() for model, task in self._get_models_tasks_for_tests(): # Make sure the flow does not exist on the server yet. @@ -339,7 +339,7 @@ def test_publish_with_local_loaded_flow(self): @pytest.mark.sklearn() def test_offline_and_online_run_identical(self): - extension = openml.extensions.sklearn.SklearnExtension() + extension = None #openml.extensions.sklearn.SklearnExtension() for model, task in self._get_models_tasks_for_tests(): # Make sure the flow does not exist on the server yet. diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 58670b354..ed4847219 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -34,12 +34,12 @@ import openml import openml._api_calls import openml.exceptions -import openml.extensions.sklearn +#import openml.extensions.sklearn from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerException, ) -from openml.extensions.sklearn import cat, cont +#from openml.extensions.sklearn import cat, cont from openml.runs.functions import ( _run_task_get_arffcontent, delete_run, @@ -108,7 +108,7 @@ class TestRun(TestBase): def setUp(self): super().setUp() - self.extension = openml.extensions.sklearn.SklearnExtension() + self.extension = None#openml.extensions.sklearn.SklearnExtension() def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): # it can take a while for a run to be processed on the OpenML (test) diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 88ac84805..5cc531e63 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -13,7 +13,7 @@ import openml import openml.exceptions -import openml.extensions.sklearn +#import openml.extensions.sklearn from openml.testing import TestBase @@ -31,7 +31,7 @@ class TestSetupFunctions(TestBase): _multiprocess_can_split_ = True def setUp(self): - self.extension = openml.extensions.sklearn.SklearnExtension() + self.extension = None# openml.extensions.sklearn.SklearnExtension() super().setUp() @pytest.mark.sklearn() diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py deleted file mode 100644 index e3b21fc8c..000000000 --- a/tests/test_study/test_study_examples.py +++ /dev/null @@ -1,77 +0,0 @@ -# License: BSD 3-Clause -from __future__ import annotations - -import unittest -from packaging.version import Version - -import pytest -import sklearn - -from openml.extensions.sklearn import cat, cont -from openml.testing import TestBase - - -class TestStudyFunctions(TestBase): - _multiprocess_can_split_ = True - """Test the example code of Bischl et al. 
(2018)""" - - @pytest.mark.sklearn() - @unittest.skipIf( - Version(sklearn.__version__) < Version("0.24"), - reason="columntransformer introduction in 0.24.0", - ) - def test_Figure1a(self): - """Test listing in Figure 1a on a single task and the old OpenML100 study. - - The original listing is pasted into the comment below because it the actual unit test - differs a bit, as for example it does not run for all tasks, but only a single one. - - import openml - import sklearn.tree, sklearn.preprocessing - benchmark_suite = openml.study.get_study('OpenML-CC18','tasks') # obtain the benchmark suite - clf = sklearn.pipeline.Pipeline(steps=[('imputer',sklearn.preprocessing.Imputer()), ('estimator',sklearn.tree.DecisionTreeClassifier())]) # build a sklearn classifier - for task_id in benchmark_suite.tasks: # iterate over all tasks - task = openml.tasks.get_task(task_id) # download the OpenML task - X, y = task.get_X_and_y() # get the data (not used in this example) - openml.config.apikey = 'FILL_IN_OPENML_API_KEY' # set the OpenML Api Key - run = openml.runs.run_model_on_task(task,clf) # run classifier on splits (requires API key) - score = run.get_metric_fn(sklearn.metrics.accuracy_score) # print accuracy score - print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name,score.mean())) - run.publish() # publish the experiment on OpenML (optional) - print('URL for run: %s/run/%d' %(openml.config.server,run.run_id)) - """ # noqa: E501 - import sklearn.metrics - import sklearn.tree - from sklearn.compose import ColumnTransformer - from sklearn.impute import SimpleImputer - from sklearn.pipeline import Pipeline, make_pipeline - from sklearn.preprocessing import OneHotEncoder, StandardScaler - - import openml - - benchmark_suite = openml.study.get_study("OpenML100", "tasks") # obtain the benchmark suite - cat_imp = OneHotEncoder(handle_unknown="ignore") - cont_imp = make_pipeline(SimpleImputer(strategy="median"), StandardScaler()) - ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) - clf = Pipeline( - steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())], - ) # build a sklearn classifier - for task_id in benchmark_suite.tasks[:1]: # iterate over all tasks - task = openml.tasks.get_task(task_id) # download the OpenML task - X, y = task.get_X_and_y() # get the data (not used in this example) - openml.config.apikey = openml.config.apikey # set the OpenML Api Key - run = openml.runs.run_model_on_task( - clf, - task, - avoid_duplicate_runs=False, - ) # run classifier on splits (requires API key) - score = run.get_metric_fn(sklearn.metrics.accuracy_score) # print accuracy score - TestBase.logger.info( - f"Data set: {task.get_dataset().name}; Accuracy: {score.mean():0.2f}", - ) - run.publish() # publish the experiment on OpenML (optional) - TestBase._mark_entity_for_removal("run", run.run_id) - TestBase.logger.info( - f"collected from {__file__.split('/')[-1]}: {run.run_id}", - ) - TestBase.logger.info("URL for run: %s/run/%d" % (openml.config.server, run.run_id)) From 0f2ac1ad720b5354ee41aace29a3e38ede8c725e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 19 Jun 2025 15:20:43 +0200 Subject: [PATCH 5/8] Tests fail successfully --- openml/extensions/functions.py | 22 ++++++++++++++++++++-- openml/flows/flow.py | 17 +++++++---------- tests/test_extensions/test_functions.py | 2 ++ tests/test_flows/test_flow_functions.py | 4 ++-- tests/test_runs/test_run_functions.py | 2 ++ 5 files changed, 33 insertions(+), 14 deletions(-) diff --git 
a/openml/extensions/functions.py b/openml/extensions/functions.py index 302ab246c..7a944c997 100644 --- a/openml/extensions/functions.py +++ b/openml/extensions/functions.py @@ -13,6 +13,13 @@ from . import Extension +SKLEARN_HINT = ( + "But it looks related to scikit-learn. " + "Please install the OpenML scikit-learn extension (openml-sklearn) and try again. " + "For more information, see " + "https://github.com/openml/openml-sklearn?tab=readme-ov-file#installation" +) + def register_extension(extension: type[Extension]) -> None: """Register an extension. @@ -57,7 +64,13 @@ def get_extension_by_flow( candidates.append(extension_class()) if len(candidates) == 0: if raise_if_no_extension: - raise ValueError(f"No extension registered which can handle flow: {flow}") + install_instruction = "" + if flow.name.startswith("sklearn"): + install_instruction = SKLEARN_HINT + raise ValueError( + f"No extension registered which can handle flow: {flow.flow_id} ({flow.name}). " + f"{install_instruction}" + ) return None @@ -96,7 +109,12 @@ def get_extension_by_model( candidates.append(extension_class()) if len(candidates) == 0: if raise_if_no_extension: - raise ValueError(f"No extension registered which can handle model: {model}") + install_instruction = "" + if type(model).__module__.startswith("sklearn"): + install_instruction = SKLEARN_HINT + raise ValueError( + f"No extension registered which can handle model: {model}. {install_instruction}" + ) return None diff --git a/openml/flows/flow.py b/openml/flows/flow.py index a3ff50ca1..02d24e78b 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -4,7 +4,7 @@ import logging from collections import OrderedDict from pathlib import Path -from typing import Any, Hashable, Sequence +from typing import Any, Hashable, Sequence, cast import xmltodict @@ -157,10 +157,7 @@ def __init__( # noqa: PLR0913 self.language = language self.dependencies = dependencies self.flow_id = flow_id - if extension is None: - self._extension = get_extension_by_flow(self) - else: - self._extension = extension + self._extension = extension @property def id(self) -> int | None: @@ -170,12 +167,12 @@ def id(self) -> int | None: @property def extension(self) -> Extension: """The extension of the flow (e.g., sklearn).""" - if self._extension is not None: - return self._extension + if self._extension is None: + self._extension = cast( + Extension, get_extension_by_flow(self, raise_if_no_extension=True) + ) - raise RuntimeError( - f"No extension could be found for flow {self.flow_id}: {self.name}", - ) + return self._extension def _get_repr_body_fields(self) -> Sequence[tuple[str, str | int | list[str]]]: """Collect all information to display in the __repr__ body.""" diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py index bc7937c88..ac4610a15 100644 --- a/tests/test_extensions/test_functions.py +++ b/tests/test_extensions/test_functions.py @@ -11,6 +11,8 @@ class DummyFlow: external_version = "DummyFlow==0.1" + name = "Dummy Flow" + flow_id = 1 dependencies = None diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index b78781a1c..b44de08bf 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -321,8 +321,8 @@ def test_get_flow_reinstantiate_model(self): def test_get_flow_reinstantiate_model_no_extension(self): # Flow 10 is a WEKA flow self.assertRaisesRegex( - RuntimeError, - "No extension could be found for flow 10: weka.SMO", + ValueError, + 
".* flow: 10 \(weka.SMO\). ", openml.flows.get_flow, flow_id=10, reinstantiate=True, diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index ed4847219..ed90bcad5 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1900,6 +1900,7 @@ def test_joblib_backends(self, parallel_mock): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) + @pytest.mark.sklearn() def test_delete_run(self): rs = np.random.randint(1, 2**31 - 1) clf = sklearn.pipeline.Pipeline( @@ -1928,6 +1929,7 @@ def test_delete_run(self): Version(sklearn.__version__) < Version("0.20"), reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) + @pytest.mark.sklearn() def test_initialize_model_from_run_nonstrict(self): # We cannot guarantee that a run with an older version exists on the server. # Thus, we test it simply with a run that we know exists that might not be loose. From 943afcc714a8682f0846c35449e22e90cd7c674b Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 19 Jun 2025 15:35:49 +0200 Subject: [PATCH 6/8] Add openml-sklearn as dependency of sklearn tests --- pyproject.toml | 1 + tests/conftest.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index fa9a70dc1..e32f113e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ test=[ "mypy", "ruff", "requests-mock", + "openml-sklearn", ] examples=[ "matplotlib", diff --git a/tests/conftest.py b/tests/conftest.py index 778b0498b..28ff9df46 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -33,6 +33,7 @@ import shutil from pathlib import Path import pytest +import openml_sklearn import openml from openml.testing import TestBase From 89f8fceb433b841032fb6a74567b5b6740fae275 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 19 Jun 2025 16:00:40 +0200 Subject: [PATCH 7/8] Make use of openml_sklearn extension --- tests/test_flows/test_flow.py | 6 ++++-- tests/test_flows/test_flow_functions.py | 4 ++-- tests/test_runs/test_run.py | 6 +++--- tests/test_runs/test_run_functions.py | 9 +++++---- tests/test_setups/test_setup_functions.py | 4 ++-- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index f725074b5..e6407a51c 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -24,20 +24,22 @@ import sklearn.tree import xmltodict +from openml_sklearn import SklearnExtension + import openml import openml.exceptions -#import openml.extensions.sklearn import openml.utils from openml._api_calls import _perform_api_call from openml.testing import SimpleImputer, TestBase + class TestFlow(TestBase): _multiprocess_can_split_ = True def setUp(self): super().setUp() - self.extension = None #openml.extensions.sklearn.SklearnExtension() + self.extension = SklearnExtension() def tearDown(self): super().tearDown() diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index b44de08bf..4a9b03fd7 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -7,6 +7,7 @@ from collections import OrderedDict from multiprocessing.managers import Value +from openml_sklearn import SklearnExtension from packaging.version import Version from unittest import mock from unittest.mock import patch @@ -18,7 +19,6 @@ from sklearn import ensemble import openml -#import openml.extensions.sklearn from 
openml.exceptions import OpenMLNotAuthorizedError, OpenMLServerException from openml.testing import TestBase, create_request_response @@ -283,7 +283,7 @@ def test_sklearn_to_flow_list_of_lists(self): from sklearn.preprocessing import OrdinalEncoder ordinal_encoder = OrdinalEncoder(categories=[[0, 1], [0, 1]]) - extension = None # openml.extensions.sklearn.SklearnExtension() + extension = SklearnExtension() # Test serialization works flow = extension.model_to_flow(ordinal_encoder) diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 7e2117d79..88fa1672b 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -8,6 +8,7 @@ import numpy as np import pytest import xmltodict +from openml_sklearn import SklearnExtension from sklearn.base import clone from sklearn.dummy import DummyClassifier from sklearn.linear_model import LinearRegression @@ -16,7 +17,6 @@ from sklearn.tree import DecisionTreeClassifier import openml -#import openml.extensions.sklearn from openml import OpenMLRun from openml.testing import SimpleImputer, TestBase @@ -299,7 +299,7 @@ def test_publish_with_local_loaded_flow(self): Publish a run tied to a local flow after it has first been saved to and loaded from disk. """ - extension = None # openml.extensions.sklearn.SklearnExtension() + extension = SklearnExtension() for model, task in self._get_models_tasks_for_tests(): # Make sure the flow does not exist on the server yet. @@ -339,7 +339,7 @@ def test_publish_with_local_loaded_flow(self): @pytest.mark.sklearn() def test_offline_and_online_run_identical(self): - extension = None #openml.extensions.sklearn.SklearnExtension() + extension = SklearnExtension() for model, task in self._get_models_tasks_for_tests(): # Make sure the flow does not exist on the server yet.
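A minimal sketch of the behaviour the flow.py and functions.py hunks above introduce, assuming no registered extension can handle the flow (e.g., openml-sklearn is not installed). Flow 10 is the WEKA flow used in the updated test; the lazy lookup in the `extension` property now raises the new ValueError on first access instead of at construction time:

    import openml

    flow = openml.flows.get_flow(10)  # weka.SMO, as in the test above
    try:
        _ = flow.extension  # lazily calls get_extension_by_flow(self, raise_if_no_extension=True)
    except ValueError as err:
        # "No extension registered which can handle flow: 10 (weka.SMO). "
        # (no install hint, since the flow name does not start with "sklearn")
        print(err)

Deferring the lookup this way also means flows can be constructed and inspected without any extension installed, which is what the deletion of the extension relies on.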
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index ed90bcad5..725421d4f 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -7,6 +7,8 @@ import time import unittest import warnings + +from openml_sklearn import SklearnExtension, cat, cont from packaging.version import Version from unittest import mock @@ -34,7 +36,6 @@ import openml import openml._api_calls import openml.exceptions -#import openml.extensions.sklearn from openml.exceptions import ( OpenMLNotAuthorizedError, OpenMLServerException, @@ -108,7 +109,7 @@ class TestRun(TestBase): def setUp(self): super().setUp() - self.extension = None#openml.extensions.sklearn.SklearnExtension() + self.extension = SklearnExtension() def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): # it can take a while for a run to be processed on the OpenML (test) @@ -1750,7 +1751,7 @@ def test_format_prediction_task_regression(self): Version(sklearn.__version__) < Version("0.21"), reason="couldn't perform local tests successfully w/o bloating RAM", ) - @mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs") def test__run_task_get_arffcontent_2(self, parallel_mock): """Tests if a run executed in parallel is collated correctly.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp @@ -1824,7 +1825,7 @@ def test__run_task_get_arffcontent_2(self, parallel_mock): Version(sklearn.__version__) < Version("0.21"), reason="couldn't perform local tests successfully w/o bloating RAM", ) - @mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + @mock.patch("openml_sklearn.SklearnExtension._prevent_optimize_n_jobs") def test_joblib_backends(self, parallel_mock): """Tests evaluation of a run using various joblib backends and n_jobs.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 5cc531e63..b805ca9d3 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -10,10 +10,10 @@ import sklearn.base import sklearn.naive_bayes import sklearn.tree +from openml_sklearn import SklearnExtension import openml import openml.exceptions -#import openml.extensions.sklearn from openml.testing import TestBase @@ -31,7 +31,7 @@ class TestSetupFunctions(TestBase): _multiprocess_can_split_ = True def setUp(self): - self.extension = None# openml.extensions.sklearn.SklearnExtension() + self.extension = SklearnExtension() super().setUp() @pytest.mark.sklearn() From 8ab1992d5e887354a7092134c198767ad381a77a Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 19 Jun 2025 16:06:52 +0200 Subject: [PATCH 8/8] packaging is only used in test submodules --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e32f113e5..91ec2327c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,6 @@ dependencies = [ "minio", "pyarrow", "tqdm", # For MinIO download progress bars - "packaging", ] requires-python = ">=3.8" maintainers = [ @@ -81,6 +80,7 @@ test=[ "ruff", "requests-mock", "openml-sklearn", + "packaging", ] examples=[ "matplotlib",
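The model-side lookup gains the same hint. A sketch, again assuming openml-sklearn is not installed, so that no registered extension matches and the `type(model).__module__` check appends SKLEARN_HINT:

    from sklearn.tree import DecisionTreeClassifier

    from openml.extensions import get_extension_by_model

    try:
        get_extension_by_model(DecisionTreeClassifier(), raise_if_no_extension=True)
    except ValueError as err:
        # "No extension registered which can handle model: DecisionTreeClassifier().
        #  But it looks related to scikit-learn. Please install the OpenML
        #  scikit-learn extension (openml-sklearn) and try again. ..."
        print(err)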
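With the test dependency in place, the end state of the migration looks as follows: importing openml_sklearn (as conftest.py now does) registers the extension, and the `cat`/`cont` helpers that the deleted Figure 1a test pulled from openml.extensions.sklearn come from the package instead. A sketch of the equivalent pipeline construction, reusing the preprocessing from the deleted listing:

    from openml_sklearn import SklearnExtension, cat, cont
    from sklearn.compose import ColumnTransformer
    from sklearn.impute import SimpleImputer
    from sklearn.pipeline import Pipeline, make_pipeline
    from sklearn.preprocessing import OneHotEncoder, StandardScaler
    from sklearn.tree import DecisionTreeClassifier

    # cat/cont select the categorical/continuous columns of a DataFrame,
    # so each branch of the ColumnTransformer only sees its own dtypes.
    cont_imp = make_pipeline(SimpleImputer(strategy="median"), StandardScaler())
    ct = ColumnTransformer([
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat),
        ("cont", cont_imp, cont),
    ])
    clf = Pipeline(steps=[("preprocess", ct), ("estimator", DecisionTreeClassifier())])
    flow = SklearnExtension().model_to_flow(clf)  # serialize for OpenML as before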