Barroclough01
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.vscode/settings.json‎ b/‎.vscode/settings.json‎
diff --git a/‎Chapter03/drift/edition2-alibi-detect-examples/ed2-alibi-detect-ch3.ipynb‎
Lines changed: 556 additions & 67 deletions b/‎Chapter03/drift/edition2-alibi-detect-examples/ed2-alibi-detect-ch3.ipynb‎
Lines changed: 556 additions & 67 deletions
diff --git a/‎Chapter03/drift/evidently-drift-detection.ipynb‎
Lines changed: 947 additions & 93 deletions b/‎Chapter03/drift/evidently-drift-detection.ipynb‎
Lines changed: 947 additions & 93 deletions
diff --git a/‎Chapter03/hyperparameter-opt/hyperopt-example.ipynb‎
Lines changed: 35 additions & 11 deletions b/‎Chapter03/hyperparameter-opt/hyperopt-example.ipynb‎
Lines changed: 35 additions & 11 deletions
diff --git a/‎Chapter03/hyperparameter-opt/optuna-example.ipynb‎
Lines changed: 229 additions & 17 deletions b/‎Chapter03/hyperparameter-opt/optuna-example.ipynb‎
Lines changed: 229 additions & 17 deletions
diff --git a/‎Chapter03/mlewp-chapter03.yml‎
Lines changed: 0 additions & 9 deletions b/‎Chapter03/mlewp-chapter03.yml‎
Lines changed: 0 additions & 9 deletions
diff --git a/‎Chapter03/mlflow-advanced/mlflow-feature-engineering.py‎
Lines changed: 39 additions & 36 deletions b/‎Chapter03/mlflow-advanced/mlflow-feature-engineering.py‎
Lines changed: 39 additions & 36 deletions
diff --git a/‎Chapter03/mlflow-advanced/mlflow.db‎
220 KB b/‎Chapter03/mlflow-advanced/mlflow.db‎
220 KB
diff --git a/‎Chapter03/mlflow-advanced/model_registry_retrieve.py‎
Lines changed: 13 additions & 3 deletions b/‎Chapter03/mlflow-advanced/model_registry_retrieve.py‎
Lines changed: 13 additions & 3 deletions
@@ -168,5 +168,6 @@ cython_debug/
 **/artifacts/model
 **/artifacts/** 
 **/mlruns/** 
+Chapter03/mlflow-db
 Chapter08/.DS_Store
 Chapter09/.DS_Store
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -36,7 +36,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -77,9 +77,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 16/16 [00:00<00:00, 20.78trial/s, best loss: 0.09999999999999998]\n"
+     ]
+    }
+   ],
    "source": [
     "# Trials object to track progress\n",
     "trials = Trials()\n",
@@ -96,19 +104,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'C': 1.301001824535225,\n",
+       " 'fit_intercept': 1,\n",
+       " 'max_iter': 341,\n",
+       " 'solver': 1,\n",
+       " 'tol': 7.079664967803261e-05,\n",
+       " 'warm_start': 0}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "best"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:mleng] *",
+   "display_name": "mlewp-chapter03",
    "language": "python",
-   "name": "conda-env-mleng-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -120,7 +144,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.10.8"
   }
  },
  "nbformat": 4,
 
@@ -63,7 +63,6 @@ dependencies:
   - graphql-core=3.2.6
   - graphql-relay=3.2.0
   - greenlet=3.1.1
-  - gunicorn=23.0.0
   - h2=4.2.0
   - hpack=4.1.0
   - hyperframe=6.1.0
@@ -89,13 +88,9 @@ dependencies:
   - libcblas=3.9.0
   - libcrc32c=1.1.2
   - libcurl=8.11.1
-  - libcxx=19.1.7
   - libdeflate=1.23
-  - libedit=3.1.20250104
-  - libev=4.33
   - libevent=2.1.12
   - libffi=3.4.2
-  - libgfortran=5.0.0
   - libgfortran5=13.2.0
   - libgoogle-cloud=2.34.0
   - libgoogle-cloud-storage=2.34.0
@@ -132,7 +127,6 @@ dependencies:
   - mlflow-ui=2.20.1
   - multidict=6.1.0
   - munkres=1.1.4
-  - ncurses=6.5
   - numpy=1.26.4
   - openjpeg=2.5.3
   - openssl=3.4.0
@@ -175,7 +169,6 @@ dependencies:
   - qhull=2020.2
   - querystring_parser=1.2.4
   - re2=2024.07.02
-  - readline=8.2
   - requests=2.32.3
   - rsa=4.9
   - scikit-learn=1.6.1
@@ -201,11 +194,9 @@ dependencies:
   - xorg-libxau=1.0.12
   - xorg-libxdmcp=1.1.5
   - xz=5.6.4
-  - xz-gpl-tools=5.6.4
   - xz-tools=5.6.4
   - yaml=0.2.5
   - yarl=1.18.3
   - zipp=3.21.0
   - zstandard=0.23.0
   - zstd=1.5.6
-prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03-hotfix
@@ -10,74 +10,82 @@
 import mlflow.sklearn
 import mlflow.pyfunc
 from mlflow.tracking import MlflowClient
+from mlflow.models.signature import infer_signature
 
 from pprint import pprint
 
-if __name__=="__main__":
+if __name__ == "__main__":
     # assume you have already run 'start-mlflow-server.sh'
     mlflow.set_tracking_uri("http://localhost:8000")
 
     X, y = load_wine(return_X_y=True)
 
     # Make a 70/30 train/test split
-    X_train, X_test, y_train, y_test = train_test_split(X, y,
-                                                        test_size=0.30,
-                                                        random_state=42)
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.30, random_state=42
+    )
 
     with mlflow.start_run(run_name="YOUR_RUN_NAME") as run:
-        params = {
-            'tol': 1e-2,
-            'solver': 'sag'
-        }
+        params = {"tol": 1e-2, "solver": "sag"}
         # Fit a ridge classifier after performing standard scaling
         std_scale_clf = make_pipeline(StandardScaler(), RidgeClassifier(**params))
         std_scale_clf.fit(X_train, y_train)
         y_pred_std_scale = std_scale_clf.predict(X_test)
 
-        mlflow.log_metrics({
-            'accuracy': metrics.accuracy_score(y_test, y_pred_std_scale),
-            'precision': metrics.precision_score(y_test, y_pred_std_scale, average='macro'),
-            'f1': metrics.f1_score(y_test, y_pred_std_scale, average='macro'),
-            'recall': metrics.recall_score(y_test, y_pred_std_scale, average='macro')
-        })
+        mlflow.log_metrics(
+            {
+                "accuracy": metrics.accuracy_score(y_test, y_pred_std_scale),
+                "precision": metrics.precision_score(
+                    y_test, y_pred_std_scale, average="macro"
+                ),
+                "f1": metrics.f1_score(y_test, y_pred_std_scale, average="macro"),
+                "recall": metrics.recall_score(
+                    y_test, y_pred_std_scale, average="macro"
+                ),
+            }
+        )
 
         mlflow.log_params(params)
 
-        # Log the sklearn model and register as version 1
+        # Create a small input example and infer the model signature so MLflow records input/output schema
+        # Use a few rows from the test set as an input example and infer signature from training input/output
+        try:
+            input_example = X_test[:5]
+            signature = infer_signature(X_train, std_scale_clf.predict(X_train))
+        except Exception:
+            # Fallback: don't fail logging if signature inference fails
+            input_example = None
+            signature = None
+
+        # Log the sklearn model and register as version 1 (include signature and input example)
         mlflow.sklearn.log_model(
             sk_model=std_scale_clf,
-            artifact_path="sklearn-model",
-            registered_model_name="sk-learn-std-scale-clf"
+            name="sklearn-model",
+            registered_model_name="sk-learn-std-scale-clf",
+            signature=signature,
+            input_example=input_example,
         )
 
     # Fetch specific model and version ...
     model_name = "sk-learn-std-scale-clf"
     model_version = 1
-    model = mlflow.pyfunc.load_model(
-        model_uri=f"models:/{model_name}/{model_version}"
-    )
+    model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")
     model.predict(X_test)
 
     # Transition the model stage to 'Staging'
     client = MlflowClient()
     client.transition_model_version_stage(
-        name="sk-learn-std-scale-clf",
-        version=1,
-        stage="Staging"
+        name="sk-learn-std-scale-clf", version=1, stage="Staging"
     )
     # Transition the model stage to 'Production'
     client = MlflowClient()
     client.transition_model_version_stage(
-        name="sk-learn-std-scale-clf",
-        version=1,
-        stage="Production"
+        name="sk-learn-std-scale-clf", version=1, stage="Production"
     )
 
     # Fetch model based on stage name ...
-    stage = 'Production'
-    model = mlflow.pyfunc.load_model(
-        model_uri=f"models:/{model_name}/{stage}"
-    )
+    stage = "Production"
+    model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{stage}")
     # Search model versions of a given model name
     client = MlflowClient()
     for mv in client.search_model_versions("name='sk-learn-std-scale-clf'"):
@@ -86,10 +94,5 @@
     # Transition the model stage to 'Archived'
     client = MlflowClient()
     client.transition_model_version_stage(
-        name="sk-learn-std-scale-clf",
-        version=1,
-        stage="Archived"
+        name="sk-learn-std-scale-clf", version=1, stage="Archived"
     )
-
-
-
 
@@ -3,12 +3,22 @@
 
 import mlflow.pyfunc
 
+model_name = "sk-learn-std-scale-clf"  # registered model name, not the "sklearn-model" artifact path
+
 model_version_uri = "models:/{model_name}/1".format(model_name=model_name)
 
-print("Loading registered model version from URI: '{model_uri}'".format(model_uri=model_version_uri))
+print(
+    "Loading registered model version from URI: '{model_uri}'".format(
+        model_uri=model_version_uri
+    )
+)
 model_version_1 = mlflow.pyfunc.load_model(model_version_uri)
 
 model_production_uri = "models:/{model_name}/production".format(model_name=model_name)
 
-print("Loading registered model version from URI: '{model_uri}'".format(model_uri=model_production_uri))
-model_production = mlflow.pyfunc.load_model(model_production_uri)
+print(
+    "Loading registered model version from URI: '{model_uri}'".format(
+        model_uri=model_production_uri
+    )
+)
+model_production = mlflow.pyfunc.load_model(model_production_uri)