Skip to content

Commit 508e45c

Browse files
committed
Finished Chapter3
1 parent e91cba3 commit 508e45c

14 files changed

Lines changed: 1913 additions & 255 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,5 +168,6 @@ cython_debug/
168168
**/artifacts/model
169169
**/artifacts/**
170170
**/mlruns/**
171+
Chapter03/mlflow-db
171172
Chapter08/.DS_Store
172173
Chapter09/.DS_Store

.vscode/settings.json

Whitespace-only changes.

Chapter03/drift/edition2-alibi-detect-examples/ed2-alibi-detect-ch3.ipynb

Lines changed: 556 additions & 67 deletions
Large diffs are not rendered by default.

Chapter03/drift/evidently-drift-detection.ipynb

Lines changed: 947 additions & 93 deletions
Large diffs are not rendered by default.

Chapter03/hyperparameter-opt/hyperopt-example.ipynb

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 1,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -20,7 +20,7 @@
2020
},
2121
{
2222
"cell_type": "code",
23-
"execution_count": null,
23+
"execution_count": 2,
2424
"metadata": {},
2525
"outputs": [],
2626
"source": [
@@ -36,7 +36,7 @@
3636
},
3737
{
3838
"cell_type": "code",
39-
"execution_count": null,
39+
"execution_count": 3,
4040
"metadata": {},
4141
"outputs": [],
4242
"source": [
@@ -59,7 +59,7 @@
5959
},
6060
{
6161
"cell_type": "code",
62-
"execution_count": null,
62+
"execution_count": 4,
6363
"metadata": {},
6464
"outputs": [],
6565
"source": [
@@ -77,9 +77,17 @@
7777
},
7878
{
7979
"cell_type": "code",
80-
"execution_count": null,
80+
"execution_count": 5,
8181
"metadata": {},
82-
"outputs": [],
82+
"outputs": [
83+
{
84+
"name": "stdout",
85+
"output_type": "stream",
86+
"text": [
87+
"100%|██████████| 16/16 [00:00<00:00, 20.78trial/s, best loss: 0.09999999999999998]\n"
88+
]
89+
}
90+
],
8391
"source": [
8492
"# Trials object to track progress\n",
8593
"trials = Trials()\n",
@@ -96,19 +104,35 @@
96104
},
97105
{
98106
"cell_type": "code",
99-
"execution_count": null,
107+
"execution_count": 6,
100108
"metadata": {},
101-
"outputs": [],
109+
"outputs": [
110+
{
111+
"data": {
112+
"text/plain": [
113+
"{'C': 1.301001824535225,\n",
114+
" 'fit_intercept': 1,\n",
115+
" 'max_iter': 341,\n",
116+
" 'solver': 1,\n",
117+
" 'tol': 7.079664967803261e-05,\n",
118+
" 'warm_start': 0}"
119+
]
120+
},
121+
"execution_count": 6,
122+
"metadata": {},
123+
"output_type": "execute_result"
124+
}
125+
],
102126
"source": [
103127
"best"
104128
]
105129
}
106130
],
107131
"metadata": {
108132
"kernelspec": {
109-
"display_name": "Python [conda env:mleng] *",
133+
"display_name": "mlewp-chapter03",
110134
"language": "python",
111-
"name": "conda-env-mleng-py"
135+
"name": "python3"
112136
},
113137
"language_info": {
114138
"codemirror_mode": {
@@ -120,7 +144,7 @@
120144
"name": "python",
121145
"nbconvert_exporter": "python",
122146
"pygments_lexer": "ipython3",
123-
"version": "3.8.5"
147+
"version": "3.10.8"
124148
}
125149
},
126150
"nbformat": 4,

Chapter03/hyperparameter-opt/optuna-example.ipynb

Lines changed: 229 additions & 17 deletions
Large diffs are not rendered by default.

Chapter03/mlewp-chapter03.yml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ dependencies:
6363
- graphql-core=3.2.6
6464
- graphql-relay=3.2.0
6565
- greenlet=3.1.1
66-
- gunicorn=23.0.0
6766
- h2=4.2.0
6867
- hpack=4.1.0
6968
- hyperframe=6.1.0
@@ -89,13 +88,9 @@ dependencies:
8988
- libcblas=3.9.0
9089
- libcrc32c=1.1.2
9190
- libcurl=8.11.1
92-
- libcxx=19.1.7
9391
- libdeflate=1.23
94-
- libedit=3.1.20250104
95-
- libev=4.33
9692
- libevent=2.1.12
9793
- libffi=3.4.2
98-
- libgfortran=5.0.0
9994
- libgfortran5=13.2.0
10095
- libgoogle-cloud=2.34.0
10196
- libgoogle-cloud-storage=2.34.0
@@ -132,7 +127,6 @@ dependencies:
132127
- mlflow-ui=2.20.1
133128
- multidict=6.1.0
134129
- munkres=1.1.4
135-
- ncurses=6.5
136130
- numpy=1.26.4
137131
- openjpeg=2.5.3
138132
- openssl=3.4.0
@@ -175,7 +169,6 @@ dependencies:
175169
- qhull=2020.2
176170
- querystring_parser=1.2.4
177171
- re2=2024.07.02
178-
- readline=8.2
179172
- requests=2.32.3
180173
- rsa=4.9
181174
- scikit-learn=1.6.1
@@ -201,11 +194,9 @@ dependencies:
201194
- xorg-libxau=1.0.12
202195
- xorg-libxdmcp=1.1.5
203196
- xz=5.6.4
204-
- xz-gpl-tools=5.6.4
205197
- xz-tools=5.6.4
206198
- yaml=0.2.5
207199
- yarl=1.18.3
208200
- zipp=3.21.0
209201
- zstandard=0.23.0
210202
- zstd=1.5.6
211-
prefix: /opt/homebrew/Caskroom/miniforge/base/envs/mlewp-chapter03-hotfix

Chapter03/mlflow-advanced/mlflow-feature-engineering.py

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,74 +10,82 @@
1010
import mlflow.sklearn
1111
import mlflow.pyfunc
1212
from mlflow.tracking import MlflowClient
13+
from mlflow.models.signature import infer_signature
1314

1415
from pprint import pprint
1516

16-
if __name__=="__main__":
17+
if __name__ == "__main__":
1718
# assume you have already run 'start-mlflow-server.sh'
1819
mlflow.set_tracking_uri("http://localhost:8000")
1920

2021
X, y = load_wine(return_X_y=True)
2122

2223
# Make a 70/30 train/test split
23-
X_train, X_test, y_train, y_test = train_test_split(X, y,
24-
test_size=0.30,
25-
random_state=42)
24+
X_train, X_test, y_train, y_test = train_test_split(
25+
X, y, test_size=0.30, random_state=42
26+
)
2627

2728
with mlflow.start_run(run_name="YOUR_RUN_NAME") as run:
28-
params = {
29-
'tol': 1e-2,
30-
'solver': 'sag'
31-
}
29+
params = {"tol": 1e-2, "solver": "sag"}
3230
# Fit a ridge classifier after performing standard scaling
3331
std_scale_clf = make_pipeline(StandardScaler(), RidgeClassifier(**params))
3432
std_scale_clf.fit(X_train, y_train)
3533
y_pred_std_scale = std_scale_clf.predict(X_test)
3634

37-
mlflow.log_metrics({
38-
'accuracy': metrics.accuracy_score(y_test, y_pred_std_scale),
39-
'precision': metrics.precision_score(y_test, y_pred_std_scale, average='macro'),
40-
'f1': metrics.f1_score(y_test, y_pred_std_scale, average='macro'),
41-
'recall': metrics.recall_score(y_test, y_pred_std_scale, average='macro')
42-
})
35+
mlflow.log_metrics(
36+
{
37+
"accuracy": metrics.accuracy_score(y_test, y_pred_std_scale),
38+
"precision": metrics.precision_score(
39+
y_test, y_pred_std_scale, average="macro"
40+
),
41+
"f1": metrics.f1_score(y_test, y_pred_std_scale, average="macro"),
42+
"recall": metrics.recall_score(
43+
y_test, y_pred_std_scale, average="macro"
44+
),
45+
}
46+
)
4347

4448
mlflow.log_params(params)
4549

46-
# Log the sklearn model and register as version 1
50+
# Create a small input example and infer the model signature so MLflow records input/output schema
51+
# Use a few rows from the test set as an input example and infer signature from training input/output
52+
try:
53+
input_example = X_test[:5]
54+
signature = infer_signature(X_train, std_scale_clf.predict(X_train))
55+
except Exception:
56+
# Fallback: don't fail logging if signature inference fails
57+
input_example = None
58+
signature = None
59+
60+
# Log the sklearn model and register as version 1 (include signature and input example)
4761
mlflow.sklearn.log_model(
4862
sk_model=std_scale_clf,
49-
artifact_path="sklearn-model",
50-
registered_model_name="sk-learn-std-scale-clf"
63+
name="sklearn-model",
64+
registered_model_name="sk-learn-std-scale-clf",
65+
signature=signature,
66+
input_example=input_example,
5167
)
5268

5369
# Fetch specific model and version ...
5470
model_name = "sk-learn-std-scale-clf"
5571
model_version = 1
56-
model = mlflow.pyfunc.load_model(
57-
model_uri=f"models:/{model_name}/{model_version}"
58-
)
72+
model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")
5973
model.predict(X_test)
6074

6175
# Transition the model stage to 'Staging'
6276
client = MlflowClient()
6377
client.transition_model_version_stage(
64-
name="sk-learn-std-scale-clf",
65-
version=1,
66-
stage="Staging"
78+
name="sk-learn-std-scale-clf", version=1, stage="Staging"
6779
)
6880
# Transition the model stage to 'Production'
6981
client = MlflowClient()
7082
client.transition_model_version_stage(
71-
name="sk-learn-std-scale-clf",
72-
version=1,
73-
stage="Production"
83+
name="sk-learn-std-scale-clf", version=1, stage="Production"
7484
)
7585

7686
# Fetch model based on stage name ...
77-
stage = 'Production'
78-
model = mlflow.pyfunc.load_model(
79-
model_uri=f"models:/{model_name}/{stage}"
80-
)
87+
stage = "Production"
88+
model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{stage}")
8189
# Search model versions of a given model name
8290
client = MlflowClient()
8391
for mv in client.search_model_versions("name='sk-learn-std-scale-clf'"):
@@ -86,10 +94,5 @@
8694
# Transition the model stage to 'Archived'
8795
client = MlflowClient()
8896
client.transition_model_version_stage(
89-
name="sk-learn-std-scale-clf",
90-
version=1,
91-
stage="Archived"
97+
name="sk-learn-std-scale-clf", version=1, stage="Archived"
9298
)
93-
94-
95-
220 KB
Binary file not shown.

Chapter03/mlflow-advanced/model_registry_retrieve.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,22 @@
33

44
import mlflow.pyfunc
55

6+
model_name = "sklearn-model"
7+
68
model_version_uri = "models:/{model_name}/1".format(model_name=model_name)
79

8-
print("Loading registered model version from URI: '{model_uri}'".format(model_uri=model_version_uri))
10+
print(
11+
"Loading registered model version from URI: '{model_uri}'".format(
12+
model_uri=model_version_uri
13+
)
14+
)
915
model_version_1 = mlflow.pyfunc.load_model(model_version_uri)
1016

1117
model_production_uri = "models:/{model_name}/production".format(model_name=model_name)
1218

13-
print("Loading registered model version from URI: '{model_uri}'".format(model_uri=model_production_uri))
14-
model_production = mlflow.pyfunc.load_model(model_production_uri)
19+
print(
20+
"Loading registered model version from URI: '{model_uri}'".format(
21+
model_uri=model_production_uri
22+
)
23+
)
24+
model_production = mlflow.pyfunc.load_model(model_production_uri)

0 commit comments

Comments
 (0)