Skip to content

Commit c2004b6

Browse files
committed
Add minimal KServe + MinIO model serving example notebook
* Train a simple sklearn SVM classifier on the Iris dataset and serialize it with joblib * Upload the resulting `model.joblib` to a MinIO S3 bucket using `s3fs` * Generate and apply a KServe `InferenceService` manifest pointing to the uploaded model * Wait for the `InferenceService` to become ready and retrieve its URL * Send a test prediction request to the deployed service using an API key
1 parent 9cde31f commit c2004b6

1 file changed

Lines changed: 249 additions & 0 deletions

File tree

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "ef9cbd78",
6+
"metadata": {},
7+
"source": [
8+
"# Prepare the environment for the notebook"
9+
]
10+
},
11+
{
12+
"cell_type": "code",
13+
"execution_count": null,
14+
"id": "b3fca639-7bf9-4199-b296-bfdccabe0b96",
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"# sklearn, joblib, s3fs already come with the notebook image\n",
19+
"# %pip install sklearn joblib s3fs"
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": null,
25+
"id": "3e7aeeac-0aeb-48cb-988f-fb2e444d1d53",
26+
"metadata": {},
27+
"outputs": [],
28+
"source": [
29+
"INFERENCE_SERVICE_API_KEY = \"\" # If not known, ask the cluster administrator for the API Key that is used to access the deployed InferenceServices.\n",
30+
"if not INFERENCE_SERVICE_API_KEY:\n",
31+
" raise RuntimeError(\"Please provide the API Key that will be used to test the deployed InferenceService\")"
32+
]
33+
},
34+
{
35+
"cell_type": "markdown",
36+
"id": "81293c0f",
37+
"metadata": {},
38+
"source": [
39+
"# Create a small model to be deployed as InferenceService"
40+
]
41+
},
42+
{
43+
"cell_type": "code",
44+
"execution_count": null,
45+
"id": "4d69463a-cc14-4e7e-81a1-95f7d29d60ea",
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"from sklearn import svm, datasets\n",
50+
"from joblib import dump"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": null,
56+
"id": "696e48a2-c974-4b43-9fd7-bc06f254502d",
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"# Create a small model with iris dataset\n",
61+
"iris = datasets.load_iris()\n",
62+
"clf = svm.SVC(gamma='scale')\n",
63+
"clf.fit(iris.data, iris.target)\n",
64+
"dump(clf, 'model.joblib')\n",
65+
"print(\"Iris model file model.joblib created!\")"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"id": "e382f93b",
71+
"metadata": {},
72+
"source": [
73+
"# Push the created model to s3 storage (MinIO)"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"id": "e7924d92-9812-4865-a99e-82f595e33dae",
80+
"metadata": {},
81+
"outputs": [],
82+
"source": [
83+
"import os, s3fs # for uploading the created model to minio"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": null,
89+
"id": "f557ff09-cbf8-49c2-b6ad-26bb648cd458",
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
93+
"# The notebook is already setup with minio credentials for the bucket that start with <namespace>-data\n",
94+
"with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n",
95+
" namespace = namespace_file.read()\n",
96+
"s3_bucket = f\"{namespace}-data\"\n",
97+
"s3_model_path = f\"{s3_bucket}/minimal-kserve-example\"\n",
98+
"print(f\"The created model will be uploaded to s3://{s3_model_path}\")"
99+
]
100+
},
101+
{
102+
"cell_type": "code",
103+
"execution_count": null,
104+
"id": "9dad0c7e-9718-4d76-ac0a-9ba7a843ca00",
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"# Steps for uploading the created model to MinIO\n",
109+
"# Get the required values from the environment that is set during notebook creation\n",
110+
"aws_access_key_id = os.getenv(\"AWS_ACCESS_KEY_ID\", \"\")\n",
111+
"aws_secret_access_key = os.getenv(\"AWS_SECRET_ACCESS_KEY\", \"\")\n",
112+
"s3_endpoint = os.getenv(\"S3_ENDPOINT\", \"minio.minio\")\n",
113+
"s3_use_https = os.getenv(\"S3_USE_HTTPS\", \"0\") == \"1\"\n",
114+
"s3_verify_ssl = os.getenv(\"S3_VERIFY_SSL\", \"0\") == \"1\"\n",
115+
"s3_endpoint = f\"http{'s' if s3_use_https else ''}://{s3_endpoint}\"\n",
116+
"if not aws_access_key_id or not aws_secret_access_key:\n",
117+
" raise ValueError(\"AWS credentials not found in environment variables.\")\n",
118+
"\n",
119+
"# Upload the model to MinIO\n",
120+
"s3 = s3fs.S3FileSystem(endpoint_url=s3_endpoint, key=aws_access_key_id, secret=aws_secret_access_key, use_ssl=s3_verify_ssl)\n",
121+
"s3.put(\"model.joblib\", f\"{s3_model_path}/model.joblib\")\n",
122+
"# List the bucket content to see if upload was successful\n",
123+
"s3.ls(s3_model_path)"
124+
]
125+
},
126+
{
127+
"cell_type": "markdown",
128+
"id": "0b965d76",
129+
"metadata": {},
130+
"source": [
131+
"# Create the InferenceService manifest that will use the uploaded model and deploy it to the cluster"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": null,
137+
"id": "bd593707-a0f5-47e8-a65b-73e467fb2c8c",
138+
"metadata": {},
139+
"outputs": [],
140+
"source": [
141+
"# Create the manifest for the kserve InferenceService\n",
142+
"inference_service_name = \"kserve-minio-test\"\n",
143+
"inference_service_manifest= \\\n",
144+
"f\"\"\"\n",
145+
"apiVersion: serving.kserve.io/v1beta1\n",
146+
"kind: InferenceService\n",
147+
"metadata:\n",
148+
" name: {inference_service_name}\n",
149+
" namespace: {namespace}\n",
150+
"spec:\n",
151+
" predictor:\n",
152+
" model:\n",
153+
" modelFormat:\n",
154+
" name: sklearn\n",
155+
" storageUri: s3://{s3_model_path}/model.joblib\n",
156+
"\n",
157+
"\"\"\"\n",
158+
"manifest_file_name=\"inferenceservice.yaml\"\n",
159+
"with open(manifest_file_name, \"w\") as manifest_file:\n",
160+
" manifest_file.write(inference_service_manifest)"
161+
]
162+
},
163+
{
164+
"cell_type": "code",
165+
"execution_count": null,
166+
"id": "949bfd04-1b67-4cc2-b3fd-1a37453ab8df",
167+
"metadata": {},
168+
"outputs": [],
169+
"source": [
170+
"# Use kubectl to apply the created manifest\n",
171+
"# Jupyter notebook replaces {variable} with actual python value\n",
172+
"!kubectl apply -f {manifest_file_name} # Apply the manifest"
173+
]
174+
},
175+
{
176+
"cell_type": "code",
177+
"execution_count": null,
178+
"id": "ebb6216a-831b-481c-9500-29c4fe7c615b",
179+
"metadata": {},
180+
"outputs": [],
181+
"source": [
182+
"!kubectl wait inferenceservice --for=condition=ready --timeout 300s --namespace {namespace} {inference_service_name} # Wait for the kserve InferenceService to be ready.\n",
183+
"!kubectl get inferenceservice --namespace {namespace} {inference_service_name}"
184+
]
185+
},
186+
{
187+
"cell_type": "markdown",
188+
"id": "b575cb6d",
189+
"metadata": {},
190+
"source": [
191+
"# Test the deployed InferenceService with a sample request"
192+
]
193+
},
194+
{
195+
"cell_type": "code",
196+
"execution_count": null,
197+
"id": "a5ab81fc-f9b2-4dd0-b969-58a7a2236420",
198+
"metadata": {},
199+
"outputs": [],
200+
"source": [
201+
"# Below, we use {{ and }} to escape the curly braces in the jsonpath expression so Jupyter notebook does not try to replace them with python variables\n",
202+
"inference_service_url = !kubectl get inferenceservice --namespace {namespace} {inference_service_name} -o jsonpath='{{.status.url}}' \n",
203+
"inference_service_url = inference_service_url[0] # Jupyter notebook shell command executions returns an array\n",
204+
"print(inference_service_url)"
205+
]
206+
},
207+
{
208+
"cell_type": "code",
209+
"execution_count": null,
210+
"id": "ce43d966-76fa-4f7a-8a4f-e085d090656d",
211+
"metadata": {},
212+
"outputs": [],
213+
"source": [
214+
"# Test the deployed InferenceService.\n",
215+
"# The deployed service is protected by an API Key.\n",
216+
"import requests\n",
217+
"response = requests.post(\n",
218+
" f\"{inference_service_url}/v1/models/{inference_service_name}:predict\",\n",
219+
" headers={\n",
220+
" \"X-Api-Key\": INFERENCE_SERVICE_API_KEY\n",
221+
" },\n",
222+
" json={\"instances\": [[6.8, 2.8, 4.8, 1.4], [5.1, 3.5, 1.4, 0.2]]} # an iris instance is [sepal_length, sepal_width, petal_length, petal_width]\n",
223+
")\n",
224+
"print(response.json())"
225+
]
226+
}
227+
],
228+
"metadata": {
229+
"kernelspec": {
230+
"display_name": "Python 3 (ipykernel)",
231+
"language": "python",
232+
"name": "python3"
233+
},
234+
"language_info": {
235+
"codemirror_mode": {
236+
"name": "ipython",
237+
"version": 3
238+
},
239+
"file_extension": ".py",
240+
"mimetype": "text/x-python",
241+
"name": "python",
242+
"nbconvert_exporter": "python",
243+
"pygments_lexer": "ipython3",
244+
"version": "3.11.10"
245+
}
246+
},
247+
"nbformat": 4,
248+
"nbformat_minor": 5
249+
}

0 commit comments

Comments
 (0)