Add minimal KServe + MinIO model serving example notebook

Korel · Korel · commit c2004b613d75 · 2026-02-18T15:49:41.000+01:00
* Train a simple sklearn SVM classifier on the Iris dataset and serialize it with joblib
* Upload the resulting `model.joblib` to a MinIO S3 bucket using `s3fs`
* Generate and apply a KServe `InferenceService` manifest pointing to the uploaded model
* Wait for the `InferenceService` to become ready and retrieve its URL
* Send a test prediction request to the deployed service using an API key
diff --git a/serving/minimal-s3-model/minimal-s3-model.ipynb b/serving/minimal-s3-model/minimal-s3-model.ipynb
@@ -0,0 +1,249 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ef9cbd78",
+   "metadata": {},
+   "source": [
+    "# Prepare the environment for the notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3fca639-7bf9-4199-b296-bfdccabe0b96",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sklearn, joblib, s3fs already come with the notebook image\n",
+    "# %pip install scikit-learn joblib s3fs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e7aeeac-0aeb-48cb-988f-fb2e444d1d53",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "# API Key that is used to access the deployed InferenceServices.\n",
+    "# If not known, ask the cluster administrator for it.\n",
+    "# The key is taken from the INFERENCE_SERVICE_API_KEY environment variable or, when that is not set,\n",
+    "# typed into a hidden prompt, so the secret is never stored in the notebook file itself.\n",
+    "INFERENCE_SERVICE_API_KEY = os.getenv(\"INFERENCE_SERVICE_API_KEY\", \"\").strip()\n",
+    "if not INFERENCE_SERVICE_API_KEY:\n",
+    "    INFERENCE_SERVICE_API_KEY = getpass(\"InferenceService API Key: \").strip()\n",
+    "if not INFERENCE_SERVICE_API_KEY:\n",
+    "    raise RuntimeError(\"Please provide the API Key that will be used to test the deployed InferenceService\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "81293c0f",
+   "metadata": {},
+   "source": [
+    "# Create a small model to be deployed as InferenceService"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d69463a-cc14-4e7e-81a1-95f7d29d60ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn import svm, datasets\n",
+    "from joblib import dump"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "696e48a2-c974-4b43-9fd7-bc06f254502d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fit a support vector classifier on the Iris dataset shipped with scikit-learn\n",
+    "iris = datasets.load_iris()\n",
+    "features, labels = iris.data, iris.target\n",
+    "clf = svm.SVC(gamma='scale').fit(features, labels)\n",
+    "\n",
+    "# Serialize the fitted classifier to the working directory with joblib\n",
+    "dump(clf, 'model.joblib')\n",
+    "print(\"Iris model file model.joblib created!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e382f93b",
+   "metadata": {},
+   "source": [
+    "# Push the created model to S3 storage (MinIO)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e7924d92-9812-4865-a99e-82f595e33dae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, s3fs # for uploading the created model to minio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f557ff09-cbf8-49c2-b6ad-26bb648cd458",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The notebook is already set up with MinIO credentials for the bucket named <namespace>-data\n",
+    "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n",
+    "    # Strip surrounding whitespace so that a trailing newline in the file cannot leak into\n",
+    "    # the bucket name, the generated manifest or the kubectl arguments used later on\n",
+    "    namespace = namespace_file.read().strip()\n",
+    "if not namespace:\n",
+    "    raise RuntimeError(\"Could not determine the Kubernetes namespace of this notebook\")\n",
+    "s3_bucket = f\"{namespace}-data\"\n",
+    "s3_model_path = f\"{s3_bucket}/minimal-kserve-example\"\n",
+    "print(f\"The created model will be uploaded to s3://{s3_model_path}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9dad0c7e-9718-4d76-ac0a-9ba7a843ca00",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Steps for uploading the created model to MinIO\n",
+    "# Get the required values from the environment that is set during notebook creation\n",
+    "aws_access_key_id = os.getenv(\"AWS_ACCESS_KEY_ID\", \"\")\n",
+    "aws_secret_access_key = os.getenv(\"AWS_SECRET_ACCESS_KEY\", \"\")\n",
+    "if not aws_access_key_id or not aws_secret_access_key:\n",
+    "    raise ValueError(\"AWS credentials not found in environment variables.\")\n",
+    "\n",
+    "s3_host = os.getenv(\"S3_ENDPOINT\", \"minio.minio\")\n",
+    "s3_use_https = os.getenv(\"S3_USE_HTTPS\", \"0\") == \"1\"    # selects the http/https scheme of the endpoint\n",
+    "s3_verify_ssl = os.getenv(\"S3_VERIFY_SSL\", \"0\") == \"1\"  # whether TLS certificates are verified\n",
+    "s3_endpoint = f\"http{'s' if s3_use_https else ''}://{s3_host}\"\n",
+    "\n",
+    "# Upload the model to MinIO\n",
+    "s3 = s3fs.S3FileSystem(\n",
+    "    endpoint_url=s3_endpoint,\n",
+    "    key=aws_access_key_id,\n",
+    "    secret=aws_secret_access_key,\n",
+    "    use_ssl=s3_use_https,  # must follow S3_USE_HTTPS, not the certificate verification flag\n",
+    "    client_kwargs={\"verify\": s3_verify_ssl},  # certificate verification is a separate client option\n",
+    ")\n",
+    "s3.put(\"model.joblib\", f\"{s3_model_path}/model.joblib\")\n",
+    "# List the bucket content to see if upload was successful\n",
+    "s3.ls(s3_model_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0b965d76",
+   "metadata": {},
+   "source": [
+    "# Create the InferenceService manifest that will use the uploaded model and deploy it to the cluster"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd593707-a0f5-47e8-a65b-73e467fb2c8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Name of the KServe InferenceService and of the file its manifest is written to\n",
+    "manifest_file_name = \"inferenceservice.yaml\"\n",
+    "inference_service_name = \"kserve-minio-test\"\n",
+    "\n",
+    "# Render the manifest; namespace and s3_model_path are defined in an earlier cell\n",
+    "inference_service_manifest = f\"\"\"\n",
+    "apiVersion: serving.kserve.io/v1beta1\n",
+    "kind: InferenceService\n",
+    "metadata:\n",
+    "  name: {inference_service_name}\n",
+    "  namespace: {namespace}\n",
+    "spec:\n",
+    "  predictor:\n",
+    "    model:\n",
+    "      modelFormat:\n",
+    "        name: sklearn\n",
+    "      storageUri: s3://{s3_model_path}/model.joblib\n",
+    "\n",
+    "\"\"\"\n",
+    "\n",
+    "# Write the rendered manifest to disk so that kubectl can apply it\n",
+    "with open(manifest_file_name, \"w\") as out:\n",
+    "    out.write(inference_service_manifest)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "949bfd04-1b67-4cc2-b3fd-1a37453ab8df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Apply the created manifest with kubectl.\n",
+    "# Jupyter replaces {manifest_file_name} with the value of the Python variable before running the shell command.\n",
+    "!kubectl apply --filename {manifest_file_name}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ebb6216a-831b-481c-9500-29c4fe7c615b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Wait (at most 300 seconds) for the KServe InferenceService to be ready, then show its status.\n",
+    "!kubectl wait inferenceservice {inference_service_name} -n {namespace} --for=condition=ready --timeout=300s\n",
+    "!kubectl get inferenceservice {inference_service_name} -n {namespace}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b575cb6d",
+   "metadata": {},
+   "source": [
+    "# Test the deployed InferenceService with a sample request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a5ab81fc-f9b2-4dd0-b969-58a7a2236420",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Below, we use {{ and }} to escape the curly braces in the jsonpath expression so Jupyter notebook does not try to replace them with python variables\n",
+    "kubectl_output = !kubectl get inferenceservice --namespace {namespace} {inference_service_name} -o jsonpath='{{.status.url}}'\n",
+    "# A shell command executed by Jupyter returns a list of output lines; the URL is expected on the first one\n",
+    "inference_service_url = kubectl_output[0].strip() if kubectl_output else \"\"\n",
+    "if not inference_service_url.startswith(\"http\"):\n",
+    "    # Fail here with the kubectl output instead of an IndexError or a confusing request error later on\n",
+    "    raise RuntimeError(f\"Could not read the InferenceService URL, kubectl returned: {list(kubectl_output)}\")\n",
+    "print(inference_service_url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce43d966-76fa-4f7a-8a4f-e085d090656d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Test the deployed InferenceService.\n",
+    "# The deployed service is protected by an API Key.\n",
+    "import requests\n",
+    "\n",
+    "response = requests.post(\n",
+    "    f\"{inference_service_url}/v1/models/{inference_service_name}:predict\",\n",
+    "    headers={\n",
+    "        \"X-Api-Key\": INFERENCE_SERVICE_API_KEY\n",
+    "    },\n",
+    "    json={\"instances\": [[6.8, 2.8, 4.8, 1.4], [5.1, 3.5, 1.4, 0.2]]}, # an iris instance is [sepal_length, sepal_width, petal_length, petal_width]\n",
+    "    timeout=30, # seconds; without a timeout the cell can hang forever if the service is unreachable\n",
+    ")\n",
+    "# Raise on HTTP error statuses (e.g. a rejected API Key) instead of failing later while decoding a non-JSON error body\n",
+    "response.raise_for_status()\n",
+    "print(response.json())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}