|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "id": "ef9cbd78", |
| 6 | + "metadata": {}, |
| 7 | + "source": [ |
| 8 | + "# Prepare the environment for the notebook" |
| 9 | + ] |
| 10 | + }, |
| 11 | + { |
| 12 | + "cell_type": "code", |
| 13 | + "execution_count": null, |
| 14 | + "id": "b3fca639-7bf9-4199-b296-bfdccabe0b96", |
| 15 | + "metadata": {}, |
| 16 | + "outputs": [], |
| 17 | + "source": [ |
| 18 | + "# sklearn, joblib, s3fs already come with the notebook image\n", |
| 19 | + "# %pip install scikit-learn joblib s3fs" |
| 20 | + ] |
| 21 | + }, |
| 22 | + { |
| 23 | + "cell_type": "code", |
| 24 | + "execution_count": null, |
| 25 | + "id": "3e7aeeac-0aeb-48cb-988f-fb2e444d1d53", |
| 26 | + "metadata": {}, |
| 27 | + "outputs": [], |
| 28 | + "source": [ |
| 29 | + "INFERENCE_SERVICE_API_KEY = \"\" # Set this to the API Key used to access the deployed InferenceServices; if not known, ask the cluster administrator. Avoid committing a real key with the notebook.\n", |
| 30 | + "if not INFERENCE_SERVICE_API_KEY:\n", |
| 31 | + " raise RuntimeError(\"Please provide the API Key that will be used to test the deployed InferenceService\")" |
| 32 | + ] |
| 33 | + }, |
| 34 | + { |
| 35 | + "cell_type": "markdown", |
| 36 | + "id": "81293c0f", |
| 37 | + "metadata": {}, |
| 38 | + "source": [ |
| 39 | + "# Create a small model to be deployed as InferenceService" |
| 40 | + ] |
| 41 | + }, |
| 42 | + { |
| 43 | + "cell_type": "code", |
| 44 | + "execution_count": null, |
| 45 | + "id": "4d69463a-cc14-4e7e-81a1-95f7d29d60ea", |
| 46 | + "metadata": {}, |
| 47 | + "outputs": [], |
| 48 | + "source": [ |
| 49 | + "from sklearn import svm, datasets\n", |
| 50 | + "from joblib import dump" |
| 51 | + ] |
| 52 | + }, |
| 53 | + { |
| 54 | + "cell_type": "code", |
| 55 | + "execution_count": null, |
| 56 | + "id": "696e48a2-c974-4b43-9fd7-bc06f254502d", |
| 57 | + "metadata": {}, |
| 58 | + "outputs": [], |
| 59 | + "source": [ |
| 60 | + "# Create a small model with iris dataset\n", |
| 61 | + "iris = datasets.load_iris()\n", |
| 62 | + "clf = svm.SVC(gamma='scale')\n", |
| 63 | + "clf.fit(iris.data, iris.target)\n", |
| 64 | + "dump(clf, 'model.joblib')\n", |
| 65 | + "print(\"Iris model file model.joblib created!\")" |
| 66 | + ] |
| 67 | + }, |
| 68 | + { |
| 69 | + "cell_type": "markdown", |
| 70 | + "id": "e382f93b", |
| 71 | + "metadata": {}, |
| 72 | + "source": [ |
| 73 | + "# Push the created model to s3 storage (MinIO)" |
| 74 | + ] |
| 75 | + }, |
| 76 | + { |
| 77 | + "cell_type": "code", |
| 78 | + "execution_count": null, |
| 79 | + "id": "e7924d92-9812-4865-a99e-82f595e33dae", |
| 80 | + "metadata": {}, |
| 81 | + "outputs": [], |
| 82 | + "source": [ |
| 83 | + "import os, s3fs # for uploading the created model to minio" |
| 84 | + ] |
| 85 | + }, |
| 86 | + { |
| 87 | + "cell_type": "code", |
| 88 | + "execution_count": null, |
| 89 | + "id": "f557ff09-cbf8-49c2-b6ad-26bb648cd458", |
| 90 | + "metadata": {}, |
| 91 | + "outputs": [], |
| 92 | + "source": [ |
| 93 | + "# The notebook is already setup with minio credentials for the bucket that start with <namespace>-data\n", |
| 94 | + "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", |
| 95 | + " namespace = namespace_file.read()\n", |
| 96 | + "s3_bucket = f\"{namespace}-data\"\n", |
| 97 | + "s3_model_path = f\"{s3_bucket}/minimal-kserve-example\"\n", |
| 98 | + "print(f\"The created model will be uploaded to s3://{s3_model_path}\")" |
| 99 | + ] |
| 100 | + }, |
| 101 | + { |
| 102 | + "cell_type": "code", |
| 103 | + "execution_count": null, |
| 104 | + "id": "9dad0c7e-9718-4d76-ac0a-9ba7a843ca00", |
| 105 | + "metadata": {}, |
| 106 | + "outputs": [], |
| 107 | + "source": [ |
| 108 | + "# Steps for uploading the created model to MinIO\n", |
| 109 | + "# Get the required values from the environment that is set during notebook creation\n", |
| 110 | + "aws_access_key_id = os.getenv(\"AWS_ACCESS_KEY_ID\", \"\")\n", |
| 111 | + "aws_secret_access_key = os.getenv(\"AWS_SECRET_ACCESS_KEY\", \"\")\n", |
| 112 | + "s3_endpoint = os.getenv(\"S3_ENDPOINT\", \"minio.minio\")\n", |
| 113 | + "s3_use_https = os.getenv(\"S3_USE_HTTPS\", \"0\") == \"1\"\n", |
| 114 | + "s3_verify_ssl = os.getenv(\"S3_VERIFY_SSL\", \"0\") == \"1\"\n", |
| 115 | + "s3_endpoint = f\"http{'s' if s3_use_https else ''}://{s3_endpoint}\"\n", |
| 116 | + "if not aws_access_key_id or not aws_secret_access_key:\n", |
| 117 | + " raise ValueError(\"AWS credentials not found in environment variables.\")\n", |
| 118 | + "\n", |
| 119 | + "# Upload the model to MinIO\n", |
| 120 | + "s3 = s3fs.S3FileSystem(endpoint_url=s3_endpoint, key=aws_access_key_id, secret=aws_secret_access_key, use_ssl=s3_verify_ssl)\n", |
| 121 | + "s3.put(\"model.joblib\", f\"{s3_model_path}/model.joblib\")\n", |
| 122 | + "# List the bucket content to see if upload was successful\n", |
| 123 | + "s3.ls(s3_model_path)" |
| 124 | + ] |
| 125 | + }, |
| 126 | + { |
| 127 | + "cell_type": "markdown", |
| 128 | + "id": "0b965d76", |
| 129 | + "metadata": {}, |
| 130 | + "source": [ |
| 131 | + "# Create the InferenceService manifest that will use the uploaded model and deploy it to the cluster" |
| 132 | + ] |
| 133 | + }, |
| 134 | + { |
| 135 | + "cell_type": "code", |
| 136 | + "execution_count": null, |
| 137 | + "id": "bd593707-a0f5-47e8-a65b-73e467fb2c8c", |
| 138 | + "metadata": {}, |
| 139 | + "outputs": [], |
| 140 | + "source": [ |
| 141 | + "# Create the manifest for the kserve InferenceService\n", |
| 142 | + "inference_service_name = \"kserve-minio-test\"\n", |
| 143 | + "inference_service_manifest= \\\n", |
| 144 | + "f\"\"\"\n", |
| 145 | + "apiVersion: serving.kserve.io/v1beta1\n", |
| 146 | + "kind: InferenceService\n", |
| 147 | + "metadata:\n", |
| 148 | + " name: {inference_service_name}\n", |
| 149 | + " namespace: {namespace}\n", |
| 150 | + "spec:\n", |
| 151 | + " predictor:\n", |
| 152 | + " model:\n", |
| 153 | + " modelFormat:\n", |
| 154 | + " name: sklearn\n", |
| 155 | + " storageUri: s3://{s3_model_path}/model.joblib\n", |
| 156 | + "\n", |
| 157 | + "\"\"\"\n", |
| 158 | + "manifest_file_name=\"inferenceservice.yaml\"\n", |
| 159 | + "with open(manifest_file_name, \"w\") as manifest_file:\n", |
| 160 | + " manifest_file.write(inference_service_manifest)" |
| 161 | + ] |
| 162 | + }, |
| 163 | + { |
| 164 | + "cell_type": "code", |
| 165 | + "execution_count": null, |
| 166 | + "id": "949bfd04-1b67-4cc2-b3fd-1a37453ab8df", |
| 167 | + "metadata": {}, |
| 168 | + "outputs": [], |
| 169 | + "source": [ |
| 170 | + "# Use kubectl to apply the created manifest to the cluster\n", |
| 171 | + "# In a `!` shell line, IPython replaces {variable} with the value of the Python variable before running the command\n", |
| 172 | + "!kubectl apply -f {manifest_file_name} # Apply the manifest" |
| 173 | + ] |
| 174 | + }, |
| 175 | + { |
| 176 | + "cell_type": "code", |
| 177 | + "execution_count": null, |
| 178 | + "id": "ebb6216a-831b-481c-9500-29c4fe7c615b", |
| 179 | + "metadata": {}, |
| 180 | + "outputs": [], |
| 181 | + "source": [ |
| 182 | + "!kubectl wait inferenceservice --for=condition=ready --timeout 300s --namespace {namespace} {inference_service_name} # Wait, up to the 300s timeout, for the kserve InferenceService to report the ready condition.\n", |
| 183 | + "!kubectl get inferenceservice --namespace {namespace} {inference_service_name}" |
| 184 | + ] |
| 185 | + }, |
| 186 | + { |
| 187 | + "cell_type": "markdown", |
| 188 | + "id": "b575cb6d", |
| 189 | + "metadata": {}, |
| 190 | + "source": [ |
| 191 | + "# Test the deployed InferenceService with a sample request" |
| 192 | + ] |
| 193 | + }, |
| 194 | + { |
| 195 | + "cell_type": "code", |
| 196 | + "execution_count": null, |
| 197 | + "id": "a5ab81fc-f9b2-4dd0-b969-58a7a2236420", |
| 198 | + "metadata": {}, |
| 199 | + "outputs": [], |
| 200 | + "source": [ |
| 201 | + "# Below, we use {{ and }} to escape the curly braces in the jsonpath expression so Jupyter notebook does not try to replace them with python variables\n", |
| 202 | + "inference_service_url = !kubectl get inferenceservice --namespace {namespace} {inference_service_name} -o jsonpath='{{.status.url}}' \n", |
| 203 | + "inference_service_url = inference_service_url[0] # Jupyter notebook shell command executions returns an array\n", |
| 204 | + "print(inference_service_url)" |
| 205 | + ] |
| 206 | + }, |
| 207 | + { |
| 208 | + "cell_type": "code", |
| 209 | + "execution_count": null, |
| 210 | + "id": "ce43d966-76fa-4f7a-8a4f-e085d090656d", |
| 211 | + "metadata": {}, |
| 212 | + "outputs": [], |
| 213 | + "source": [ |
| 214 | + "# Test the deployed InferenceService.\n", |
| 215 | + "# The deployed service is protected by an API Key.\n", |
| 216 | + "import requests\n", |
| 217 | + "response = requests.post(\n", |
| 218 | + " f\"{inference_service_url}/v1/models/{inference_service_name}:predict\",\n", |
| 219 | + " headers={\n", |
| 220 | + " \"X-Api-Key\": INFERENCE_SERVICE_API_KEY\n", |
| 221 | + " },\n", |
| 222 | + " json={\"instances\": [[6.8, 2.8, 4.8, 1.4], [5.1, 3.5, 1.4, 0.2]]} # an iris instance is [sepal_length, sepal_width, petal_length, petal_width]\n", |
| 223 | + ")\n", |
| 224 | + "print(response.json())" |
| 225 | + ] |
| 226 | + } |
| 227 | + ], |
| 228 | + "metadata": { |
| 229 | + "kernelspec": { |
| 230 | + "display_name": "Python 3 (ipykernel)", |
| 231 | + "language": "python", |
| 232 | + "name": "python3" |
| 233 | + }, |
| 234 | + "language_info": { |
| 235 | + "codemirror_mode": { |
| 236 | + "name": "ipython", |
| 237 | + "version": 3 |
| 238 | + }, |
| 239 | + "file_extension": ".py", |
| 240 | + "mimetype": "text/x-python", |
| 241 | + "name": "python", |
| 242 | + "nbconvert_exporter": "python", |
| 243 | + "pygments_lexer": "ipython3", |
| 244 | + "version": "3.11.10" |
| 245 | + } |
| 246 | + }, |
| 247 | + "nbformat": 4, |
| 248 | + "nbformat_minor": 5 |
| 249 | +} |
0 commit comments