From 7ff7efd847a789bbc1428d596a3ab12ae11bc42a Mon Sep 17 00:00:00 2001 From: Bruno Tafur Date: Mon, 19 Jan 2026 17:13:34 +0000 Subject: [PATCH 1/5] Add LLMOps basic template for end-to-end ML pipeline with Unity Catalog and MLflow --- contrib/templates/llmops-basic/README.md | 10 + .../databricks_template_schema.json | 35 ++ .../llmops-basic/template/.gitignore | 35 ++ .../llmops-basic/template/README.md.tmpl | 45 ++ .../llmops-basic/template/databricks.yml.tmpl | 44 ++ .../template/notebooks/model_build.ipynb | 276 ++++++++++++ .../notebooks/model_endpoint_deploy.ipynb | 118 ++++++ .../template/notebooks/model_evaluation.ipynb | 396 ++++++++++++++++++ .../template/notebooks/model_inference.ipynb | 243 +++++++++++ .../notebooks/model_preprocessing.ipynb | 222 ++++++++++ .../template/notebooks/quickstart_agent.py | 74 ++++ .../resources/model_artifacts.yml.tmpl | 21 + .../model_build_evaluation.job.yml.tmpl | 32 ++ .../model_endpoint_deploy.job.yml.tmpl | 25 ++ .../resources/model_inference.job.yml.tmpl | 25 ++ .../model_preprocessing.job.yml.tmpl | 21 + llmops-basic/.gitignore | 35 ++ llmops-basic/README.md | 45 ++ llmops-basic/databricks.yml | 43 ++ llmops-basic/notebooks/model_build.ipynb | 276 ++++++++++++ .../notebooks/model_endpoint_deploy.ipynb | 118 ++++++ llmops-basic/notebooks/model_evaluation.ipynb | 396 ++++++++++++++++++ llmops-basic/notebooks/model_inference.ipynb | 243 +++++++++++ .../notebooks/model_preprocessing.ipynb | 222 ++++++++++ llmops-basic/notebooks/quickstart_agent.py | 74 ++++ llmops-basic/resources/model_artifacts.yml | 21 + .../resources/model_build_evaluation.job.yml | 32 ++ .../resources/model_endpoint_deploy.job.yml | 25 ++ .../resources/model_inference.job.yml | 25 ++ .../resources/model_preprocessing.job.yml | 21 + 30 files changed, 3198 insertions(+) create mode 100644 contrib/templates/llmops-basic/README.md create mode 100644 contrib/templates/llmops-basic/databricks_template_schema.json create mode 100644 
contrib/templates/llmops-basic/template/.gitignore create mode 100644 contrib/templates/llmops-basic/template/README.md.tmpl create mode 100644 contrib/templates/llmops-basic/template/databricks.yml.tmpl create mode 100644 contrib/templates/llmops-basic/template/notebooks/model_build.ipynb create mode 100644 contrib/templates/llmops-basic/template/notebooks/model_endpoint_deploy.ipynb create mode 100644 contrib/templates/llmops-basic/template/notebooks/model_evaluation.ipynb create mode 100644 contrib/templates/llmops-basic/template/notebooks/model_inference.ipynb create mode 100644 contrib/templates/llmops-basic/template/notebooks/model_preprocessing.ipynb create mode 100644 contrib/templates/llmops-basic/template/notebooks/quickstart_agent.py create mode 100644 contrib/templates/llmops-basic/template/resources/model_artifacts.yml.tmpl create mode 100644 contrib/templates/llmops-basic/template/resources/model_build_evaluation.job.yml.tmpl create mode 100644 contrib/templates/llmops-basic/template/resources/model_endpoint_deploy.job.yml.tmpl create mode 100644 contrib/templates/llmops-basic/template/resources/model_inference.job.yml.tmpl create mode 100644 contrib/templates/llmops-basic/template/resources/model_preprocessing.job.yml.tmpl create mode 100644 llmops-basic/.gitignore create mode 100644 llmops-basic/README.md create mode 100644 llmops-basic/databricks.yml create mode 100644 llmops-basic/notebooks/model_build.ipynb create mode 100644 llmops-basic/notebooks/model_endpoint_deploy.ipynb create mode 100644 llmops-basic/notebooks/model_evaluation.ipynb create mode 100644 llmops-basic/notebooks/model_inference.ipynb create mode 100644 llmops-basic/notebooks/model_preprocessing.ipynb create mode 100644 llmops-basic/notebooks/quickstart_agent.py create mode 100644 llmops-basic/resources/model_artifacts.yml create mode 100644 llmops-basic/resources/model_build_evaluation.job.yml create mode 100644 llmops-basic/resources/model_endpoint_deploy.job.yml create mode 
100644 llmops-basic/resources/model_inference.job.yml create mode 100644 llmops-basic/resources/model_preprocessing.job.yml diff --git a/contrib/templates/llmops-basic/README.md b/contrib/templates/llmops-basic/README.md new file mode 100644 index 0000000..dbf0c2a --- /dev/null +++ b/contrib/templates/llmops-basic/README.md @@ -0,0 +1,10 @@ +# LLMOps basic template + +This template introduces a base structure for organizing LLMOps projects +using DABs, Unity Catalog and MLflow. + +Install it using + +``` +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/llmops-basic +``` \ No newline at end of file diff --git a/contrib/templates/llmops-basic/databricks_template_schema.json b/contrib/templates/llmops-basic/databricks_template_schema.json new file mode 100644 index 0000000..7162166 --- /dev/null +++ b/contrib/templates/llmops-basic/databricks_template_schema.json @@ -0,0 +1,35 @@ +{ + "properties": { + "project_name": { + "type": "string", + "default": "llmops_basic", + "description": "Name of the LLMOps project (use underscores, no hyphens)", + "order": 1 + }, + "catalog_name_dev": { + "type": "string", + "default": "dev_catalog", + "description": "Name of the Unity Catalog for development environment", + "order": 2 + }, + "catalog_name_prod": { + "type": "string", + "default": "prod_catalog", + "description": "Name of the Unity Catalog for production environment", + "order": 3 + }, + "workspace_host_dev": { + "type": "string", + "default": "https://your-workspace.azuredatabricks.net/", + "description": "Databricks workspace URL for development", + "order": 4 + }, + "workspace_host_prod": { + "type": "string", + "default": "https://your-workspace.azuredatabricks.net/", + "description": "Databricks workspace URL for production", + "order": 5 + } + } +} + diff --git a/contrib/templates/llmops-basic/template/.gitignore b/contrib/templates/llmops-basic/template/.gitignore new file mode 100644 index
0000000..dc1fd6c --- /dev/null +++ b/contrib/templates/llmops-basic/template/.gitignore @@ -0,0 +1,35 @@ +# Databricks +.databricks/ +.bundle/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +env/ +ENV/ + +# Jupyter +.ipynb_checkpoints/ +*.ipynb_checkpoints + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# MLflow +mlruns/ + +# Logs +*.log + diff --git a/contrib/templates/llmops-basic/template/README.md.tmpl b/contrib/templates/llmops-basic/template/README.md.tmpl new file mode 100644 index 0000000..464124c --- /dev/null +++ b/contrib/templates/llmops-basic/template/README.md.tmpl @@ -0,0 +1,45 @@ +# {{.project_name}} + +End-to-end LLMOps project for sentiment analysis of reviews using Databricks. + +## Overview + +This example demonstrates a complete LLMOps pipeline for building, evaluating, and deploying a sentiment analysis model. + +**Pipeline stages:** +- Data preprocessing +- Model training with MLflow +- Model evaluation +- Model deployment to serving endpoint +- Batch inference + +## Requirements + +- Databricks CLI (v0.218.0+) +- Unity Catalog enabled +- Required permissions on the catalog: + - `USE CATALOG` - to access the catalog + - `CREATE SCHEMA` - to create schemas + +## Quick Start + +**Deploy:** +```bash +databricks bundle deploy -t dev +``` + +**Run pipeline:** +```bash +databricks bundle run model_preprocessing -t dev +databricks bundle run model_build_evaluation -t dev +databricks bundle run model_endpoint_deploy -t dev +databricks bundle run model_inference -t dev +``` + +## Configuration + +- **Dev Catalog**: `{{.catalog_name_dev}}` +- **Prod Catalog**: `{{.catalog_name_prod}}` + +Edit `databricks.yml` to customize catalog names, schema name, model name, and experiment settings. 
+ diff --git a/contrib/templates/llmops-basic/template/databricks.yml.tmpl b/contrib/templates/llmops-basic/template/databricks.yml.tmpl new file mode 100644 index 0000000..c521ed9 --- /dev/null +++ b/contrib/templates/llmops-basic/template/databricks.yml.tmpl @@ -0,0 +1,44 @@ +# This is a Databricks Asset Bundle for {{.project_name}}. +bundle: + name: "{{.project_name}}" + +variables: + catalog_name: + description: "Name of the UC catalog to use" + default: "default_catalog" + schema_name: + description: "Name of the UC schema to use" + default: "sentiment_agent_project" + model_name: + description: "Name of the UC model to use" + default: "sentiment_agent" + experiment_name: + description: "Name of experiment" + default: "/Users/${workspace.current_user.userName}/${bundle.target}_sentiment_agent" + +include: + - resources/*.yml + +targets: + dev: + mode: development + default: true + workspace: + host: {{.workspace_host_dev}} + variables: + catalog_name: {{.catalog_name_dev}} + + prod: + mode: production + workspace: + host: {{.workspace_host_prod}} + root_path: /Shared/.bundle/prod/${bundle.name} + variables: + catalog_name: {{.catalog_name_prod}} + {{- if not is_service_principal}} + run_as: + # This runs as {{user_name}} in production. Alternatively, + # a service principal could be used here using service_principal_name + # (see Databricks documentation). 
+ user_name: {{user_name}} + {{end -}} diff --git a/contrib/templates/llmops-basic/template/notebooks/model_build.ipynb b/contrib/templates/llmops-basic/template/notebooks/model_build.ipynb new file mode 100644 index 0000000..81365c2 --- /dev/null +++ b/contrib/templates/llmops-basic/template/notebooks/model_build.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "64ed089a-931a-4123-afdc-a2c38390774c", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install -U -qqqq mlflow databricks-openai databricks-agents\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "7f453049-63d9-46b2-8421-92f8c6a5442a", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "\n", + "\n", + "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + 
"inputWidgets": {}, + "nuid": "114153c0-b167-451b-a2ac-6d4f48faeaec", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "\n", + "registered_model_name = f\"{catalog_name}.{schema_name}.{model_name}\"\n", + "\n", + "mlflow.set_experiment(experiment_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "154fbd28-5ad2-4a51-8ccf-fa850dffc5bd", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "from mlflow.models.resources import DatabricksFunction, DatabricksServingEndpoint\n", + "from pkg_resources import get_distribution\n", + "from quickstart_agent import LLM_ENDPOINT_NAME\n", + "import datetime\n", + "\n", + "mlflow.set_registry_uri(\"databricks-uc\")\n", + "resources = [\n", + " DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),\n", + "]\n", + "\n", + "timestamp = datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + "\n", + "with mlflow.start_run(run_name=f\"sentiment_agent_{timestamp}\"):\n", + " logged_model_info = mlflow.pyfunc.log_model(\n", + " artifact_path=\"agent\",\n", + " python_model=\"quickstart_agent.py\",\n", + " extra_pip_requirements=[\n", + " f\"databricks-connect=={get_distribution('databricks-connect').version}\"\n", + " ],\n", + " resources=resources\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "ea348fb4-9a9f-4772-aaf1-65a646ca5739", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.jobs.taskValues.set(key = \"logged_run_id\", value = logged_model_info.run_id)" + ] + } + ], 
+ "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_build", + "widgets": { + "catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "fd954ea2-6905-4526-b686-38b43c5b6d97", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + }, + "experiment_name": { + "currentValue": "", + "nuid": "818e024c-c654-4a9c-9f36-7e595b9a9e5f", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "tee", + "label": null, + "name": "experiment_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "tee", + "label": null, + "name": "experiment_name", + "options": { + "autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + }, + "model_name": { + "currentValue": "quickstart_agent", + "nuid": "93dd77ea-7b35-410c-af71-589d260ae712", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + 
"options": { + "autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "88bd61ef-aa5c-42a2-ad26-f9079c1afc5f", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/contrib/templates/llmops-basic/template/notebooks/model_endpoint_deploy.ipynb b/contrib/templates/llmops-basic/template/notebooks/model_endpoint_deploy.ipynb new file mode 100644 index 0000000..a411e34 --- /dev/null +++ b/contrib/templates/llmops-basic/template/notebooks/model_endpoint_deploy.ipynb @@ -0,0 +1,118 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "28525745-4fb6-4667-92f1-1e0e3fa68da1", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install -U -qqqq mlflow databricks-openai databricks-agents\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "b2519e1e-ef7e-439c-ab55-5c17e77c8b25", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + 
"dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "\n", + "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")\n", + "\n", + "import mlflow\n", + "\n", + "mlflow.set_experiment(experiment_name)\n", + "\n", + "registered_model_name = f\"{catalog_name}.{schema_name}.{model_name}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "b2a5f820-740b-4e9d-8496-4b54c3350156", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "#https://docs.databricks.com/aws/en/generative-ai/agent-framework/deploy-agent#deploy-an-agent-using-deploy\n", + "\n", + "from mlflow import MlflowClient\n", + "from databricks.agents import get_deployments, delete_deployment\n", + "from databricks import agents\n", + "\n", + "client = MlflowClient()\n", + "\n", + "registered_model_version = client.get_model_version_by_alias(registered_model_name, \"Champion\")\n", + "\n", + "deployments = get_deployments(model_name=registered_model_name)\n", + "\n", + "latest_version = registered_model_version.version\n", + "\n", + "for deployment in deployments:\n", + " if deployment.model_name == registered_model_name:\n", + " print(f\"Deleting deployment: model={deployment.model_name}, version={deployment.model_version}\")\n", + " delete_deployment(model_name=deployment.model_name, model_version=deployment.model_version)\n", + "\n", + "\n", + "deployment_info = agents.deploy(\n", + " model_name=registered_model_name,\n", + " 
model_version=registered_model_version.version,\n", + ")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_endpoint_deploy", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/contrib/templates/llmops-basic/template/notebooks/model_evaluation.ipynb b/contrib/templates/llmops-basic/template/notebooks/model_evaluation.ipynb new file mode 100644 index 0000000..b303f32 --- /dev/null +++ b/contrib/templates/llmops-basic/template/notebooks/model_evaluation.ipynb @@ -0,0 +1,396 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "be63921b-4820-4872-a708-bf5722e0f8e7", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install -U -qqqq mlflow databricks-openai databricks-agents threadpoolctl==3.1.0\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "8af65798-0acc-4263-bc35-9212653d3987", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "dbutils.widgets.text(\"logged_run_id\", \"None\")\n", + "\n", + "catalog_name = 
dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "logged_run_id = dbutils.widgets.get(\"logged_run_id\")\n", + "\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "fec9759b-b50d-4611-be17-d3b51a3041a6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "import mlflow.genai.datasets\n", + "import time\n", + "from databricks.connect import DatabricksSession\n", + "\n", + "registered_model_name = f\"{catalog_name}.{schema_name}.{model_name}\"\n", + "model_uri = f\"runs:/{logged_run_id}/agent\"\n", + "\n", + "mlflow.set_experiment(experiment_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f0a33257-a9da-4f65-8c8a-657612d6d634", + "showTitle": false, + "tableResultSettingsMap": { + "0": { + "dataGridStateBlob": "{\"version\":1,\"tableState\":{\"columnPinning\":{\"left\":[\"#row_number#\"],\"right\":[]},\"columnSizing\":{},\"columnVisibility\":{}},\"settings\":{\"columns\":{}},\"syncTimestamp\":1756760635746}", + "filterBlob": null, + "queryPlanFiltersBlob": null, + "tableResultIndex": 0 + } + }, + "title": "" + } + }, + "outputs": [], + "source": [ + "query = f\"SELECT * FROM {catalog_name}.{schema_name}.labelled_sentences\"\n", + "df = spark.sql(query).toPandas()\n", + "display(df)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "0c997521-a041-46ed-8b37-b0b9ddf67df3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "from mlflow.genai.scorers import Guidelines, Correctness, RelevanceToQuery\n", + "import datetime\n", + "\n", + "eval_data = []\n", + "for _, row in df.iterrows():\n", + " eval_item = {\n", + " \"inputs\": {\n", + " \"content\": row[\"review\"] # This matches the function parameter name\n", + " },\n", + " \"expectations\": {\n", + " \"expected_response\": str(row[\"sentiment\"]) # Adjust column name\n", + " }\n", + " }\n", + " eval_data.append(eval_item)\n", + "\n", + "mlflow_eval_dataset = None\n", + " \n", + "try:\n", + " # Try to get existing dataset\n", + " mlflow_eval_dataset = mlflow.genai.get_dataset(f\"{catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " print(f\"✓ Using existing dataset: {catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " \n", + "except Exception as get_error:\n", + " print(f\"Dataset not found, creating dataset\")\n", + " \n", + " try:\n", + " # Create new dataset\n", + " mlflow_eval_dataset = mlflow.genai.create_dataset(f\"{catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " mlflow_eval_dataset.merge_records(eval_data)\n", + " dataset_created = True\n", + " print(f\"✓ Created new dataset: {catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " \n", + " except Exception as create_error:\n", + " print(f\"⚠ Could not create dataset {create_error}\")\n", + "\n", + "if (mlflow_eval_dataset is not None): \n", + " \n", + " guidelines = {\n", + " \"sentiment_accuracy\": \"Response must correctly identify sentiment\",\n", + " \"clarity\": [\"Response must be clear and concise\"]\n", + " }\n", + "\n", + " agent = mlflow.pyfunc.load_model(model_uri)\n", 
+ "\n", + " def predict_function(content):\n", + " try:\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": content\n", + " }\n", + " ]\n", + " prediction = agent.predict({\"messages\": messages})\n", + " return prediction\n", + " except Exception as e:\n", + " print(f\"Error in predict_fn: {e}\")\n", + " return {\"response\": \"PREDICTION_ERROR\"}\n", + " \n", + " timestamp = datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + " with mlflow.start_run(run_id=logged_run_id) as run:\n", + " with mlflow.start_run(run_name=f\"sentiment_agent_evaluation_{timestamp}\", nested=True) as eval_run:\n", + " results = mlflow.genai.evaluate(\n", + " data=mlflow_eval_dataset,\n", + " predict_fn=predict_function,\n", + " scorers=[\n", + " Correctness(),\n", + " Guidelines(name=\"sentiment_accuracy\", guidelines=guidelines[\"sentiment_accuracy\"]),\n", + " Guidelines(name=\"clarity\", guidelines=guidelines[\"clarity\"]),\n", + " ],\n", + " )\n", + "\n", + " # Print results\n", + " print(\"Evaluation Results:\")\n", + " print(results.metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "281e07cf-0163-453a-9256-c508b37cd073", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "from mlflow import MlflowClient\n", + "client = MlflowClient()\n", + "\n", + "registered_model = mlflow.register_model(model_uri, name=registered_model_name)\n", + "\n", + "if results.metrics['sentiment_accuracy/mean'] > 0.9:\n", + " print(\"Transitioning to champion\")\n", + " client.set_registered_model_alias(registered_model_name, \"Champion\",registered_model.version)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + 
"environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_evaluation", + "widgets": { + "catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "3a964089-b210-4dc6-aa79-c7c287530d31", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "experiment_name": { + "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "nuid": "d243520b-4f07-4843-b294-0e3b3489d85b", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "label": null, + "name": "experiment_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "label": null, + "name": "experiment_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "logged_run_id": { + "currentValue": "None", + "nuid": "970893b9-068c-4fff-af42-e9b346618f94", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "None", + "label": null, + "name": "logged_run_id", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "None", + "label": null, + "name": "logged_run_id", + "options": 
{ + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + }, + "model_name": { + "currentValue": "quickstart_agent", + "nuid": "06926edb-a96f-4f02-a702-f7c542e5ef3c", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "febc8306-c467-4694-9bfe-6467bb3558a2", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/contrib/templates/llmops-basic/template/notebooks/model_inference.ipynb b/contrib/templates/llmops-basic/template/notebooks/model_inference.ipynb new file mode 100644 index 0000000..fc8fe43 --- /dev/null +++ b/contrib/templates/llmops-basic/template/notebooks/model_inference.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "be63921b-4820-4872-a708-bf5722e0f8e7", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip 
install -U -qqqq mlflow databricks-openai databricks-agents\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "138d2537-42eb-4fae-82b8-e11021a8a85b", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "fec9759b-b50d-4611-be17-d3b51a3041a6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "import mlflow.genai.datasets\n", + "import time\n", + "from databricks.connect import DatabricksSession\n", + "\n", + "mlflow.set_experiment(experiment_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "40bbaf35-a61d-439f-9bf0-b160a96223ab", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + 
"query = f\"SELECT * FROM {catalog_name}.{schema_name}.sentences\"\n", + "df = spark.sql(query).toPandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "0c997521-a041-46ed-8b37-b0b9ddf67df3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "\n", + "model_uri = f\"models:/{catalog_name}.{schema_name}.{model_name}@champion\"\n", + "agent = mlflow.pyfunc.load_model(model_uri)\n", + "\n", + "def predict_sentiment(review):\n", + " messages = [{\n", + " \"role\": \"user\",\n", + " \"content\": f\"{review}\"\n", + " }]\n", + " return agent.predict({\"messages\": messages})\n", + "\n", + "df['predictions'] = df['review'].apply(predict_sentiment)\n", + "display(df)\n" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_inference", + "widgets": { + "catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "74cb3499-b670-4a9b-a326-ad9c7a804698", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + }, + "model_name": { + "currentValue": "quickstart_agent", + "nuid": 
"bfb488ca-a384-4fb5-a7ab-d05661a9d52d", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "1b42e31d-1ff1-4bf6-8a06-b55c595e45a9", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/contrib/templates/llmops-basic/template/notebooks/model_preprocessing.ipynb b/contrib/templates/llmops-basic/template/notebooks/model_preprocessing.ipynb new file mode 100644 index 0000000..3b99c12 --- /dev/null +++ b/contrib/templates/llmops-basic/template/notebooks/model_preprocessing.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "97addce4-2562-486a-bc4c-2b39a8dd9756", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Generating inference data if it doesn't exist" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + 
}, + "inputWidgets": {}, + "nuid": "c6242b0e-2772-410e-b7e9-b0bebe58d018", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "1e892951-2eef-40d1-b3f8-0a3be588a4d9", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Create the table with two columns: ID and review\n", + "spark.sql(f\"CREATE OR REPLACE TABLE {catalog_name}.{schema_name}.sentences (ID INT, review STRING)\")\n", + "\n", + "# Insert 200 values into the table with proper movie review text\n", + "movie_reviews = [\n", + " \"A thrilling masterpiece with stunning visuals.\",\n", + " \"An emotional rollercoaster that captivates from start to finish.\",\n", + " \"A lackluster plot with underwhelming performances.\",\n", + " \"A heartwarming tale that resonates with audiences of all ages.\",\n", + " \"A groundbreaking film that redefines the genre.\",\n", + " \"A predictable storyline with clichéd characters.\",\n", + " \"A visually stunning film with a compelling narrative.\",\n", + " \"An inspiring story of hope and resilience.\",\n", + " \"A forgettable movie with a weak script.\",\n", + " \"A charming and delightful film that entertains throughout.\"\n", + "]\n", + "\n", + "# Repeat the reviews to fill 200 entries\n", + "reviews_to_insert = (movie_reviews * (200 // len(movie_reviews) + 1))[:200]\n", + "\n", + "spark.sql(f\"INSERT INTO {catalog_name}.{schema_name}.sentences (ID, review) VALUES\" + \", \".join([f\"({i}, 
'{reviews_to_insert[i-1]}')\" for i in range(1, 201)]) + \";\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "34cc0494-a537-4e1d-933e-a123da2d70b2", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "display(spark.read.table(f\"{catalog_name}.{schema_name}.sentences\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "a2ef7511-a2d9-4259-b890-fda7a99182e8", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Generating labelled data if it doesn't exist" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "9d9f0618-fa45-4541-a0ed-b50cfe5257a6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Create the labelled table with an additional sentiment column\n", + "spark.sql(f\"CREATE OR REPLACE TABLE {catalog_name}.{schema_name}.labelled_sentences AS SELECT ID, review, ai_analyze_sentiment(review) AS sentiment FROM {catalog_name}.{schema_name}.sentences\")\n", + "\n", + "# Display the new table\n", + "display(spark.read.table(f\"{catalog_name}.{schema_name}.labelled_sentences\"))\n" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_preprocessing", + "widgets": { + 
"catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "c27b5912-2ca9-46c0-ae51-7f7dcf693134", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "25f2fe7e-e320-4793-a8cb-2fe969ec7d13", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/contrib/templates/llmops-basic/template/notebooks/quickstart_agent.py b/contrib/templates/llmops-basic/template/notebooks/quickstart_agent.py new file mode 100644 index 0000000..9fb782f --- /dev/null +++ b/contrib/templates/llmops-basic/template/notebooks/quickstart_agent.py @@ -0,0 +1,74 @@ +import json +import uuid +from databricks.sdk import WorkspaceClient +from databricks_openai import UCFunctionToolkit, DatabricksFunctionClient +from typing import Any, Optional, Iterator + +import mlflow +from mlflow.pyfunc import ChatAgent +from mlflow.types.agent import ChatAgentMessage, ChatAgentResponse, ChatContext + +# Get an OpenAI client configured to talk to Databricks model serving endpoints +# We'll use this to query an LLM in our agent +openai_client = 
WorkspaceClient().serving_endpoints.get_open_ai_client() + +# Name of the Databricks model serving endpoint that run_agent() queries. +# It is hard-coded here; change LLM_ENDPOINT_NAME to point the agent at a +# different chat-completions endpoint available in your workspace. +LLM_ENDPOINT_NAME = "databricks-claude-3-7-sonnet" + +# Enable automatic tracing of LLM calls +mlflow.openai.autolog() + + +@mlflow.trace +def run_agent(content, system_prompt=None): + """ + Send a user prompt (preceded by an optional system prompt) to the LLM endpoint and + return a one-element list holding the first response choice's message as a dict. + """ + messages = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + + messages.append({"role": "user", "content": content}) + + response = openai_client.chat.completions.create( + model=LLM_ENDPOINT_NAME, + messages=messages, + ) + msg = response.choices[0].message + return [msg.to_dict()] + + +class QuickstartAgent(ChatAgent): + def __init__(self): + super().__init__() + self.system_prompt = ( + "You are a sentiment analysis expert. Analyze text sentiment " + "and respond with exactly one word: positive, neutral, or negative." 
+ ) + + def predict( + self, + messages: list[ChatAgentMessage], + context: Optional[ChatContext] = None, + custom_inputs: Optional[dict[str, Any]] = None, + ) -> ChatAgentResponse: + message = messages[-1].content + user_prompt = f"Analyze this phrase: {message}" + raw_msgs = run_agent( + content=user_prompt, + system_prompt=self.system_prompt + ) + out = [] + for m in raw_msgs: + out.append(ChatAgentMessage( + id=uuid.uuid4().hex, + **m + )) + return ChatAgentResponse(messages=out) + + +AGENT = QuickstartAgent() +mlflow.models.set_model(AGENT) diff --git a/contrib/templates/llmops-basic/template/resources/model_artifacts.yml.tmpl b/contrib/templates/llmops-basic/template/resources/model_artifacts.yml.tmpl new file mode 100644 index 0000000..bcb7891 --- /dev/null +++ b/contrib/templates/llmops-basic/template/resources/model_artifacts.yml.tmpl @@ -0,0 +1,21 @@ +resources: + registered_models: + sentiment_model: + name: ${var.model_name} + catalog_name: ${var.catalog_name} + schema_name: ${resources.schemas.sentiment_schema.name} + comment: Registered model in Unity Catalog for ${bundle.target} deployment target + grants: + - privileges: + - EXECUTE + principal: account users + + experiments: + experiment: + name: ${var.experiment_name} + + schemas: + sentiment_schema: + name: ${var.schema_name} + catalog_name: ${var.catalog_name} + diff --git a/contrib/templates/llmops-basic/template/resources/model_build_evaluation.job.yml.tmpl b/contrib/templates/llmops-basic/template/resources/model_build_evaluation.job.yml.tmpl new file mode 100644 index 0000000..5633175 --- /dev/null +++ b/contrib/templates/llmops-basic/template/resources/model_build_evaluation.job.yml.tmpl @@ -0,0 +1,32 @@ +resources: + jobs: + model_build_evaluation: + name: model_build_evaluation + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: "${resources.schemas.sentiment_schema.name}" + - name: "model_name" + default: 
"${resources.registered_models.sentiment_model.name}" + - name: "experiment_name" + default: "${resources.experiments.experiment.name}" + + tasks: + - task_key: model_build + notebook_task: + notebook_path: "../notebooks/model_build.ipynb" + + - task_key: model_evaluation + depends_on: + - task_key: model_build + notebook_task: + notebook_path: "../notebooks/model_evaluation.ipynb" + base_parameters: + logged_run_id: "{{"{{"}}tasks.model_build.values.logged_run_id{{"}}"}}" + tags: + Project: "${bundle.name}" + diff --git a/contrib/templates/llmops-basic/template/resources/model_endpoint_deploy.job.yml.tmpl b/contrib/templates/llmops-basic/template/resources/model_endpoint_deploy.job.yml.tmpl new file mode 100644 index 0000000..a357d7a --- /dev/null +++ b/contrib/templates/llmops-basic/template/resources/model_endpoint_deploy.job.yml.tmpl @@ -0,0 +1,25 @@ +resources: + jobs: + model_deploy: + name: model_endpoint_deploy + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: "${resources.schemas.sentiment_schema.name}" + - name: "model_name" + default: "${resources.registered_models.sentiment_model.name}" + - name: "experiment_name" + default: "${resources.experiments.experiment.name}" + + tasks: + - task_key: model_endpoint_deploy + notebook_task: + notebook_path: "../notebooks/model_endpoint_deploy.ipynb" + + tags: + Project: "${bundle.name}" + diff --git a/contrib/templates/llmops-basic/template/resources/model_inference.job.yml.tmpl b/contrib/templates/llmops-basic/template/resources/model_inference.job.yml.tmpl new file mode 100644 index 0000000..5246476 --- /dev/null +++ b/contrib/templates/llmops-basic/template/resources/model_inference.job.yml.tmpl @@ -0,0 +1,25 @@ +resources: + jobs: + model_inference: + name: model_inference + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: 
"${resources.schemas.sentiment_schema.name}" + - name: "model_name" + default: "${resources.registered_models.sentiment_model.name}" + - name: "experiment_name" + default: "${resources.experiments.experiment.name}" + + tasks: + - task_key: model_inference + notebook_task: + notebook_path: "../notebooks/model_inference.ipynb" + + tags: + Project: "${bundle.name}" + diff --git a/contrib/templates/llmops-basic/template/resources/model_preprocessing.job.yml.tmpl b/contrib/templates/llmops-basic/template/resources/model_preprocessing.job.yml.tmpl new file mode 100644 index 0000000..d359088 --- /dev/null +++ b/contrib/templates/llmops-basic/template/resources/model_preprocessing.job.yml.tmpl @@ -0,0 +1,21 @@ +resources: + jobs: + model_preprocessing: + name: model_preprocessing + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: "${resources.schemas.sentiment_schema.name}" + + tasks: + - task_key: model_preprocessing + notebook_task: + notebook_path: "../notebooks/model_preprocessing.ipynb" + + tags: + Project: "${bundle.name}" + diff --git a/llmops-basic/.gitignore b/llmops-basic/.gitignore new file mode 100644 index 0000000..dc1fd6c --- /dev/null +++ b/llmops-basic/.gitignore @@ -0,0 +1,35 @@ +# Databricks +.databricks/ +.bundle/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +env/ +ENV/ + +# Jupyter +.ipynb_checkpoints/ +*.ipynb_checkpoints + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# MLflow +mlruns/ + +# Logs +*.log + diff --git a/llmops-basic/README.md b/llmops-basic/README.md new file mode 100644 index 0000000..4d14c3d --- /dev/null +++ b/llmops-basic/README.md @@ -0,0 +1,45 @@ +# llmops_basic + +End-to-end LLMOps project for sentiment analysis of reviews using Databricks. + +## Overview + +This example demonstrates a complete LLMOps pipeline for building, evaluating, and deploying a sentiment analysis model. 
+ +**Pipeline stages:** +- Data preprocessing +- Model build and logging with MLflow +- Model evaluation +- Model deployment to serving endpoint +- Batch inference + +## Requirements + +- Databricks CLI (v0.218.0+) +- Unity Catalog enabled +- Required permissions on the catalog: + - `USE CATALOG` - to access the catalog + - `CREATE SCHEMA` - to create schemas + +## Quick Start + +**Deploy:** +```bash +databricks bundle deploy -t dev +``` + +**Run pipeline:** +```bash +databricks bundle run model_preprocessing -t dev +databricks bundle run model_build_evaluation -t dev +databricks bundle run model_deploy -t dev +databricks bundle run model_inference -t dev +``` + +## Configuration + +- **Dev Catalog**: `dev_catalog` +- **Prod Catalog**: `prod_catalog` + +Edit `databricks.yml` to customize catalog names, schema name, model name, and experiment settings. + diff --git a/llmops-basic/databricks.yml b/llmops-basic/databricks.yml new file mode 100644 index 0000000..6fd1c8b --- /dev/null +++ b/llmops-basic/databricks.yml @@ -0,0 +1,43 @@ +# This is a Databricks Asset Bundle for llmops_basic.
+bundle: + name: "llmops_basic" + +variables: + catalog_name: + description: "Name of the UC catalog to use" + default: "default_catalog" + schema_name: + description: "Name of the UC schema to use" + default: "sentiment_agent_project" + model_name: + description: "Name of the UC model to use" + default: "sentiment_agent" + experiment_name: + description: "Name of experiment" + default: "/Users/${workspace.current_user.userName}/${bundle.target}_sentiment_agent" + +include: + - resources/*.yml + +targets: + dev: + mode: development + default: true + workspace: + host: https://your-workspace.azuredatabricks.net/ + variables: + catalog_name: dev_catalog + + prod: + mode: production + workspace: + host: https://your-workspace.azuredatabricks.net/ + root_path: /Shared/.bundle/prod/${bundle.name} + variables: + catalog_name: prod_catalog + run_as: + # This runs as bruno.tafur@databricks.com in production. Alternatively, + # a service principal could be used here using service_principal_name + # (see Databricks documentation). 
+ user_name: bruno.tafur@databricks.com + \ No newline at end of file diff --git a/llmops-basic/notebooks/model_build.ipynb b/llmops-basic/notebooks/model_build.ipynb new file mode 100644 index 0000000..81365c2 --- /dev/null +++ b/llmops-basic/notebooks/model_build.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "64ed089a-931a-4123-afdc-a2c38390774c", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install -U -qqqq mlflow databricks-openai databricks-agents\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "7f453049-63d9-46b2-8421-92f8c6a5442a", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "\n", + "\n", + "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": 
"114153c0-b167-451b-a2ac-6d4f48faeaec", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "\n", + "registered_model_name = f\"{catalog_name}.{schema_name}.{model_name}\"\n", + "\n", + "mlflow.set_experiment(experiment_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "154fbd28-5ad2-4a51-8ccf-fa850dffc5bd", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "from mlflow.models.resources import DatabricksFunction, DatabricksServingEndpoint\n", + "from importlib.metadata import version as get_version\n", + "from quickstart_agent import LLM_ENDPOINT_NAME\n", + "import datetime\n", + "\n", + "mlflow.set_registry_uri(\"databricks-uc\")\n", + "resources = [\n", + " DatabricksServingEndpoint(endpoint_name=LLM_ENDPOINT_NAME),\n", + "]\n", + "\n", + "timestamp = datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + "\n", + "with mlflow.start_run(run_name=f\"sentiment_agent_{timestamp}\"):\n", + " logged_model_info = mlflow.pyfunc.log_model(\n", + " artifact_path=\"agent\",\n", + " python_model=\"quickstart_agent.py\",\n", + " extra_pip_requirements=[\n", + " f\"databricks-connect=={get_version('databricks-connect')}\"\n", + " ],\n", + " resources=resources\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "ea348fb4-9a9f-4772-aaf1-65a646ca5739", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.jobs.taskValues.set(key = \"logged_run_id\", value = logged_model_info.run_id)" + ] + } + ], + "metadata": { +
"application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_build", + "widgets": { + "catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "fd954ea2-6905-4526-b686-38b43c5b6d97", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + }, + "experiment_name": { + "currentValue": "", + "nuid": "818e024c-c654-4a9c-9f36-7e595b9a9e5f", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "tee", + "label": null, + "name": "experiment_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "tee", + "label": null, + "name": "experiment_name", + "options": { + "autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + }, + "model_name": { + "currentValue": "quickstart_agent", + "nuid": "93dd77ea-7b35-410c-af71-589d260ae712", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + 
"autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "88bd61ef-aa5c-42a2-ad26-f9079c1afc5f", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "validationRegex": null, + "widgetDisplayType": "Text" + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "autoCreated": null, + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/llmops-basic/notebooks/model_endpoint_deploy.ipynb b/llmops-basic/notebooks/model_endpoint_deploy.ipynb new file mode 100644 index 0000000..a411e34 --- /dev/null +++ b/llmops-basic/notebooks/model_endpoint_deploy.ipynb @@ -0,0 +1,118 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "28525745-4fb6-4667-92f1-1e0e3fa68da1", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install -U -qqqq mlflow databricks-openai databricks-agents\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "b2519e1e-ef7e-439c-ab55-5c17e77c8b25", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "\n", + "catalog_name = 
dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")\n", + "\n", + "import mlflow\n", + "\n", + "mlflow.set_experiment(experiment_name)\n", + "\n", + "registered_model_name = f\"{catalog_name}.{schema_name}.{model_name}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "b2a5f820-740b-4e9d-8496-4b54c3350156", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "#https://docs.databricks.com/aws/en/generative-ai/agent-framework/deploy-agent#deploy-an-agent-using-deploy\n", + "\n", + "from mlflow import MlflowClient\n", + "from databricks.agents import get_deployments, delete_deployment\n", + "from databricks import agents\n", + "\n", + "client = MlflowClient()\n", + "\n", + "registered_model_version = client.get_model_version_by_alias(registered_model_name, \"Champion\")\n", + "\n", + "deployments = get_deployments(model_name=registered_model_name)\n", + "\n", + "latest_version = registered_model_version.version\n", + "\n", + "for deployment in deployments:\n", + " if deployment.model_name == registered_model_name:\n", + " print(f\"Deleting deployment: model={deployment.model_name}, version={deployment.model_version}\")\n", + " delete_deployment(model_name=deployment.model_name, model_version=deployment.model_version)\n", + "\n", + "\n", + "deployment_info = agents.deploy(\n", + " model_name=registered_model_name,\n", + " model_version=registered_model_version.version,\n", + ")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + 
"computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_endpoint_deploy", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/llmops-basic/notebooks/model_evaluation.ipynb b/llmops-basic/notebooks/model_evaluation.ipynb new file mode 100644 index 0000000..b303f32 --- /dev/null +++ b/llmops-basic/notebooks/model_evaluation.ipynb @@ -0,0 +1,396 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "be63921b-4820-4872-a708-bf5722e0f8e7", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install -U -qqqq mlflow databricks-openai databricks-agents threadpoolctl==3.1.0\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "8af65798-0acc-4263-bc35-9212653d3987", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "dbutils.widgets.text(\"logged_run_id\", \"None\")\n", + "\n", + "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "logged_run_id = dbutils.widgets.get(\"logged_run_id\")\n", + 
"\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "fec9759b-b50d-4611-be17-d3b51a3041a6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "import mlflow.genai.datasets\n", + "import time\n", + "from databricks.connect import DatabricksSession\n", + "\n", + "registered_model_name = f\"{catalog_name}.{schema_name}.{model_name}\"\n", + "model_uri = f\"runs:/{logged_run_id}/agent\"\n", + "\n", + "mlflow.set_experiment(experiment_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "f0a33257-a9da-4f65-8c8a-657612d6d634", + "showTitle": false, + "tableResultSettingsMap": { + "0": { + "dataGridStateBlob": "{\"version\":1,\"tableState\":{\"columnPinning\":{\"left\":[\"#row_number#\"],\"right\":[]},\"columnSizing\":{},\"columnVisibility\":{}},\"settings\":{\"columns\":{}},\"syncTimestamp\":1756760635746}", + "filterBlob": null, + "queryPlanFiltersBlob": null, + "tableResultIndex": 0 + } + }, + "title": "" + } + }, + "outputs": [], + "source": [ + "query = f\"SELECT * FROM {catalog_name}.{schema_name}.labelled_sentences\"\n", + "df = spark.sql(query).toPandas()\n", + "display(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": 
"0c997521-a041-46ed-8b37-b0b9ddf67df3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "from mlflow.genai.scorers import Guidelines, Correctness, RelevanceToQuery\n", + "import datetime\n", + "\n", + "eval_data = []\n", + "for _, row in df.iterrows():\n", + " eval_item = {\n", + " \"inputs\": {\n", + " \"content\": row[\"review\"] # This matches the function parameter name\n", + " },\n", + " \"expectations\": {\n", + " \"expected_response\": str(row[\"sentiment\"]) # Adjust column name\n", + " }\n", + " }\n", + " eval_data.append(eval_item)\n", + "\n", + "mlflow_eval_dataset = None\n", + " \n", + "try:\n", + " # Try to get existing dataset\n", + " mlflow_eval_dataset = mlflow.genai.get_dataset(f\"{catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " print(f\"✓ Using existing dataset: {catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " \n", + "except Exception as get_error:\n", + " print(f\"Dataset not found, creating dataset\")\n", + " \n", + " try:\n", + " # Create new dataset\n", + " mlflow_eval_dataset = mlflow.genai.create_dataset(f\"{catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " mlflow_eval_dataset.merge_records(eval_data)\n", + " dataset_created = True\n", + " print(f\"✓ Created new dataset: {catalog_name}.{schema_name}.mlflow_eval_dataset\")\n", + " \n", + " except Exception as create_error:\n", + " print(f\"⚠ Could not create dataset {create_error}\")\n", + "\n", + "if (mlflow_eval_dataset is not None): \n", + " \n", + " guidelines = {\n", + " \"sentiment_accuracy\": \"Response must correctly identify sentiment\",\n", + " \"clarity\": [\"Response must be clear and concise\"]\n", + " }\n", + "\n", + " agent = mlflow.pyfunc.load_model(model_uri)\n", + "\n", + " def predict_function(content):\n", + " try:\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": content\n", + " }\n", + " ]\n", + " prediction = 
agent.predict({\"messages\": messages})\n", + " return prediction\n", + " except Exception as e:\n", + " print(f\"Error in predict_fn: {e}\")\n", + " return {\"response\": \"PREDICTION_ERROR\"}\n", + " \n", + " timestamp = datetime.datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", + " with mlflow.start_run(run_id=logged_run_id) as run:\n", + " with mlflow.start_run(run_name=f\"sentiment_agent_evaluation_{timestamp}\", nested=True) as eval_run:\n", + " results = mlflow.genai.evaluate(\n", + " data=mlflow_eval_dataset,\n", + " predict_fn=predict_function,\n", + " scorers=[\n", + " Correctness(),\n", + " Guidelines(name=\"sentiment_accuracy\", guidelines=guidelines[\"sentiment_accuracy\"]),\n", + " Guidelines(name=\"clarity\", guidelines=guidelines[\"clarity\"]),\n", + " ],\n", + " )\n", + "\n", + " # Print results\n", + " print(\"Evaluation Results:\")\n", + " print(results.metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "281e07cf-0163-453a-9256-c508b37cd073", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "from mlflow import MlflowClient\n", + "client = MlflowClient()\n", + "if mlflow_eval_dataset is None: raise RuntimeError(\"Evaluation did not run; refusing to register an unevaluated model\")\n", + "registered_model = mlflow.register_model(model_uri, name=registered_model_name)\n", + "# Promote to Champion only when the mean sentiment_accuracy score exceeds 0.9\n", + "if results.metrics['sentiment_accuracy/mean'] > 0.9:\n", + " print(\"Transitioning to champion\")\n", + " client.set_registered_model_alias(registered_model_name, \"Champion\",registered_model.version)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, +
"notebookName": "model_evaluation", + "widgets": { + "catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "3a964089-b210-4dc6-aa79-c7c287530d31", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "experiment_name": { + "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "nuid": "d243520b-4f07-4843-b294-0e3b3489d85b", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "label": null, + "name": "experiment_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "label": null, + "name": "experiment_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "logged_run_id": { + "currentValue": "None", + "nuid": "970893b9-068c-4fff-af42-e9b346618f94", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "None", + "label": null, + "name": "logged_run_id", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "None", + "label": null, + "name": "logged_run_id", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + }, + "model_name": { + "currentValue": "quickstart_agent", + "nuid": "06926edb-a96f-4f02-a702-f7c542e5ef3c", + 
"typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "febc8306-c467-4694-9bfe-6467bb3558a2", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/llmops-basic/notebooks/model_inference.ipynb b/llmops-basic/notebooks/model_inference.ipynb new file mode 100644 index 0000000..fc8fe43 --- /dev/null +++ b/llmops-basic/notebooks/model_inference.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "be63921b-4820-4872-a708-bf5722e0f8e7", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%pip install -U -qqqq mlflow databricks-openai databricks-agents\n", + "dbutils.library.restartPython()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 
+ }, + "inputWidgets": {}, + "nuid": "138d2537-42eb-4fae-82b8-e11021a8a85b", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", + "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")\n", + "model_name = dbutils.widgets.get(\"model_name\")\n", + "\n", + "dbutils.widgets.text(\"experiment_name\", f\"/Users/{dbutils.notebook.entry_point.getDbutils().notebook().getContext().userName().get()}/{model_name}_{catalog_name}\")\n", + "experiment_name = dbutils.widgets.get(\"experiment_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "fec9759b-b50d-4611-be17-d3b51a3041a6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "import mlflow\n", + "import mlflow.genai.datasets\n", + "import time\n", + "from databricks.connect import DatabricksSession\n", + "\n", + "mlflow.set_experiment(experiment_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "40bbaf35-a61d-439f-9bf0-b160a96223ab", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "query = f\"SELECT * FROM {catalog_name}.{schema_name}.sentences\"\n", + "df = spark.sql(query).toPandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 
10000 + }, + "inputWidgets": {}, + "nuid": "0c997521-a041-46ed-8b37-b0b9ddf67df3", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Alias must match the one set by the model_evaluation notebook (Champion)\n", + "model_uri = f\"models:/{catalog_name}.{schema_name}.{model_name}@Champion\"\n", + "agent = mlflow.pyfunc.load_model(model_uri)\n", + "\n", + "def predict_sentiment(review):\n", + " messages = [{\n", + " \"role\": \"user\",\n", + " \"content\": f\"{review}\"\n", + " }]\n", + " return agent.predict({\"messages\": messages})\n", + "\n", + "df['predictions'] = df['review'].apply(predict_sentiment)\n", + "display(df)\n" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_inference", + "widgets": { + "catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "74cb3499-b670-4a9b-a326-ad9c7a804698", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + }, + "model_name": { + "currentValue": "quickstart_agent", + "nuid": "bfb488ca-a384-4fb5-a7ab-d05661a9d52d", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, +
"widgetInfo": { + "widgetType": "text", + "defaultValue": "quickstart_agent", + "label": null, + "name": "model_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "1b42e31d-1ff1-4bf6-8a06-b55c595e45a9", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/llmops-basic/notebooks/model_preprocessing.ipynb b/llmops-basic/notebooks/model_preprocessing.ipynb new file mode 100644 index 0000000..3b99c12 --- /dev/null +++ b/llmops-basic/notebooks/model_preprocessing.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "97addce4-2562-486a-bc4c-2b39a8dd9756", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Generating inference data (the table is recreated on every run)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "c6242b0e-2772-410e-b7e9-b0bebe58d018", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"schema_name\", \"default\")\n", + "catalog_name =
dbutils.widgets.get(\"catalog_name\")\n", + "schema_name = dbutils.widgets.get(\"schema_name\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "1e892951-2eef-40d1-b3f8-0a3be588a4d9", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Create the table with two columns: ID and review\n", + "spark.sql(f\"CREATE OR REPLACE TABLE {catalog_name}.{schema_name}.sentences (ID INT, review STRING)\")\n", + "\n", + "# Insert 200 values into the table with proper movie review text\n", + "movie_reviews = [\n", + " \"A thrilling masterpiece with stunning visuals.\",\n", + " \"An emotional rollercoaster that captivates from start to finish.\",\n", + " \"A lackluster plot with underwhelming performances.\",\n", + " \"A heartwarming tale that resonates with audiences of all ages.\",\n", + " \"A groundbreaking film that redefines the genre.\",\n", + " \"A predictable storyline with clichéd characters.\",\n", + " \"A visually stunning film with a compelling narrative.\",\n", + " \"An inspiring story of hope and resilience.\",\n", + " \"A forgettable movie with a weak script.\",\n", + " \"A charming and delightful film that entertains throughout.\"\n", + "]\n", + "\n", + "# Repeat the reviews to fill 200 entries\n", + "reviews_to_insert = (movie_reviews * (200 // len(movie_reviews) + 1))[:200]\n", + "\n", + "spark.sql(f\"INSERT INTO {catalog_name}.{schema_name}.sentences (ID, review) VALUES\" + \", \".join([f\"({i}, '{reviews_to_insert[i-1]}')\" for i in range(1, 201)]) + \";\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "34cc0494-a537-4e1d-933e-a123da2d70b2", + "showTitle": false, 
+ "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "display(spark.read.table(f\"{catalog_name}.{schema_name}.sentences\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "a2ef7511-a2d9-4259-b890-fda7a99182e8", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "## Generating labelled data (the table is recreated on every run)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "9d9f0618-fa45-4541-a0ed-b50cfe5257a6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# Create the labelled table with an additional sentiment column\n", + "spark.sql(f\"CREATE OR REPLACE TABLE {catalog_name}.{schema_name}.labelled_sentences AS SELECT ID, review, ai_analyze_sentiment(review) AS sentiment FROM {catalog_name}.{schema_name}.sentences\")\n", + "\n", + "# Display the new table\n", + "display(spark.read.table(f\"{catalog_name}.{schema_name}.labelled_sentences\"))\n" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "3" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "model_preprocessing", + "widgets": { + "catalog_name": { + "currentValue": "btafur_catalog", + "nuid": "c27b5912-2ca9-46c0-ae51-7f7dcf693134", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, +
"widgetInfo": { + "widgetType": "text", + "defaultValue": "btafur_catalog", + "label": null, + "name": "catalog_name", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, + "schema_name": { + "currentValue": "default", + "nuid": "25f2fe7e-e320-4793-a8cb-2fe969ec7d13", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "default", + "label": null, + "name": "schema_name", + "options": { + "widgetType": "text", + "autoCreated": false, + "validationRegex": null + } + } + } + } + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/llmops-basic/notebooks/quickstart_agent.py b/llmops-basic/notebooks/quickstart_agent.py new file mode 100644 index 0000000..9fb782f --- /dev/null +++ b/llmops-basic/notebooks/quickstart_agent.py @@ -0,0 +1,74 @@ +import json +import uuid +from databricks.sdk import WorkspaceClient +from databricks_openai import UCFunctionToolkit, DatabricksFunctionClient +from typing import Any, Optional, Iterator + +import mlflow +from mlflow.pyfunc import ChatAgent +from mlflow.types.agent import ChatAgentMessage, ChatAgentResponse, ChatContext + +# Get an OpenAI client configured to talk to Databricks model serving endpoints +# We'll use this to query an LLM in our agent +openai_client = WorkspaceClient().serving_endpoints.get_open_ai_client() + +# Name of the Databricks model serving endpoint that the agent queries. +# It is hard-coded below; change LLM_ENDPOINT_NAME to target a different +# chat-completions endpoint available in your workspace.
+LLM_ENDPOINT_NAME = "databricks-claude-3-7-sonnet" + +# Enable automatic tracing of LLM calls +mlflow.openai.autolog() + + +@mlflow.trace +def run_agent(content, system_prompt=None): + """ + Send the user content (preceded by the optional system prompt) to the LLM_ENDPOINT_NAME endpoint + and return the first response choice's message, converted with to_dict(), as a single-element list. No tools are passed to the LLM. + """ + messages = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + + messages.append({"role": "user", "content": content}) + + response = openai_client.chat.completions.create( + model=LLM_ENDPOINT_NAME, + messages=messages, + ) + msg = response.choices[0].message + return [msg.to_dict()] + + +class QuickstartAgent(ChatAgent): + def __init__(self): + super().__init__() + self.system_prompt = ( + "You are a sentiment analysis expert. Analyze text sentiment " + "and respond with exactly one word: positive, neutral, or negative." + ) + + def predict( + self, + messages: list[ChatAgentMessage], + context: Optional[ChatContext] = None, + custom_inputs: Optional[dict[str, Any]] = None, + ) -> ChatAgentResponse: + message = messages[-1].content + user_prompt = f"Analyze this phrase: {message}" + raw_msgs = run_agent( + content=user_prompt, + system_prompt=self.system_prompt + ) + out = [] + for m in raw_msgs: + out.append(ChatAgentMessage( + id=uuid.uuid4().hex, + **m + )) + return ChatAgentResponse(messages=out) + + +AGENT = QuickstartAgent() +mlflow.models.set_model(AGENT) diff --git a/llmops-basic/resources/model_artifacts.yml b/llmops-basic/resources/model_artifacts.yml new file mode 100644 index 0000000..bcb7891 --- /dev/null +++ b/llmops-basic/resources/model_artifacts.yml @@ -0,0 +1,21 @@ +resources: + registered_models: + sentiment_model: + name: ${var.model_name} + catalog_name: ${var.catalog_name} + schema_name: ${resources.schemas.sentiment_schema.name} + comment: Registered model in Unity Catalog for ${bundle.target} deployment target + grants: + - privileges: + - EXECUTE
+ principal: account users + + experiments: + experiment: + name: ${var.experiment_name} + + schemas: + sentiment_schema: + name: ${var.schema_name} + catalog_name: ${var.catalog_name} + diff --git a/llmops-basic/resources/model_build_evaluation.job.yml b/llmops-basic/resources/model_build_evaluation.job.yml new file mode 100644 index 0000000..856d808 --- /dev/null +++ b/llmops-basic/resources/model_build_evaluation.job.yml @@ -0,0 +1,32 @@ +resources: + jobs: + model_build_evaluation: + name: model_build_evaluation + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: "${resources.schemas.sentiment_schema.name}" + - name: "model_name" + default: "${resources.registered_models.sentiment_model.name}" + - name: "experiment_name" + default: "${resources.experiments.experiment.name}" + + tasks: + - task_key: model_build + notebook_task: + notebook_path: "../notebooks/model_build.ipynb" + + - task_key: model_evaluation + depends_on: + - task_key: model_build + notebook_task: + notebook_path: "../notebooks/model_evaluation.ipynb" + base_parameters: + logged_run_id: "{{tasks.model_build.values.logged_run_id}}" + tags: + Project: "${bundle.name}" + diff --git a/llmops-basic/resources/model_endpoint_deploy.job.yml b/llmops-basic/resources/model_endpoint_deploy.job.yml new file mode 100644 index 0000000..a357d7a --- /dev/null +++ b/llmops-basic/resources/model_endpoint_deploy.job.yml @@ -0,0 +1,25 @@ +resources: + jobs: + model_deploy: + name: model_endpoint_deploy + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: "${resources.schemas.sentiment_schema.name}" + - name: "model_name" + default: "${resources.registered_models.sentiment_model.name}" + - name: "experiment_name" + default: "${resources.experiments.experiment.name}" + + tasks: + - task_key: model_endpoint_deploy + notebook_task: + notebook_path: 
"../notebooks/model_endpoint_deploy.ipynb" + + tags: + Project: "${bundle.name}" + diff --git a/llmops-basic/resources/model_inference.job.yml b/llmops-basic/resources/model_inference.job.yml new file mode 100644 index 0000000..5246476 --- /dev/null +++ b/llmops-basic/resources/model_inference.job.yml @@ -0,0 +1,25 @@ +resources: + jobs: + model_inference: + name: model_inference + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: "${resources.schemas.sentiment_schema.name}" + - name: "model_name" + default: "${resources.registered_models.sentiment_model.name}" + - name: "experiment_name" + default: "${resources.experiments.experiment.name}" + + tasks: + - task_key: model_inference + notebook_task: + notebook_path: "../notebooks/model_inference.ipynb" + + tags: + Project: "${bundle.name}" + diff --git a/llmops-basic/resources/model_preprocessing.job.yml b/llmops-basic/resources/model_preprocessing.job.yml new file mode 100644 index 0000000..d359088 --- /dev/null +++ b/llmops-basic/resources/model_preprocessing.job.yml @@ -0,0 +1,21 @@ +resources: + jobs: + model_preprocessing: + name: model_preprocessing + queue: + enabled: true + + parameters: + - name: "catalog_name" + default: "${var.catalog_name}" + - name: "schema_name" + default: "${resources.schemas.sentiment_schema.name}" + + tasks: + - task_key: model_preprocessing + notebook_task: + notebook_path: "../notebooks/model_preprocessing.ipynb" + + tags: + Project: "${bundle.name}" + From b553e6eb1640aa629db60dacd2cc4a09e57291eb Mon Sep 17 00:00:00 2001 From: Bruno Tafur Date: Mon, 19 Jan 2026 21:27:47 +0000 Subject: [PATCH 2/5] reestructure files and generalise catalog naming --- contrib/templates/llmops-basic/README.md | 2 +- .../{ => {{.project_name}}}/.gitignore | 13 ---- .../{ => {{.project_name}}}/README.md.tmpl | 5 +- .../databricks.yml.tmpl | 0 .../notebooks/model_build.ipynb | 2 +- .../notebooks/model_endpoint_deploy.ipynb 
| 2 +- .../notebooks/model_evaluation.ipynb | 62 +++++++++---------- .../notebooks/model_inference.ipynb | 38 ++++++------ .../notebooks/model_preprocessing.ipynb | 26 ++++---- .../notebooks/quickstart_agent.py | 0 .../resources/model_artifacts.yml.tmpl | 0 .../model_build_evaluation.job.yml.tmpl | 0 .../model_endpoint_deploy.job.yml.tmpl | 0 .../resources/model_inference.job.yml.tmpl | 0 .../model_preprocessing.job.yml.tmpl | 0 {llmops-basic => llmops_basic}/.gitignore | 13 ---- {llmops-basic => llmops_basic}/README.md | 5 +- {llmops-basic => llmops_basic}/databricks.yml | 0 .../notebooks/model_build.ipynb | 2 +- .../notebooks/model_endpoint_deploy.ipynb | 2 +- .../notebooks/model_evaluation.ipynb | 62 +++++++++---------- .../notebooks/model_inference.ipynb | 38 ++++++------ .../notebooks/model_preprocessing.ipynb | 26 ++++---- .../notebooks/quickstart_agent.py | 0 .../resources/model_artifacts.yml | 0 .../resources/model_build_evaluation.job.yml | 0 .../resources/model_endpoint_deploy.job.yml | 0 .../resources/model_inference.job.yml | 0 .../resources/model_preprocessing.job.yml | 0 29 files changed, 135 insertions(+), 163 deletions(-) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/.gitignore (74%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/README.md.tmpl (93%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/databricks.yml.tmpl (100%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/notebooks/model_build.ipynb (99%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/notebooks/model_endpoint_deploy.ipynb (98%) rename {llmops-basic => contrib/templates/llmops-basic/template/{{.project_name}}}/notebooks/model_evaluation.ipynb (92%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/notebooks/model_inference.ipynb (90%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/notebooks/model_preprocessing.ipynb (93%) 
rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/notebooks/quickstart_agent.py (100%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/resources/model_artifacts.yml.tmpl (100%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/resources/model_build_evaluation.job.yml.tmpl (100%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/resources/model_endpoint_deploy.job.yml.tmpl (100%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/resources/model_inference.job.yml.tmpl (100%) rename contrib/templates/llmops-basic/template/{ => {{.project_name}}}/resources/model_preprocessing.job.yml.tmpl (100%) rename {llmops-basic => llmops_basic}/.gitignore (74%) rename {llmops-basic => llmops_basic}/README.md (93%) rename {llmops-basic => llmops_basic}/databricks.yml (100%) rename {llmops-basic => llmops_basic}/notebooks/model_build.ipynb (99%) rename {llmops-basic => llmops_basic}/notebooks/model_endpoint_deploy.ipynb (98%) rename {contrib/templates/llmops-basic/template => llmops_basic}/notebooks/model_evaluation.ipynb (92%) rename {llmops-basic => llmops_basic}/notebooks/model_inference.ipynb (90%) rename {llmops-basic => llmops_basic}/notebooks/model_preprocessing.ipynb (93%) rename {llmops-basic => llmops_basic}/notebooks/quickstart_agent.py (100%) rename {llmops-basic => llmops_basic}/resources/model_artifacts.yml (100%) rename {llmops-basic => llmops_basic}/resources/model_build_evaluation.job.yml (100%) rename {llmops-basic => llmops_basic}/resources/model_endpoint_deploy.job.yml (100%) rename {llmops-basic => llmops_basic}/resources/model_inference.job.yml (100%) rename {llmops-basic => llmops_basic}/resources/model_preprocessing.job.yml (100%) diff --git a/contrib/templates/llmops-basic/README.md b/contrib/templates/llmops-basic/README.md index dbf0c2a..eb43a3b 100644 --- a/contrib/templates/llmops-basic/README.md +++ b/contrib/templates/llmops-basic/README.md @@ 
-6,5 +6,5 @@ using DABs, Unity Catalog and MLflow. Install it using ``` -databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/data-engineering +databricks bundle init https://github.com/databricks/bundle-examples --template-dir contrib/templates/llmops-basic ``` \ No newline at end of file diff --git a/contrib/templates/llmops-basic/template/.gitignore b/contrib/templates/llmops-basic/template/{{.project_name}}/.gitignore similarity index 74% rename from contrib/templates/llmops-basic/template/.gitignore rename to contrib/templates/llmops-basic/template/{{.project_name}}/.gitignore index dc1fd6c..f3b15af 100644 --- a/contrib/templates/llmops-basic/template/.gitignore +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/.gitignore @@ -1,8 +1,5 @@ -# Databricks .databricks/ .bundle/ - -# Python __pycache__/ *.py[cod] *$py.class @@ -11,25 +8,15 @@ __pycache__/ venv/ env/ ENV/ - -# Jupyter .ipynb_checkpoints/ *.ipynb_checkpoints - -# IDEs .vscode/ .idea/ *.swp *.swo *~ - -# OS .DS_Store Thumbs.db - -# MLflow mlruns/ - -# Logs *.log diff --git a/contrib/templates/llmops-basic/template/README.md.tmpl b/contrib/templates/llmops-basic/template/{{.project_name}}/README.md.tmpl similarity index 93% rename from contrib/templates/llmops-basic/template/README.md.tmpl rename to contrib/templates/llmops-basic/template/{{.project_name}}/README.md.tmpl index 464124c..cac94b8 100644 --- a/contrib/templates/llmops-basic/template/README.md.tmpl +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/README.md.tmpl @@ -7,9 +7,8 @@ End-to-end LLMOps project for sentiment analysis of reviews using Databricks. This example demonstrates a complete LLMOps pipeline for building, evaluating, and deploying a sentiment analysis model. 
**Pipeline stages:** -- Data preprocessing -- Model training with MLflow -- Model evaluation +- Data preparation +- Model build and evaluation with MLflow - Model deployment to serving endpoint - Batch inference diff --git a/contrib/templates/llmops-basic/template/databricks.yml.tmpl b/contrib/templates/llmops-basic/template/{{.project_name}}/databricks.yml.tmpl similarity index 100% rename from contrib/templates/llmops-basic/template/databricks.yml.tmpl rename to contrib/templates/llmops-basic/template/{{.project_name}}/databricks.yml.tmpl diff --git a/contrib/templates/llmops-basic/template/notebooks/model_build.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_build.ipynb similarity index 99% rename from contrib/templates/llmops-basic/template/notebooks/model_build.ipynb rename to contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_build.ipynb index 81365c2..697ea97 100644 --- a/contrib/templates/llmops-basic/template/notebooks/model_build.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_build.ipynb @@ -40,7 +40,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "\n", diff --git a/contrib/templates/llmops-basic/template/notebooks/model_endpoint_deploy.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_endpoint_deploy.ipynb similarity index 98% rename from contrib/templates/llmops-basic/template/notebooks/model_endpoint_deploy.ipynb rename to contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_endpoint_deploy.ipynb index a411e34..2f18515 100644 --- a/contrib/templates/llmops-basic/template/notebooks/model_endpoint_deploy.ipynb +++ 
b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_endpoint_deploy.ipynb @@ -34,7 +34,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "\n", diff --git a/llmops-basic/notebooks/model_evaluation.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb similarity index 92% rename from llmops-basic/notebooks/model_evaluation.ipynb rename to contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb index b303f32..8d0e39b 100644 --- a/llmops-basic/notebooks/model_evaluation.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb @@ -40,7 +40,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "dbutils.widgets.text(\"logged_run_id\", \"None\")\n", @@ -264,21 +264,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "btafur_catalog", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "experiment_name": { @@ -290,21 +290,21 @@ "label": null, "name": "experiment_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, 
"parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", "label": null, "name": "experiment_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "logged_run_id": { @@ -316,21 +316,21 @@ "label": null, "name": "logged_run_id", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "None", "label": null, "name": "logged_run_id", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "model_name": { @@ -342,21 +342,21 @@ "label": null, "name": "model_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "quickstart_agent", "label": null, "name": "model_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "schema_name": { @@ -368,21 +368,21 @@ "label": null, "name": "schema_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "default", "label": null, "name": "schema_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git 
a/contrib/templates/llmops-basic/template/notebooks/model_inference.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_inference.ipynb similarity index 90% rename from contrib/templates/llmops-basic/template/notebooks/model_inference.ipynb rename to contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_inference.ipynb index fc8fe43..b64c8b5 100644 --- a/contrib/templates/llmops-basic/template/notebooks/model_inference.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_inference.ipynb @@ -40,7 +40,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", @@ -163,21 +163,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "btafur_catalog", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "model_name": { @@ -189,21 +189,21 @@ "label": null, "name": "model_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "quickstart_agent", "label": null, "name": "model_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "schema_name": { @@ -215,21 +215,21 
@@ "label": null, "name": "schema_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "default", "label": null, "name": "schema_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/contrib/templates/llmops-basic/template/notebooks/model_preprocessing.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_preprocessing.ipynb similarity index 93% rename from contrib/templates/llmops-basic/template/notebooks/model_preprocessing.ipynb rename to contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_preprocessing.ipynb index 3b99c12..33cbe6b 100644 --- a/contrib/templates/llmops-basic/template/notebooks/model_preprocessing.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_preprocessing.ipynb @@ -34,7 +34,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", "schema_name = dbutils.widgets.get(\"schema_name\")" @@ -168,21 +168,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "btafur_catalog", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "schema_name": { @@ -194,21 +194,21 @@ "label": null, 
"name": "schema_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "default", "label": null, "name": "schema_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/contrib/templates/llmops-basic/template/notebooks/quickstart_agent.py b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/quickstart_agent.py similarity index 100% rename from contrib/templates/llmops-basic/template/notebooks/quickstart_agent.py rename to contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/quickstart_agent.py diff --git a/contrib/templates/llmops-basic/template/resources/model_artifacts.yml.tmpl b/contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_artifacts.yml.tmpl similarity index 100% rename from contrib/templates/llmops-basic/template/resources/model_artifacts.yml.tmpl rename to contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_artifacts.yml.tmpl diff --git a/contrib/templates/llmops-basic/template/resources/model_build_evaluation.job.yml.tmpl b/contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_build_evaluation.job.yml.tmpl similarity index 100% rename from contrib/templates/llmops-basic/template/resources/model_build_evaluation.job.yml.tmpl rename to contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_build_evaluation.job.yml.tmpl diff --git a/contrib/templates/llmops-basic/template/resources/model_endpoint_deploy.job.yml.tmpl b/contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_endpoint_deploy.job.yml.tmpl similarity index 100% rename from contrib/templates/llmops-basic/template/resources/model_endpoint_deploy.job.yml.tmpl 
rename to contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_endpoint_deploy.job.yml.tmpl diff --git a/contrib/templates/llmops-basic/template/resources/model_inference.job.yml.tmpl b/contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_inference.job.yml.tmpl similarity index 100% rename from contrib/templates/llmops-basic/template/resources/model_inference.job.yml.tmpl rename to contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_inference.job.yml.tmpl diff --git a/contrib/templates/llmops-basic/template/resources/model_preprocessing.job.yml.tmpl b/contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_preprocessing.job.yml.tmpl similarity index 100% rename from contrib/templates/llmops-basic/template/resources/model_preprocessing.job.yml.tmpl rename to contrib/templates/llmops-basic/template/{{.project_name}}/resources/model_preprocessing.job.yml.tmpl diff --git a/llmops-basic/.gitignore b/llmops_basic/.gitignore similarity index 74% rename from llmops-basic/.gitignore rename to llmops_basic/.gitignore index dc1fd6c..f3b15af 100644 --- a/llmops-basic/.gitignore +++ b/llmops_basic/.gitignore @@ -1,8 +1,5 @@ -# Databricks .databricks/ .bundle/ - -# Python __pycache__/ *.py[cod] *$py.class @@ -11,25 +8,15 @@ __pycache__/ venv/ env/ ENV/ - -# Jupyter .ipynb_checkpoints/ *.ipynb_checkpoints - -# IDEs .vscode/ .idea/ *.swp *.swo *~ - -# OS .DS_Store Thumbs.db - -# MLflow mlruns/ - -# Logs *.log diff --git a/llmops-basic/README.md b/llmops_basic/README.md similarity index 93% rename from llmops-basic/README.md rename to llmops_basic/README.md index 4d14c3d..b0f7a71 100644 --- a/llmops-basic/README.md +++ b/llmops_basic/README.md @@ -7,9 +7,8 @@ End-to-end LLMOps project for sentiment analysis of reviews using Databricks. This example demonstrates a complete LLMOps pipeline for building, evaluating, and deploying a sentiment analysis model. 
**Pipeline stages:** -- Data preprocessing -- Model training with MLflow -- Model evaluation +- Data preparation +- Model build and evaluation with MLflow - Model deployment to serving endpoint - Batch inference diff --git a/llmops-basic/databricks.yml b/llmops_basic/databricks.yml similarity index 100% rename from llmops-basic/databricks.yml rename to llmops_basic/databricks.yml diff --git a/llmops-basic/notebooks/model_build.ipynb b/llmops_basic/notebooks/model_build.ipynb similarity index 99% rename from llmops-basic/notebooks/model_build.ipynb rename to llmops_basic/notebooks/model_build.ipynb index 81365c2..697ea97 100644 --- a/llmops-basic/notebooks/model_build.ipynb +++ b/llmops_basic/notebooks/model_build.ipynb @@ -40,7 +40,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "\n", diff --git a/llmops-basic/notebooks/model_endpoint_deploy.ipynb b/llmops_basic/notebooks/model_endpoint_deploy.ipynb similarity index 98% rename from llmops-basic/notebooks/model_endpoint_deploy.ipynb rename to llmops_basic/notebooks/model_endpoint_deploy.ipynb index a411e34..2f18515 100644 --- a/llmops-basic/notebooks/model_endpoint_deploy.ipynb +++ b/llmops_basic/notebooks/model_endpoint_deploy.ipynb @@ -34,7 +34,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "\n", diff --git a/contrib/templates/llmops-basic/template/notebooks/model_evaluation.ipynb b/llmops_basic/notebooks/model_evaluation.ipynb similarity index 92% rename from contrib/templates/llmops-basic/template/notebooks/model_evaluation.ipynb rename to 
llmops_basic/notebooks/model_evaluation.ipynb index b303f32..8d0e39b 100644 --- a/contrib/templates/llmops-basic/template/notebooks/model_evaluation.ipynb +++ b/llmops_basic/notebooks/model_evaluation.ipynb @@ -40,7 +40,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "dbutils.widgets.text(\"logged_run_id\", \"None\")\n", @@ -264,21 +264,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "btafur_catalog", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "experiment_name": { @@ -290,21 +290,21 @@ "label": null, "name": "experiment_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", "label": null, "name": "experiment_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "logged_run_id": { @@ -316,21 +316,21 @@ "label": null, "name": "logged_run_id", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "None", "label": null, "name": 
"logged_run_id", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "model_name": { @@ -342,21 +342,21 @@ "label": null, "name": "model_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "quickstart_agent", "label": null, "name": "model_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "schema_name": { @@ -368,21 +368,21 @@ "label": null, "name": "schema_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "default", "label": null, "name": "schema_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/llmops-basic/notebooks/model_inference.ipynb b/llmops_basic/notebooks/model_inference.ipynb similarity index 90% rename from llmops-basic/notebooks/model_inference.ipynb rename to llmops_basic/notebooks/model_inference.ipynb index fc8fe43..b64c8b5 100644 --- a/llmops-basic/notebooks/model_inference.ipynb +++ b/llmops_basic/notebooks/model_inference.ipynb @@ -40,7 +40,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", \"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "dbutils.widgets.text(\"model_name\", \"quickstart_agent\")\n", "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", @@ -163,21 +163,21 @@ "label": null, "name": 
"catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "btafur_catalog", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "model_name": { @@ -189,21 +189,21 @@ "label": null, "name": "model_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "quickstart_agent", "label": null, "name": "model_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "schema_name": { @@ -215,21 +215,21 @@ "label": null, "name": "schema_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "default", "label": null, "name": "schema_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/llmops-basic/notebooks/model_preprocessing.ipynb b/llmops_basic/notebooks/model_preprocessing.ipynb similarity index 93% rename from llmops-basic/notebooks/model_preprocessing.ipynb rename to llmops_basic/notebooks/model_preprocessing.ipynb index 3b99c12..33cbe6b 100644 --- a/llmops-basic/notebooks/model_preprocessing.ipynb +++ b/llmops_basic/notebooks/model_preprocessing.ipynb @@ -34,7 +34,7 @@ }, "outputs": [], "source": [ - "dbutils.widgets.text(\"catalog_name\", 
\"btafur_catalog\")\n", + "dbutils.widgets.text(\"catalog_name\", \"dev_catalog\")\n", "dbutils.widgets.text(\"schema_name\", \"default\")\n", "catalog_name = dbutils.widgets.get(\"catalog_name\")\n", "schema_name = dbutils.widgets.get(\"schema_name\")" @@ -168,21 +168,21 @@ "label": null, "name": "catalog_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "btafur_catalog", "label": null, "name": "catalog_name", "options": { - "widgetType": "text", "autoCreated": null, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } }, "schema_name": { @@ -194,21 +194,21 @@ "label": null, "name": "schema_name", "options": { - "widgetDisplayType": "Text", - "validationRegex": null + "validationRegex": null, + "widgetDisplayType": "Text" }, "parameterDataType": "String" }, "widgetInfo": { - "widgetType": "text", "defaultValue": "default", "label": null, "name": "schema_name", "options": { - "widgetType": "text", "autoCreated": false, - "validationRegex": null - } + "validationRegex": null, + "widgetType": "text" + }, + "widgetType": "text" } } } diff --git a/llmops-basic/notebooks/quickstart_agent.py b/llmops_basic/notebooks/quickstart_agent.py similarity index 100% rename from llmops-basic/notebooks/quickstart_agent.py rename to llmops_basic/notebooks/quickstart_agent.py diff --git a/llmops-basic/resources/model_artifacts.yml b/llmops_basic/resources/model_artifacts.yml similarity index 100% rename from llmops-basic/resources/model_artifacts.yml rename to llmops_basic/resources/model_artifacts.yml diff --git a/llmops-basic/resources/model_build_evaluation.job.yml b/llmops_basic/resources/model_build_evaluation.job.yml similarity index 100% rename from llmops-basic/resources/model_build_evaluation.job.yml rename to 
llmops_basic/resources/model_build_evaluation.job.yml diff --git a/llmops-basic/resources/model_endpoint_deploy.job.yml b/llmops_basic/resources/model_endpoint_deploy.job.yml similarity index 100% rename from llmops-basic/resources/model_endpoint_deploy.job.yml rename to llmops_basic/resources/model_endpoint_deploy.job.yml diff --git a/llmops-basic/resources/model_inference.job.yml b/llmops_basic/resources/model_inference.job.yml similarity index 100% rename from llmops-basic/resources/model_inference.job.yml rename to llmops_basic/resources/model_inference.job.yml diff --git a/llmops-basic/resources/model_preprocessing.job.yml b/llmops_basic/resources/model_preprocessing.job.yml similarity index 100% rename from llmops-basic/resources/model_preprocessing.job.yml rename to llmops_basic/resources/model_preprocessing.job.yml From 9b874e27cb3617689fd4a73a06683f10dc7f95ed Mon Sep 17 00:00:00 2001 From: Bruno Tafur Date: Mon, 19 Jan 2026 21:34:42 +0000 Subject: [PATCH 3/5] reestructure files and generalise catalog naming --- llmops_basic/notebooks/model_build.ipynb | 6 +++--- llmops_basic/notebooks/model_evaluation.ipynb | 6 +++--- llmops_basic/notebooks/model_inference.ipynb | 6 +++--- llmops_basic/notebooks/model_preprocessing.ipynb | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llmops_basic/notebooks/model_build.ipynb b/llmops_basic/notebooks/model_build.ipynb index 697ea97..8ea1c35 100644 --- a/llmops_basic/notebooks/model_build.ipynb +++ b/llmops_basic/notebooks/model_build.ipynb @@ -162,11 +162,11 @@ "notebookName": "model_build", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "fd954ea2-6905-4526-b686-38b43c5b6d97", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -176,7 +176,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": 
"btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { diff --git a/llmops_basic/notebooks/model_evaluation.ipynb b/llmops_basic/notebooks/model_evaluation.ipynb index 8d0e39b..462d8be 100644 --- a/llmops_basic/notebooks/model_evaluation.ipynb +++ b/llmops_basic/notebooks/model_evaluation.ipynb @@ -256,11 +256,11 @@ "notebookName": "model_evaluation", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "3a964089-b210-4dc6-aa79-c7c287530d31", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -270,7 +270,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { diff --git a/llmops_basic/notebooks/model_inference.ipynb b/llmops_basic/notebooks/model_inference.ipynb index b64c8b5..e0c4535 100644 --- a/llmops_basic/notebooks/model_inference.ipynb +++ b/llmops_basic/notebooks/model_inference.ipynb @@ -155,11 +155,11 @@ "notebookName": "model_inference", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "74cb3499-b670-4a9b-a326-ad9c7a804698", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -169,7 +169,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { diff --git a/llmops_basic/notebooks/model_preprocessing.ipynb b/llmops_basic/notebooks/model_preprocessing.ipynb index 33cbe6b..95dd292 100644 --- a/llmops_basic/notebooks/model_preprocessing.ipynb +++ b/llmops_basic/notebooks/model_preprocessing.ipynb @@ -160,11 +160,11 @@ "notebookName": 
"model_preprocessing", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "c27b5912-2ca9-46c0-ae51-7f7dcf693134", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -174,7 +174,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { From c18affcc5469a503092a3f789c11e041eb65c1fd Mon Sep 17 00:00:00 2001 From: Bruno Tafur Date: Mon, 19 Jan 2026 21:39:27 +0000 Subject: [PATCH 4/5] reestructure files and generalise catalog naming --- .../{{.project_name}}/notebooks/model_build.ipynb | 6 +++--- .../notebooks/model_evaluation.ipynb | 12 ++++++------ .../notebooks/model_inference.ipynb | 6 +++--- .../notebooks/model_preprocessing.ipynb | 6 +++--- llmops_basic/notebooks/model_evaluation.ipynb | 6 +++--- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_build.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_build.ipynb index 697ea97..8ea1c35 100644 --- a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_build.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_build.ipynb @@ -162,11 +162,11 @@ "notebookName": "model_build", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "fd954ea2-6905-4526-b686-38b43c5b6d97", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -176,7 +176,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", 
"options": { diff --git a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb index 8d0e39b..ea7ed2f 100644 --- a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb @@ -256,11 +256,11 @@ "notebookName": "model_evaluation", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "3a964089-b210-4dc6-aa79-c7c287530d31", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -270,7 +270,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -282,11 +282,11 @@ } }, "experiment_name": { - "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", "nuid": "d243520b-4f07-4843-b294-0e3b3489d85b", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": { @@ -296,7 +296,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": { diff --git a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_inference.ipynb 
b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_inference.ipynb index b64c8b5..e0c4535 100644 --- a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_inference.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_inference.ipynb @@ -155,11 +155,11 @@ "notebookName": "model_inference", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "74cb3499-b670-4a9b-a326-ad9c7a804698", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -169,7 +169,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { diff --git a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_preprocessing.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_preprocessing.ipynb index 33cbe6b..95dd292 100644 --- a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_preprocessing.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_preprocessing.ipynb @@ -160,11 +160,11 @@ "notebookName": "model_preprocessing", "widgets": { "catalog_name": { - "currentValue": "btafur_catalog", + "currentValue": "dev_catalog", "nuid": "c27b5912-2ca9-46c0-ae51-7f7dcf693134", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { @@ -174,7 +174,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "btafur_catalog", + "defaultValue": "dev_catalog", "label": null, "name": "catalog_name", "options": { diff --git a/llmops_basic/notebooks/model_evaluation.ipynb b/llmops_basic/notebooks/model_evaluation.ipynb index 462d8be..ea7ed2f 
100644 --- a/llmops_basic/notebooks/model_evaluation.ipynb +++ b/llmops_basic/notebooks/model_evaluation.ipynb @@ -282,11 +282,11 @@ } }, "experiment_name": { - "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", "nuid": "d243520b-4f07-4843-b294-0e3b3489d85b", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": { @@ -296,7 +296,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_btafur_catalog", + "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": { From c58aeabcaeb8587bb07e67075534d49257538069 Mon Sep 17 00:00:00 2001 From: Bruno Tafur Date: Mon, 19 Jan 2026 21:44:10 +0000 Subject: [PATCH 5/5] reestructure files and generalise catalog naming --- .../{{.project_name}}/notebooks/model_evaluation.ipynb | 6 +++--- llmops_basic/databricks.yml | 4 ++-- llmops_basic/notebooks/model_evaluation.ipynb | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb index ea7ed2f..5d08f41 100644 --- a/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb +++ b/contrib/templates/llmops-basic/template/{{.project_name}}/notebooks/model_evaluation.ipynb @@ -282,11 +282,11 @@ } }, "experiment_name": { - "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", + "currentValue": "/Users/user@company.com/quickstart_agent_dev_catalog", "nuid": 
"d243520b-4f07-4843-b294-0e3b3489d85b", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", + "defaultValue": "/Users/user@company.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": { @@ -296,7 +296,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", + "defaultValue": "/Users/user@company.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": { diff --git a/llmops_basic/databricks.yml b/llmops_basic/databricks.yml index 6fd1c8b..a68bac3 100644 --- a/llmops_basic/databricks.yml +++ b/llmops_basic/databricks.yml @@ -36,8 +36,8 @@ targets: variables: catalog_name: prod_catalog run_as: - # This runs as bruno.tafur@databricks.com in production. Alternatively, + # This runs as user@company.com in production. Alternatively, # a service principal could be used here using service_principal_name # (see Databricks documentation). 
- user_name: bruno.tafur@databricks.com + user_name: user@company.com \ No newline at end of file diff --git a/llmops_basic/notebooks/model_evaluation.ipynb b/llmops_basic/notebooks/model_evaluation.ipynb index ea7ed2f..5d08f41 100644 --- a/llmops_basic/notebooks/model_evaluation.ipynb +++ b/llmops_basic/notebooks/model_evaluation.ipynb @@ -282,11 +282,11 @@ } }, "experiment_name": { - "currentValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", + "currentValue": "/Users/user@company.com/quickstart_agent_dev_catalog", "nuid": "d243520b-4f07-4843-b294-0e3b3489d85b", "typedWidgetInfo": { "autoCreated": false, - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", + "defaultValue": "/Users/user@company.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": { @@ -296,7 +296,7 @@ "parameterDataType": "String" }, "widgetInfo": { - "defaultValue": "/Users/bruno.tafur@databricks.com/quickstart_agent_dev_catalog", + "defaultValue": "/Users/user@company.com/quickstart_agent_dev_catalog", "label": null, "name": "experiment_name", "options": {