diff --git a/examples/ray/README.md b/examples/ray/README.md
new file mode 100644
index 000000000..98c326303
--- /dev/null
+++ b/examples/ray/README.md
@@ -0,0 +1,75 @@
+# Running Ray on Azure Kubernetes Service (AKS)
+
+This example demonstrates how to deploy and run a [Ray](https://www.ray.io/) application on AKS using the KubeRay operator.
+
+## Prerequisites
+
+- An AKS cluster (Kubernetes 1.26+)
+- [kubectl](https://kubernetes.io/docs/tasks/tools/) configured to access your cluster
+- [Helm](https://helm.sh/docs/intro/install/) 3.x installed
+
+## Overview
+
+Ray is an open-source framework for scaling AI and Python workloads. This example deploys:
+
+1. The **KubeRay operator** to manage Ray clusters on Kubernetes
+2. A **RayCluster** custom resource with a head node and worker nodes
+3. A sample **Ray job** to verify the deployment
+
+## Deploy the KubeRay operator
+
+```bash
+helm repo add kuberay https://ray-project.github.io/kuberay-helm/
+helm repo update
+
+helm install kuberay-operator kuberay/kuberay-operator \
+  --namespace kuberay-system \
+  --create-namespace
+```
+
+Verify the operator is running:
+
+```bash
+kubectl get pods -n kuberay-system
+```
+
+## Deploy the RayCluster
+
+```bash
+kubectl apply -f ray-cluster.yaml
+```
+
+Wait for the cluster to be ready:
+
+```bash
+kubectl get rayclusters
+kubectl get pods -l ray.io/cluster=ray-cluster
+```
+
+## Submit a sample job
+
+```bash
+kubectl apply -f ray-job.yaml
+```
+
+Check job status:
+
+```bash
+kubectl get rayjobs
+kubectl logs -l job-name=ray-sample-job
+```
+
+## Clean up
+
+```bash
+kubectl delete -f ray-job.yaml
+kubectl delete -f ray-cluster.yaml
+helm uninstall kuberay-operator -n kuberay-system
+kubectl delete namespace kuberay-system
+```
+
+## Resources
+
+- [Ray documentation](https://docs.ray.io/)
+- [KubeRay documentation](https://ray-project.github.io/kuberay/)
+- [AKS documentation](https://learn.microsoft.com/azure/aks)
diff --git a/examples/ray/aks-classic/deploy.sh b/examples/ray/aks-classic/deploy.sh
new file mode 100755
index 000000000..2427f68a1
--- /dev/null
+++ b/examples/ray/aks-classic/deploy.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+set -euo pipefail
+
+# Check if the user is logged into Azure CLI
+if ! az account show > /dev/null 2>&1; then
+    echo "Please sign in to Azure CLI using 'az login' before running this script."
+    exit 1
+fi
+
+# Initialize Terraform
+terraform init
+
+# Create a Terraform plan
+terraform plan -out main.tfplan
+
+# Apply the Terraform plan
+terraform apply main.tfplan
+
+# Retrieve the Terraform outputs
+resource_group_name=$(terraform output -raw resource_group_name)
+aks_cluster_name=$(terraform output -raw kubernetes_cluster_name)
+
+# Get AKS credentials for the cluster
+az aks get-credentials \
+    --resource-group "$resource_group_name" \
+    --name "$aks_cluster_name" \
+    --overwrite-existing
+
+echo "=== Cluster nodes ==="
+kubectl get nodes
+
+echo "=== Verifying installations ==="
+kubectl get pods -n kueue-system
+kubectl get pods -n kuberay-system
+
+echo "=== Setup complete ==="
\ No newline at end of file
diff --git a/examples/ray/aks-classic/main.tf b/examples/ray/aks-classic/main.tf
new file mode 100644
index 000000000..4576026a6
--- /dev/null
+++ b/examples/ray/aks-classic/main.tf
@@ -0,0 +1,122 @@
+# Generate random resource group name
+resource "random_pet" "rg_name" {
+  prefix = var.resource_group_name_prefix
+}
+
+resource "azurerm_resource_group" "rg" {
+  location = var.resource_group_location
+  name     = random_pet.rg_name.id
+}
+
+resource "random_pet" "azurerm_kubernetes_cluster_name" {
+  prefix = "cluster"
+}
+
+resource "random_pet" "azurerm_kubernetes_cluster_dns_prefix" {
+  prefix = "dns"
+}
+
+resource "azurerm_kubernetes_cluster" "k8s" {
+  location                         = azurerm_resource_group.rg.location
+  name                             = random_pet.azurerm_kubernetes_cluster_name.id
+  resource_group_name              = azurerm_resource_group.rg.name
+  dns_prefix                       = random_pet.azurerm_kubernetes_cluster_dns_prefix.id
+  kubernetes_version               = var.kubernetes_version
+    
+  identity {
+    type = "SystemAssigned"
+  }
+
+  default_node_pool {
+    name       = "systempool"
+    vm_size    = var.system_node_pool_vm_size
+    node_count = var.system_node_pool_node_count
+    tags = { owner = var.resource_group_owner }
+  }
+
+  linux_profile {
+    admin_username = var.username
+
+    ssh_key {
+      key_data = azapi_resource_action.ssh_public_key_gen.output.publicKey
+    }
+  }
+
+  network_profile {
+    network_plugin    = "azure"
+  }
+
+  web_app_routing {
+    dns_zone_ids = []
+  }
+}
+
+resource "null_resource" "wait_for_aks" {
+  depends_on = [azurerm_kubernetes_cluster.k8s]
+
+  provisioner "local-exec" {
+    command = <<EOT
+      max_retries=10
+      retries=0
+      while [ "$(az aks show --resource-group ${azurerm_resource_group.rg.name} --name ${azurerm_kubernetes_cluster.k8s.name} --query "provisioningState" -o tsv)" != "Succeeded" ]; do
+        if [ $retries -ge $max_retries ]; then
+          echo "Max retries exceeded. Exiting..."
+          exit 1
+        fi
+        echo "Waiting for AKS cluster to be fully provisioned... (Attempt: $((retries+1)))"
+        retries=$((retries+1))
+        sleep 30
+      done
+    EOT
+  }
+}
+
+resource "azapi_update_resource" "k8s-default-node-pool-systempool-taint" {
+  type        = "Microsoft.ContainerService/managedClusters@2024-09-02-preview"
+  resource_id = azurerm_kubernetes_cluster.k8s.id
+  body = jsonencode({
+    properties = {
+      agentPoolProfiles = [
+        {
+          name = "systempool"
+          nodeTaints = ["CriticalAddonsOnly=true:NoSchedule"]
+        }
+      ]
+    }
+  })
+
+  depends_on = [null_resource.wait_for_aks]
+}
+
+resource "azurerm_kubernetes_cluster_node_pool" "workload" {
+  name                  = "cpupool"
+  kubernetes_cluster_id = azurerm_kubernetes_cluster.k8s.id
+  vm_size               = var.ray_node_pool_vm_size
+  node_count            = var.ray_node_pool_node_count
+
+  depends_on = [azapi_update_resource.k8s-default-node-pool-systempool-taint]
+}
+
+resource "helm_release" "kueue" {
+  name             = "kueue"
+  namespace        = "kueue-system"
+  create_namespace = true
+  repository       = var.helm_registry
+  chart            = "kueue"
+  version          = var.kueue_version
+  wait             = true
+
+  depends_on = [azurerm_kubernetes_cluster_node_pool.workload]
+}
+
+resource "helm_release" "kuberay_operator" {
+  name             = "kuberay-operator"
+  namespace        = "kuberay-system"
+  create_namespace = true
+  repository       = var.helm_registry
+  chart            = "kuberay-operator"
+  version          = var.kuberay_operator_version
+  wait             = true
+
+  depends_on = [azurerm_kubernetes_cluster_node_pool.workload]
+}
\ No newline at end of file
diff --git a/examples/ray/aks-classic/outputs.tf b/examples/ray/aks-classic/outputs.tf
new file mode 100644
index 000000000..fe85c3fcd
--- /dev/null
+++ b/examples/ray/aks-classic/outputs.tf
@@ -0,0 +1,46 @@
+output "resource_group_name" {
+  value = azurerm_resource_group.rg.name
+}
+
+output "kubernetes_cluster_name" {
+  value = azurerm_kubernetes_cluster.k8s.name
+}
+
+output "client_certificate" {
+  value     = azurerm_kubernetes_cluster.k8s.kube_config[0].client_certificate
+  sensitive = true
+}
+
+output "client_key" {
+  value     = azurerm_kubernetes_cluster.k8s.kube_config[0].client_key
+  sensitive = true
+}
+
+output "cluster_ca_certificate" {
+  value     = azurerm_kubernetes_cluster.k8s.kube_config[0].cluster_ca_certificate
+  sensitive = true
+}
+
+output "cluster_password" {
+  value     = azurerm_kubernetes_cluster.k8s.kube_config[0].password
+  sensitive = true
+}
+
+output "cluster_username" {
+  value     = azurerm_kubernetes_cluster.k8s.kube_config[0].username
+  sensitive = true
+}
+
+output "host" {
+  value     = azurerm_kubernetes_cluster.k8s.kube_config[0].host
+  sensitive = true
+}
+
+output "kube_config" {
+  value     = azurerm_kubernetes_cluster.k8s.kube_config_raw
+  sensitive = true
+}
+
+output "system_node_pool_name" {
+  value = azurerm_kubernetes_cluster.k8s.default_node_pool[0].name
+}
diff --git a/examples/ray/aks-classic/providers.tf b/examples/ray/aks-classic/providers.tf
new file mode 100644
index 000000000..dec6184d2
--- /dev/null
+++ b/examples/ray/aks-classic/providers.tf
@@ -0,0 +1,41 @@
+terraform {
+  required_version = ">=1.0"
+
+  required_providers {
+    azapi = {
+      source  = "azure/azapi"
+      version = "~>1.5"
+    }
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "~>4.13"
+    }
+    helm = {
+      source  = "hashicorp/helm"
+      version = "~>2.12"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~>3.0"
+    }
+    time = {
+      source  = "hashicorp/time"
+      version = "0.9.1"
+    }
+  }
+}
+
+provider "azurerm" {
+  features {}
+
+  subscription_id = var.subscription_id
+}
+
+provider "helm" {
+  kubernetes {
+    host                   = azurerm_kubernetes_cluster.k8s.kube_config[0].host
+    client_certificate     = base64decode(azurerm_kubernetes_cluster.k8s.kube_config[0].client_certificate)
+    client_key             = base64decode(azurerm_kubernetes_cluster.k8s.kube_config[0].client_key)
+    cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.k8s.kube_config[0].cluster_ca_certificate)
+  }
+}
\ No newline at end of file
diff --git a/examples/ray/aks-classic/ssh.tf b/examples/ray/aks-classic/ssh.tf
new file mode 100644
index 000000000..a65439495
--- /dev/null
+++ b/examples/ray/aks-classic/ssh.tf
@@ -0,0 +1,25 @@
+
+resource "random_pet" "ssh_key_name" {
+  prefix    = "ssh"
+  separator = ""
+}
+
+resource "azapi_resource_action" "ssh_public_key_gen" {
+  type        = "Microsoft.Compute/sshPublicKeys@2022-11-01"
+  resource_id = azapi_resource.ssh_public_key.id
+  action      = "generateKeyPair"
+  method      = "POST"
+
+  response_export_values = ["publicKey", "privateKey"]
+}
+
+resource "azapi_resource" "ssh_public_key" {
+  type      = "Microsoft.Compute/sshPublicKeys@2022-11-01"
+  name      = random_pet.ssh_key_name.id
+  location  = azurerm_resource_group.rg.location
+  parent_id = azurerm_resource_group.rg.id
+}
+
+output "key_data" {
+  value = azapi_resource_action.ssh_public_key_gen.output.publicKey
+}
diff --git a/examples/ray/aks-classic/variables.tf b/examples/ray/aks-classic/variables.tf
new file mode 100644
index 000000000..0e717a0da
--- /dev/null
+++ b/examples/ray/aks-classic/variables.tf
@@ -0,0 +1,81 @@
+variable "subscription_id" {
+  description = "The Azure subscription ID."
+  type        = string
+}
+
+variable "resource_group_owner" {
+  description = "The owner of the resource group."
+  type        = string
+}
+
+variable "resource_group_location" {
+  type        = string
+  default     = "centralus"
+  description = "Location of the resource group."
+}
+
+variable "resource_group_name_prefix" {
+  type        = string
+  default     = "rg"
+  description = "Prefix of the resource group name that's combined with a random ID so name is unique in your Azure subscription."
+}
+
+variable "system_node_pool_vm_size" {
+  type        = string
+  description = "The size of the Virtual Machine for the system node pool."
+  default     = "Standard_D4ds_v4"
+}
+
+variable "system_node_pool_node_count" {
+  type        = number
+  description = "The initial quantity of nodes for the system node pool."
+  default     = 3
+}
+
+variable "ray_node_pool_vm_size" {
+  type        = string
+  description = "The size of the Virtual Machine for the Ray worker node pool."
+  default     = "Standard_D16ds_v7"
+}
+
+variable "ray_node_pool_node_count" {
+  type        = number
+  description = "The initial quantity of nodes for the Ray worker node pool."
+  default     = 2
+}
+
+variable "kubernetes_version" {
+  type        = string
+  description = "The Kubernetes version for the AKS cluster."
+  default     = "1.35"
+}
+
+variable "helm_registry" {
+  type        = string
+  description = "OCI registry for AKS AI Runtime Helm charts."
+  default     = "oci://mcr.microsoft.com/aks/ai-runtime/helm"
+}
+
+variable "kueue_version" {
+  type        = string
+  description = "Version of the Kueue Helm chart."
+  default     = "0.17.1"
+}
+
+variable "kuberay_operator_version" {
+  type        = string
+  description = "Version of the KubeRay Operator Helm chart."
+  default     = "1.6.1"
+}
+
+variable "msi_id" {
+  type        = string
+  description = "The Managed Service Identity ID. Set this value if you're running this example using Managed Identity as the authentication method."
+  default     = null
+}
+
+variable "username" {
+  type        = string
+  description = "The admin username for the new cluster."
+  default     = "azureadmin"
+}
\ No newline at end of file
diff --git a/examples/ray/inference-cpu/inference-rayjob.yaml b/examples/ray/inference-cpu/inference-rayjob.yaml
new file mode 100644
index 000000000..667c00af7
--- /dev/null
+++ b/examples/ray/inference-cpu/inference-rayjob.yaml
@@ -0,0 +1,91 @@
+# RayJob for the stack integration test.
+#
+# Flow:
+#   1. Labelled with kueue.x-k8s.io/queue-name so Kueue gates admission.
+#   2. Kueue creates a Workload (name: rayjob-e2e-inference-<hash>) and admits it
+#      once head 2 CPU + 2 workers × 1 CPU = 4 CPU quota is available.
+#   3. KubeRay spawns head + 2 workers; Ray Data runs image classification on CPU.
+#
+# Uses the custom Ray image built from images/ray/. {{RAY_IMAGE}} is resolved at
+# test time from the RAY_E2E_IMAGE env var, which CI sets to the image pushed to
+# the PR-scoped ACR. {{RAY_VERSION}} is resolved from images/ray/Makefile.
+apiVersion: ray.io/v1
+kind: RayJob
+metadata:
+  name: cpu-inference
+  namespace: ray
+  labels:
+    kueue.x-k8s.io/queue-name: e2e-stack-queue
+spec:
+  entrypoint: python /home/ray/scripts/inference_job.py
+  shutdownAfterJobFinishes: true
+  ttlSecondsAfterFinished: 600
+  runtimeEnvYAML: |
+    pip:
+      packages:
+        - torch
+        - torchvision
+      pip_install_options:
+        - --index-url
+        - https://download.pytorch.org/whl/cpu
+        - --extra-index-url
+        - https://pypi.org/simple
+      pip_check: false
+  rayClusterSpec:
+    rayVersion: "{{RAY_VERSION}}"
+    enableInTreeAutoscaling: false
+    headGroupSpec:
+      rayStartParams:
+        num-cpus: "0"            # Don't schedule Ray tasks on head; Kueue still counts its K8s CPU requests.
+        dashboard-host: "0.0.0.0"
+      template:
+        spec:
+          containers:
+          - name: ray-head
+            image: {{RAY_IMAGE}}
+            resources:
+              requests:
+                cpu: "2"
+                memory: "8Gi"
+              limits:
+                cpu: "2"
+                memory: "8Gi"
+            startupProbe:
+              httpGet:
+                path: /api/version
+                port: 8265
+              periodSeconds: 5
+              timeoutSeconds: 2
+              failureThreshold: 60
+            volumeMounts:
+            - name: inference-script
+              mountPath: /home/ray/scripts
+          volumes:
+          - name: inference-script
+            configMap:
+              name: cpu-inference-script
+    workerGroupSpecs:
+    - groupName: workers
+      replicas: 2
+      minReplicas: 2
+      maxReplicas: 2
+      rayStartParams: {}
+      template:
+        spec:
+          containers:
+          - name: ray-worker
+            image: {{RAY_IMAGE}}
+            resources:
+              requests:
+                cpu: "1"
+                memory: "4Gi"
+              limits:
+                cpu: "1"
+                memory: "4Gi"
+            startupProbe:
+              httpGet:
+                path: /api/healthz
+                port: 52365
+              periodSeconds: 5
+              timeoutSeconds: 2
+              failureThreshold: 60
diff --git a/examples/ray/inference-cpu/inference_job.py b/examples/ray/inference-cpu/inference_job.py
new file mode 100644
index 000000000..2aaaa1082
--- /dev/null
+++ b/examples/ray/inference-cpu/inference_job.py
@@ -0,0 +1,85 @@
+"""
+Image classification test for e2e validation.
+Adapted from aks-unbounded/ray/inference_job.py.
+
+Uses a locally-constructed model with random weights instead of downloading from
+HuggingFace. This avoids network dependencies (SSL proxy issues, rate limits) while
+still exercising the full Ray Data distributed inference pipeline:
+  - ActorPoolStrategy distributes work across workers
+  - Each actor loads a model, processes batches, returns predictions
+  - Ray Data orchestrates the dataflow end-to-end
+
+Auto-detects GPU: if torch.cuda.is_available(), actors declare num_gpus=1 and the
+model runs on CUDA; otherwise falls back to CPU. Ray Data infers actor count from
+the ActorPoolStrategy size; the cluster must have enough GPU/CPU quota to schedule.
+"""
+import ray
+import numpy as np
+
+ray.init()
+cluster_resources = ray.cluster_resources()
+print(f"Cluster resources: {cluster_resources}")
+GPU_AVAILABLE = cluster_resources.get("GPU", 0) > 0
+print(f"GPU_AVAILABLE = {GPU_AVAILABLE}")
+
+# Generate synthetic images in-memory (no S3 dependency)
+NUM_IMAGES = 10
+synthetic_images = [
+    {"image": np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)}
+    for _ in range(NUM_IMAGES)
+]
+
+ds = ray.data.from_items(synthetic_images)
+
+LABELS = ["cat", "dog", "bird", "fish", "car"]
+
+class ImageClassifier:
+    def __init__(self):
+        import torch
+        import torch.nn as nn
+
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Device: {self.device}")
+        # Simple conv net with random weights — no download needed.
+        self.model = nn.Sequential(
+            nn.Conv2d(3, 16, 3, stride=2),
+            nn.ReLU(),
+            nn.AdaptiveAvgPool2d(1),
+            nn.Flatten(),
+            nn.Linear(16, len(LABELS)),
+        ).to(self.device)
+        self.model.eval()
+        print("Model loaded successfully")
+
+    def __call__(self, batch):
+        import torch
+
+        images = torch.from_numpy(
+            np.stack(batch["image"])
+        ).permute(0, 3, 1, 2).float().to(self.device) / 255.0
+
+        with torch.no_grad():
+            logits = self.model(images)
+            indices = logits.argmax(dim=1)
+
+        batch["label"] = [LABELS[i] for i in indices.tolist()]
+        batch["score"] = [logits[j, indices[j]].item() for j in range(len(indices))]
+        return batch
+
+# Use fewer actors on GPU (1 worker × 1 GPU) than CPU (2 workers × 2 CPU).
+POOL_SIZE = 1 if GPU_AVAILABLE else 2
+
+predictions = ds.map_batches(
+    ImageClassifier,
+    compute=ray.data.ActorPoolStrategy(size=POOL_SIZE),
+    batch_size=4,
+    **({"num_gpus": 1} if GPU_AVAILABLE else {}),
+)
+
+results = predictions.take_all()
+print(f"\nInference complete: {len(results)} images classified")
+for i, r in enumerate(results):
+    print(f"  Image {i}: Label: {r['label']} (score: {r['score']:.4f})")
+
+print("\nSUCCESS: All images classified")
+
diff --git a/examples/ray/inference-cpu/stack-kueue-resources.yaml b/examples/ray/inference-cpu/stack-kueue-resources.yaml
new file mode 100644
index 000000000..0bb26c836
--- /dev/null
+++ b/examples/ray/inference-cpu/stack-kueue-resources.yaml
@@ -0,0 +1,45 @@
+---
+# Namespace for the Ray example.
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ray
+---
+# ResourceFlavor — represents CPU hardware for the stack test.
+apiVersion: kueue.x-k8s.io/v1beta2
+kind: ResourceFlavor
+metadata:
+  name: e2e-stack-flavor
+spec: {}
+---
+# ClusterQueue — sized to admit both the CPU-only inference job (head 2 + 2 workers × 1 CPU + submitter)
+# and the GPU variant (head 1 + 1 worker × 2 CPU + 1 GPU + submitter). GPU quota is only exercised
+# on GPU-capable clusters; on CPU clusters workloads that don't request GPU are unaffected.
+apiVersion: kueue.x-k8s.io/v1beta2
+kind: ClusterQueue
+metadata:
+  name: e2e-stack-cluster-queue
+spec:
+  namespaceSelector: {}
+  queueingStrategy: BestEffortFIFO
+  resourceGroups:
+  - coveredResources: ["cpu", "memory", "nvidia.com/gpu"]
+    flavors:
+    - name: e2e-stack-flavor
+      resources:
+      - name: cpu
+        nominalQuota: 6
+      - name: memory
+        nominalQuota: 24Gi
+      - name: nvidia.com/gpu
+        nominalQuota: 2
+---
+# LocalQueue — RayJobs must carry label kueue.x-k8s.io/queue-name: e2e-stack-queue
+# to be managed by Kueue.
+apiVersion: kueue.x-k8s.io/v1beta2
+kind: LocalQueue
+metadata:
+  name: e2e-stack-queue
+  namespace: ray
+spec:
+  clusterQueue: e2e-stack-cluster-queue
diff --git a/examples/ray/setup.sh b/examples/ray/setup.sh
new file mode 100755
index 000000000..56e17ecdd
--- /dev/null
+++ b/examples/ray/setup.sh
@@ -0,0 +1,165 @@
+#!/bin/bash
+set -euo pipefail
+
+# --- Infrastructure Configuration ---
+RESOURCE_GROUP="${RESOURCE_GROUP:-ray-example-rg}"
+LOCATION="${LOCATION:-centralus}"
+CLUSTER_NAME="${CLUSTER_NAME:-demo}"
+NODE_COUNT="${NODE_COUNT:-3}"
+NODE_VM_SIZE="${NODE_VM_SIZE:-Standard_D4ds_v4}"
+KUBERNETES_VERSION="${KUBERNETES_VERSION:-1.35}"
+NODEPOOL_NAME="${NODEPOOL_NAME:-cpu-pool}"
+NODEPOOL_NODE_COUNT="${NODEPOOL_NODE_COUNT:-2}"
+NODEPOOL_VM_SIZE="${NODEPOOL_VM_SIZE:-Standard_D16ds_v7}"
+
+# --- Helm Charts Configuration ---
+HELM_REGISTRY="${HELM_REGISTRY:-oci://mcr.microsoft.com/aks/ai-runtime/helm}"
+KUEUE_VERSION="${KUEUE_VERSION:-0.17.1}"
+KUBERAY_OPERATOR_VERSION="${KUBERAY_OPERATOR_VERSION:-1.6.1}"
+
+# --- Inference CPU Configuration ---
+RAY_IMAGE="${RAY_IMAGE:-mcr.microsoft.com/aks/ai-runtime/ray:py3.12-ray2.54.0}"
+RAY_VERSION="${RAY_VERSION:-2.54.0}"
+INFERENCE_NAMESPACE="ray"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+create_infra() {
+    echo "=== Creating resource group ==="
+    az group create \
+      --name "$RESOURCE_GROUP" \
+      --location "$LOCATION"
+
+    echo "=== Creating AKS cluster ==="
+    az aks create \
+      --resource-group "$RESOURCE_GROUP" \
+      --name "$CLUSTER_NAME" \
+      --location "$LOCATION" \
+      --node-count "$NODE_COUNT" \
+      --node-vm-size "$NODE_VM_SIZE" \
+      --kubernetes-version "$KUBERNETES_VERSION" \
+      --generate-ssh-keys \
+      --enable-managed-identity
+
+    echo "=== Adding cpu worker node pool ==="
+    az aks nodepool add \
+      --resource-group "$RESOURCE_GROUP" \
+      --cluster-name "$CLUSTER_NAME" \
+      --name "$NODEPOOL_NAME" \
+      --node-count "$NODEPOOL_NODE_COUNT" \
+      --node-vm-size "$NODEPOOL_VM_SIZE"
+
+    echo "=== Getting cluster credentials ==="
+    az aks get-credentials \
+      --resource-group "$RESOURCE_GROUP" \
+      --name "$CLUSTER_NAME" \
+      --overwrite-existing
+}
+
+install_operators() {
+    echo "=== Installing Kueue (v${KUEUE_VERSION}) ==="
+    helm upgrade --install kueue "$HELM_REGISTRY/kueue" \
+      --version "$KUEUE_VERSION" \
+      --namespace kueue-system \
+      --create-namespace \
+      --wait
+
+    echo "=== Installing KubeRay Operator (v${KUBERAY_OPERATOR_VERSION}) ==="
+    helm upgrade --install kuberay-operator "$HELM_REGISTRY/kuberay-operator" \
+      --version "$KUBERAY_OPERATOR_VERSION" \
+      --namespace kuberay-system \
+      --create-namespace \
+      --wait
+}
+
+status() {
+    echo "=== Cluster status ==="
+    kubectl get nodes
+    echo ""
+    kubectl get pods -n kueue-system
+    echo ""
+    kubectl get pods -n kuberay-system
+}
+
+run_inference_cpu() {
+    echo "=== Deploying Kueue resources ==="
+    kubectl apply -f "$SCRIPT_DIR/inference-cpu/stack-kueue-resources.yaml"
+
+    echo "=== Creating inference script ConfigMap ==="
+    kubectl create configmap cpu-inference-script \
+      --from-file=inference_job.py="$SCRIPT_DIR/inference-cpu/inference_job.py" \
+      --namespace "$INFERENCE_NAMESPACE" \
+      --dry-run=client -o yaml | kubectl apply -f -
+
+    echo "=== Submitting RayJob ==="
+    sed "s|{{RAY_IMAGE}}|${RAY_IMAGE}|g; s|{{RAY_VERSION}}|${RAY_VERSION}|g" \
+      "$SCRIPT_DIR/inference-cpu/inference-rayjob.yaml" | kubectl apply -f -
+
+    echo "=== Waiting for RayJob to complete ==="
+    kubectl wait --for=jsonpath='{.status.jobStatus}'=SUCCEEDED \
+      rayjob/cpu-inference \
+      --namespace "$INFERENCE_NAMESPACE" \
+      --timeout=600s
+
+    echo "=== Job logs ==="
+    kubectl logs -l job-name=cpu-inference --namespace "$INFERENCE_NAMESPACE" --tail=50
+}
+
+all() {
+    check_prerequisites
+    create_infra
+    install_operators
+    status
+}
+
+usage() {
+    echo "Usage: $0 <command>"
+    echo ""
+    echo "Commands:"
+    echo "  check_prerequisites  Check and install required tools (az, helm)"
+    echo "  create_infra       Create resource group, AKS cluster, node pool, and fetch credentials"
+    echo "  install_operators    Install Kueue and KubeRay from MCR"
+    echo "  run_inference_cpu    Deploy and run the CPU inference example"
+    echo "  status               Show cluster and operator pod status"
+    echo "  all                Run all steps end-to-end"
+    echo ""
+    echo "Examples:"
+    echo "  $0 install_operators   # Just install Kueue + KubeRay"
+    echo "  $0 run_inference_cpu   # Run CPU inference example"
+    echo "  $0 all                 # Full setup from scratch"
+}
+
+# --- Prerequisites ---
+install_az_cli() {
+    echo "Installing Azure CLI..."
+    curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
+}
+
+install_helm() {
+    echo "Installing Helm..."
+    curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+}
+
+check_prerequisites() {
+    if ! command -v az &> /dev/null; then
+        echo "Azure CLI (az) not found. Installing..."
+        install_az_cli
+    fi
+
+    if ! command -v helm &> /dev/null; then
+        echo "Helm not found. Installing..."
+        install_helm
+    fi
+
+    echo "Prerequisites satisfied: az $(az version --query '"azure-cli"' -o tsv), helm $(helm version --short)"
+}
+
+COMMAND="${1:-}"
+case "$COMMAND" in
+    check_prerequisites|create_infra|install_operators|run_inference_cpu|status|all)
+        "$COMMAND"
+        ;;
+    *)
+        usage
+        exit 1
+        ;;
+esac
\ No newline at end of file