From 94072b70bbba4935631bb8ca548a92be556b61cc Mon Sep 17 00:00:00 2001
From: "ohotnikov.ivan"
Date: Thu, 16 Apr 2026 18:30:09 +0300
Subject: [PATCH 1/5] feat(detector): add Azure provider ID detection via IMDS

Implement DetectAzureProviderID function to query Azure Instance Metadata
Service (IMDS) for the resource ID. This enables automatic providerID
setup on Azure nodes, which is required for cluster-autoscaler to properly
track scaled nodes.

The detector gracefully handles non-Azure environments by returning
empty string when IMDS is unavailable.

Signed-off-by: ohotnikov.ivan
---
 pkg/detector/azure_provider.go | 104 +++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 pkg/detector/azure_provider.go

diff --git a/pkg/detector/azure_provider.go b/pkg/detector/azure_provider.go
new file mode 100644
index 0000000..b4b4a03
--- /dev/null
+++ b/pkg/detector/azure_provider.go
@@ -0,0 +1,104 @@
+/*
+Copyright 2025 The local-ccm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package detector
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"k8s.io/klog/v2"
+)
+
+const (
+	// AzureIMDSURL is the Azure Instance Metadata Service endpoint queried for
+	// the instance's compute resourceId, returned as plain text.
+	AzureIMDSURL = "http://169.254.169.254/metadata/instance/compute/resourceId?api-version=2021-02-01&format=text"
+
+	// AzureIMDSTimeout is the timeout for IMDS requests.
+	AzureIMDSTimeout = 2 * time.Second
+)
+
+// azureIMDSClient is shared by all IMDS requests. Its transport never uses a
+// proxy, so HTTP_PROXY/HTTPS_PROXY settings in the environment cannot redirect
+// the link-local metadata request, and it disables keep-alives so no idle
+// connection is held between requests.
+var azureIMDSClient = &http.Client{
+	Timeout:   AzureIMDSTimeout,
+	Transport: &http.Transport{Proxy: nil, DisableKeepAlives: true},
+}
+
+// DetectAzureProviderID queries the Azure Instance Metadata Service (IMDS) for
+// the instance resource ID and returns it formatted as "azure://<resourceID>".
+//
+// It returns ("", nil) when no provider ID can be determined: the IMDS request
+// fails (expected when not running in Azure), IMDS answers with a non-200
+// status, or the response body is empty. It returns a non-nil error when the
+// request cannot be created, when ctx is done by the time the request fails,
+// or when the response body cannot be read.
+func DetectAzureProviderID(ctx context.Context) (string, error) {
+	klog.V(3).Info("Attempting to detect Azure provider ID from IMDS")
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, AzureIMDSURL, nil)
+	if err != nil {
+		return "", fmt.Errorf("failed to create IMDS request: %w", err)
+	}
+
+	// Azure IMDS requires the Metadata header.
+	req.Header.Set("Metadata", "true")
+
+	klog.V(4).Infof("Sending request to Azure IMDS: %s", AzureIMDSURL)
+
+	resp, err := azureIMDSClient.Do(req)
+	if err != nil {
+		// A cancelled or expired caller context says nothing about whether
+		// this is an Azure VM, so report it instead of masking it.
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return "", fmt.Errorf("IMDS request aborted: %w", ctxErr)
+		}
+		// Any other failure is expected when not running in Azure.
+		klog.V(3).Infof("Azure IMDS not available (likely not running in Azure): %v", err)
+		return "", nil
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		klog.V(2).Infof("Azure IMDS returned non-OK status: %d", resp.StatusCode)
+		return "", nil
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("failed to read IMDS response: %w", err)
+	}
+
+	resourceID := strings.TrimSpace(string(body))
+	if resourceID == "" {
+		klog.V(2).Info("Azure IMDS returned empty resource ID")
+		return "", nil
+	}
+
+	// Format as Kubernetes provider ID
+	providerID := fmt.Sprintf("azure://%s", resourceID)
+
+	klog.V(2).Infof("Detected Azure provider ID: %s", providerID)
+
+	return providerID, nil
+}
From f5e1563dee0b4075d4cba434a9a5b7b40a8644e0 Mon Sep 17 00:00:00 2001
From: "ohotnikov.ivan"
Date: Thu, 16 Apr 2026 18:30:27 +0300
Subject: [PATCH 2/5] feat(node): add UpdateProviderID method to node updater

Add method to set spec.providerID on nodes via JSON patch. The method
includes idempotency check to avoid unnecessary updates and warns when
overwriting existing providerID values.

This enables cloud controllers to set provider-specific node identifiers
required by components like cluster-autoscaler.

Signed-off-by: ohotnikov.ivan
---
 pkg/node/updater.go | 63 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/pkg/node/updater.go b/pkg/node/updater.go
index b296f62..8d10017 100644
--- a/pkg/node/updater.go
+++ b/pkg/node/updater.go
@@ -147,3 +147,66 @@ func (u *Updater) RemoveTaint(ctx context.Context) error {
 func (u *Updater) GetNode(ctx context.Context) (*v1.Node, error) {
 	return u.client.CoreV1().Nodes().Get(ctx, u.nodeName, metav1.GetOptions{})
 }
+
+// UpdateProviderID sets spec.providerID on the node managed by this Updater.
+//
+// It fetches the node first and returns nil without patching when the current
+// value already equals providerID. Otherwise it sends a JSON patch and returns
+// a wrapped error if the node cannot be fetched, the patch cannot be
+// marshalled, or the API call fails.
+func (u *Updater) UpdateProviderID(ctx context.Context, providerID string) error {
+	klog.V(2).Infof("Updating providerID for node %s: %s", u.nodeName, providerID)
+
+	// Get current node to check if update is needed
+	node, err := u.GetNode(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get node: %w", err)
+	}
+
+	// Check if providerID is already set correctly
+	if node.Spec.ProviderID == providerID {
+		klog.V(3).Infof("ProviderID already set correctly on node %s, skipping update", u.nodeName)
+		return nil
+	}
+
+	// Log if we're overwriting an existing providerID.
+	// NOTE(review): the API server may reject changing a providerID that is
+	// already set, in which case the patch below fails — confirm.
+	if node.Spec.ProviderID != "" {
+		klog.Warningf("Overwriting existing providerID on node %s: %s -> %s",
+			u.nodeName, node.Spec.ProviderID, providerID)
+	}
+
+	// Use "add" rather than "replace": RFC 6902 "replace" requires the target
+	// member to exist, so it cannot set a providerID on a node whose spec does
+	// not carry one yet; "add" creates the member or replaces an existing value.
+	patch := []map[string]interface{}{
+		{
+			"op":    "add",
+			"path":  "/spec/providerID",
+			"value": providerID,
+		},
+	}
+
+	patchBytes, err := json.Marshal(patch)
+	if err != nil {
+		return fmt.Errorf("failed to marshal patch: %w", err)
+	}
+
+	klog.V(4).Infof("Applying providerID patch to node %s: %s", u.nodeName, string(patchBytes))
+
+	// Apply patch
+	_, err = u.client.CoreV1().Nodes().Patch(
+		ctx,
+		u.nodeName,
+		types.JSONPatchType,
+		patchBytes,
+		metav1.PatchOptions{},
+	)
+	if err != nil {
+		return fmt.Errorf("failed to patch node providerID: %w", err)
+	}
+
+	klog.Infof("Successfully updated providerID for node %s to %s", u.nodeName, providerID)
+	return nil
+}
From b4c30ff472e219c0c41dd210419b0f59c767059a Mon Sep 17 00:00:00 2001
From: "ohotnikov.ivan"
Date: Thu, 16 Apr 2026 18:30:53 +0300
Subject: [PATCH 3/5] feat(main): integrate Azure provider ID detection into reconcile loop

Add --enable-azure-provider-id flag to enable automatic provider ID
detection and setup on Azure nodes. When enabled, the controller will:
- Query Azure IMDS for the resource ID
- Set spec.providerID if detected
- Gracefully skip if not running in Azure

This resolves cluster-autoscaler issues where nodes lack providerID
and become orphaned from their cloud VM instances.
Signed-off-by: ohotnikov.ivan --- cmd/local-ccm/main.go | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/cmd/local-ccm/main.go b/cmd/local-ccm/main.go index fae702d..cc0145c 100644 --- a/cmd/local-ccm/main.go +++ b/cmd/local-ccm/main.go @@ -34,13 +34,14 @@ import ( ) var ( - nodeName string - kubeconfig string - internalIPTarget string - externalIPTarget string - runOnce bool - removeTaint bool - reconcileInterval time.Duration + nodeName string + kubeconfig string + internalIPTarget string + externalIPTarget string + runOnce bool + removeTaint bool + reconcileInterval time.Duration + enableAzureProviderID bool ) func init() { @@ -51,6 +52,7 @@ func init() { flag.BoolVar(&runOnce, "run-once", false, "Run once and exit instead of running in a loop") flag.BoolVar(&removeTaint, "remove-taint", true, "Remove node.cloudprovider.kubernetes.io/uninitialized taint") flag.DurationVar(&reconcileInterval, "reconcile-interval", 10*time.Second, "Interval between reconciliation loops") + flag.BoolVar(&enableAzureProviderID, "enable-azure-provider-id", false, "Enable Azure provider ID detection via IMDS") klog.InitFlags(nil) } @@ -63,8 +65,8 @@ func main() { } klog.Infof("Starting local-ccm for node %s", nodeName) - klog.V(2).Infof("Configuration: internalIPTarget=%q externalIPTarget=%q", - internalIPTarget, externalIPTarget) + klog.V(2).Infof("Configuration: internalIPTarget=%q externalIPTarget=%q azureProviderID=%v", + internalIPTarget, externalIPTarget, enableAzureProviderID) // Create Kubernetes client k8sClient, err := createKubernetesClient(kubeconfig) @@ -158,6 +160,21 @@ func reconcile(ctx context.Context, nodeUpdater *node.Updater) error { } } + // Set Azure provider ID if enabled + if enableAzureProviderID { + providerID, err := detector.DetectAzureProviderID(ctx) + if err != nil { + return fmt.Errorf("failed to detect Azure provider ID: %w", err) + } + if providerID != "" { + if err := 
nodeUpdater.UpdateProviderID(ctx, providerID); err != nil { + return fmt.Errorf("failed to update provider ID: %w", err) + } + } else { + klog.V(3).Info("Azure provider ID not detected (not running in Azure)") + } + } + // Remove taint if requested if removeTaint { if err := nodeUpdater.RemoveTaint(ctx); err != nil { From d5f072e6fadc164e6eb2b1401a9ba8657f1e416a Mon Sep 17 00:00:00 2001 From: "ohotnikov.ivan" Date: Thu, 16 Apr 2026 18:31:36 +0300 Subject: [PATCH 4/5] feat(helm): add Azure provider ID configuration to Helm chart Add azure.enableProviderID parameter to values.yaml to control Azure provider ID detection. When enabled, the DaemonSet will pass --enable-azure-provider-id flag to local-ccm controller. Update chart README with Azure configuration example and document the new parameter in the configuration table. Signed-off-by: ohotnikov.ivan --- charts/local-ccm/README.md | 13 +++++++++++++ charts/local-ccm/templates/daemonset.yaml | 3 +++ charts/local-ccm/values.yaml | 6 ++++++ 3 files changed, 22 insertions(+) diff --git a/charts/local-ccm/README.md b/charts/local-ccm/README.md index 7b9187d..bfbd572 100644 --- a/charts/local-ccm/README.md +++ b/charts/local-ccm/README.md @@ -7,6 +7,7 @@ Local Cloud Controller Manager for Kubernetes - automatically detects and manage - Automatic node IP address detection using routing table - Support for both internal and external IP detection - Automatic removal of cloud provider initialization taint +- Azure provider ID detection for cluster-autoscaler integration - Minimal resource footprint - Runs as DaemonSet on all nodes @@ -50,6 +51,17 @@ helm install local-ccm ./charts/local-ccm \ --set ipDetection.internalIPTarget=10.0.0.1 ``` +### Azure Configuration + +For Azure environments with cluster-autoscaler: + +```yaml +azure: + enableProviderID: true +``` + +This enables automatic detection and setting of `spec.providerID` on Azure nodes, which is required for cluster-autoscaler to properly track and manage nodes. 
+ ## Configuration | Parameter | Description | Default | @@ -64,6 +76,7 @@ helm install local-ccm ./charts/local-ccm \ | `controller.removeTaint` | Remove uninitialized taint | `true` | | `controller.reconcileInterval` | Reconciliation interval | `10s` | | `controller.verbosity` | Log verbosity level (0-5) | `2` | +| `azure.enableProviderID` | Enable Azure provider ID detection via IMDS | `false` | | `resources.requests.cpu` | CPU resource requests | `10m` | | `resources.requests.memory` | Memory resource requests | `32Mi` | | `resources.limits.cpu` | CPU resource limits | `100m` | diff --git a/charts/local-ccm/templates/daemonset.yaml b/charts/local-ccm/templates/daemonset.yaml index c45afc6..4698805 100644 --- a/charts/local-ccm/templates/daemonset.yaml +++ b/charts/local-ccm/templates/daemonset.yaml @@ -43,6 +43,9 @@ spec: - --remove-taint={{ .Values.controller.removeTaint }} - --reconcile-interval={{ .Values.controller.reconcileInterval }} - --v={{ .Values.controller.verbosity }} + {{- if .Values.azure.enableProviderID }} + - --enable-azure-provider-id=true + {{- end }} env: - name: NODE_NAME valueFrom: diff --git a/charts/local-ccm/values.yaml b/charts/local-ccm/values.yaml index d84bac3..b8a7e0f 100644 --- a/charts/local-ccm/values.yaml +++ b/charts/local-ccm/values.yaml @@ -23,6 +23,12 @@ controller: reconcileInterval: 10s # Verbosity level (0-5) verbosity: 2 +# Azure-specific configuration +azure: + # Enable Azure provider ID detection via Instance Metadata Service (IMDS) + # When enabled, the controller will automatically detect and set spec.providerID + # on Azure nodes, which is required for cluster-autoscaler to properly track nodes + enableProviderID: false # Pod resources resources: requests: From 4d669d0218c58e6cbc36bdd5aeefaae9dfb1e8ce Mon Sep 17 00:00:00 2001 From: "ohotnikov.ivan" Date: Thu, 16 Apr 2026 18:32:06 +0300 Subject: [PATCH 5/5] docs: document Azure provider ID support Update README with Azure provider ID feature in features list, 
configuration table, and example configurations. Add explanation of how it integrates with cluster-autoscaler on Azure. Signed-off-by: ohotnikov.ivan --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index fde7b09..3f4e7db 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ A lightweight Kubernetes cloud controller for bare-metal and on-premise clusters - **Configurable Targets**: Separate configuration for internal and external IP detection - **Non-Destructive Updates**: Preserves existing addresses (Hostname, InternalIP from kubelet), updates only managed fields - **Taint Removal**: Automatically removes `node.cloudprovider.kubernetes.io/uninitialized` taint +- **Azure Provider ID Support**: Automatic detection and setup of `spec.providerID` for Azure nodes via IMDS **Node Lifecycle Controller:** - **Autoscaler Integration**: Watches nodes tainted with `ToBeDeletedByClusterAutoscaler:NoSchedule` by default @@ -179,6 +180,7 @@ The following command-line flags are available: | `--remove-taint` | Remove node.cloudprovider.kubernetes.io/uninitialized taint | `true` | No | | `--reconcile-interval` | Interval between reconciliation loops | `10s` | No | | `--run-once` | Run once and exit instead of running in a loop | `false` | No | +| `--enable-azure-provider-id` | Enable Azure provider ID detection via IMDS | `false` | No | | `--kubeconfig` | Path to kubeconfig file (for local testing only) | In-cluster config | No | | `--v` | Log level (0-5) | `0` | No | @@ -224,6 +226,18 @@ Result: } ``` +#### Azure Provider ID (for cluster-autoscaler) + +```yaml +args: +- --node-name=$(NODE_NAME) +- --external-ip-target=8.8.8.8 +- --enable-azure-provider-id=true +- --reconcile-interval=10s +``` + +This configuration enables automatic detection and setup of `spec.providerID` on Azure nodes. The controller queries Azure Instance Metadata Service (IMDS) to retrieve the resource ID and sets it as the node's provider ID. 
This is required for Azure cluster-autoscaler to properly track and manage nodes. + After updating the DaemonSet args, restart the pods: ```bash