diff --git a/README.md b/README.md index fde7b09..3f4e7db 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ A lightweight Kubernetes cloud controller for bare-metal and on-premise clusters - **Configurable Targets**: Separate configuration for internal and external IP detection - **Non-Destructive Updates**: Preserves existing addresses (Hostname, InternalIP from kubelet), updates only managed fields - **Taint Removal**: Automatically removes `node.cloudprovider.kubernetes.io/uninitialized` taint +- **Azure Provider ID Support**: Automatic detection and setup of `spec.providerID` for Azure nodes via IMDS **Node Lifecycle Controller:** - **Autoscaler Integration**: Watches nodes tainted with `ToBeDeletedByClusterAutoscaler:NoSchedule` by default @@ -179,6 +180,7 @@ The following command-line flags are available: | `--remove-taint` | Remove node.cloudprovider.kubernetes.io/uninitialized taint | `true` | No | | `--reconcile-interval` | Interval between reconciliation loops | `10s` | No | | `--run-once` | Run once and exit instead of running in a loop | `false` | No | +| `--enable-azure-provider-id` | Enable Azure provider ID detection via IMDS | `false` | No | | `--kubeconfig` | Path to kubeconfig file (for local testing only) | In-cluster config | No | | `--v` | Log level (0-5) | `0` | No | @@ -224,6 +226,18 @@ Result: } ``` +#### Azure Provider ID (for cluster-autoscaler) + +```yaml +args: +- --node-name=$(NODE_NAME) +- --external-ip-target=8.8.8.8 +- --enable-azure-provider-id=true +- --reconcile-interval=10s +``` + +This configuration enables automatic detection and setup of `spec.providerID` on Azure nodes. The controller queries Azure Instance Metadata Service (IMDS) to retrieve the resource ID and sets it as the node's provider ID. This is required for Azure cluster-autoscaler to properly track and manage nodes. 
+ After updating the DaemonSet args, restart the pods: ```bash diff --git a/charts/local-ccm/README.md b/charts/local-ccm/README.md index 7b9187d..bfbd572 100644 --- a/charts/local-ccm/README.md +++ b/charts/local-ccm/README.md @@ -7,6 +7,7 @@ Local Cloud Controller Manager for Kubernetes - automatically detects and manage - Automatic node IP address detection using routing table - Support for both internal and external IP detection - Automatic removal of cloud provider initialization taint +- Azure provider ID detection for cluster-autoscaler integration - Minimal resource footprint - Runs as DaemonSet on all nodes @@ -50,6 +51,17 @@ helm install local-ccm ./charts/local-ccm \ --set ipDetection.internalIPTarget=10.0.0.1 ``` +### Azure Configuration + +For Azure environments with cluster-autoscaler: + +```yaml +azure: + enableProviderID: true +``` + +This enables automatic detection and setting of `spec.providerID` on Azure nodes, which is required for cluster-autoscaler to properly track and manage nodes. 
+ ## Configuration | Parameter | Description | Default | @@ -64,6 +76,7 @@ helm install local-ccm ./charts/local-ccm \ | `controller.removeTaint` | Remove uninitialized taint | `true` | | `controller.reconcileInterval` | Reconciliation interval | `10s` | | `controller.verbosity` | Log verbosity level (0-5) | `2` | +| `azure.enableProviderID` | Enable Azure provider ID detection via IMDS | `false` | | `resources.requests.cpu` | CPU resource requests | `10m` | | `resources.requests.memory` | Memory resource requests | `32Mi` | | `resources.limits.cpu` | CPU resource limits | `100m` | diff --git a/charts/local-ccm/templates/daemonset.yaml b/charts/local-ccm/templates/daemonset.yaml index c45afc6..4698805 100644 --- a/charts/local-ccm/templates/daemonset.yaml +++ b/charts/local-ccm/templates/daemonset.yaml @@ -43,6 +43,9 @@ spec: - --remove-taint={{ .Values.controller.removeTaint }} - --reconcile-interval={{ .Values.controller.reconcileInterval }} - --v={{ .Values.controller.verbosity }} + {{- if .Values.azure.enableProviderID }} + - --enable-azure-provider-id=true + {{- end }} env: - name: NODE_NAME valueFrom: diff --git a/charts/local-ccm/values.yaml b/charts/local-ccm/values.yaml index d84bac3..b8a7e0f 100644 --- a/charts/local-ccm/values.yaml +++ b/charts/local-ccm/values.yaml @@ -23,6 +23,12 @@ controller: reconcileInterval: 10s # Verbosity level (0-5) verbosity: 2 +# Azure-specific configuration +azure: + # Enable Azure provider ID detection via Instance Metadata Service (IMDS) + # When enabled, the controller will automatically detect and set spec.providerID + # on Azure nodes, which is required for cluster-autoscaler to properly track nodes + enableProviderID: false # Pod resources resources: requests: diff --git a/cmd/local-ccm/main.go b/cmd/local-ccm/main.go index fae702d..cc0145c 100644 --- a/cmd/local-ccm/main.go +++ b/cmd/local-ccm/main.go @@ -34,13 +34,14 @@ import ( ) var ( - nodeName string - kubeconfig string - internalIPTarget string - 
externalIPTarget string - runOnce bool - removeTaint bool - reconcileInterval time.Duration + nodeName string + kubeconfig string + internalIPTarget string + externalIPTarget string + runOnce bool + removeTaint bool + reconcileInterval time.Duration + enableAzureProviderID bool ) func init() { @@ -51,6 +52,7 @@ func init() { flag.BoolVar(&runOnce, "run-once", false, "Run once and exit instead of running in a loop") flag.BoolVar(&removeTaint, "remove-taint", true, "Remove node.cloudprovider.kubernetes.io/uninitialized taint") flag.DurationVar(&reconcileInterval, "reconcile-interval", 10*time.Second, "Interval between reconciliation loops") + flag.BoolVar(&enableAzureProviderID, "enable-azure-provider-id", false, "Enable Azure provider ID detection via IMDS") klog.InitFlags(nil) } @@ -63,8 +65,8 @@ func main() { } klog.Infof("Starting local-ccm for node %s", nodeName) - klog.V(2).Infof("Configuration: internalIPTarget=%q externalIPTarget=%q", - internalIPTarget, externalIPTarget) + klog.V(2).Infof("Configuration: internalIPTarget=%q externalIPTarget=%q azureProviderID=%v", + internalIPTarget, externalIPTarget, enableAzureProviderID) // Create Kubernetes client k8sClient, err := createKubernetesClient(kubeconfig) @@ -158,6 +160,21 @@ func reconcile(ctx context.Context, nodeUpdater *node.Updater) error { } } + // Set Azure provider ID if enabled + if enableAzureProviderID { + providerID, err := detector.DetectAzureProviderID(ctx) + if err != nil { + return fmt.Errorf("failed to detect Azure provider ID: %w", err) + } + if providerID != "" { + if err := nodeUpdater.UpdateProviderID(ctx, providerID); err != nil { + return fmt.Errorf("failed to update provider ID: %w", err) + } + } else { + klog.V(3).Info("Azure provider ID not detected (not running in Azure)") + } + } + // Remove taint if requested if removeTaint { if err := nodeUpdater.RemoveTaint(ctx); err != nil { diff --git a/pkg/detector/azure_provider.go b/pkg/detector/azure_provider.go new file mode 100644 index 
0000000..b4b4a03
--- /dev/null
+++ b/pkg/detector/azure_provider.go
@@ -0,0 +1,128 @@
+/*
+Copyright 2025 The local-ccm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package detector
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"k8s.io/klog/v2"
+)
+
+const (
+	// AzureIMDSURL is the Azure Instance Metadata Service endpoint queried for
+	// the resource ID; format=text requests a plain-text response.
+	AzureIMDSURL = "http://169.254.169.254/metadata/instance/compute/resourceId?api-version=2021-02-01&format=text"
+
+	// AzureIMDSTimeout is the timeout for IMDS requests
+	AzureIMDSTimeout = 2 * time.Second
+
+	// azureIMDSMaxResponseBytes caps how much of the IMDS response body is
+	// accepted, so an unexpected responder on that address cannot make the
+	// controller buffer an arbitrarily large payload.
+	azureIMDSMaxResponseBytes = 4096
+
+	// azureResourceIDPrefix is the (lower-case) prefix a response must start
+	// with, compared case-insensitively, to be accepted as a resource ID.
+	azureResourceIDPrefix = "/subscriptions/"
+)
+
+// DetectAzureProviderID queries the Azure Instance Metadata Service (IMDS) for
+// the resource ID and returns it as a provider ID made of the "azure://"
+// scheme followed by that resource ID.
+//
+// It returns ("", nil) when no provider ID could be determined: the request
+// failed for a reason other than ctx being done (expected when not running in
+// Azure), the status was not 200 OK, or the body was empty, oversized, or did
+// not start with "/subscriptions/". It returns a non-nil error when the request
+// cannot be created, ctx is cancelled or past its deadline, or reading the
+// response body fails.
+func DetectAzureProviderID(ctx context.Context) (string, error) {
+	klog.V(3).Info("Attempting to detect Azure provider ID from IMDS")
+
+	// Create HTTP client with timeout
+	client := &http.Client{
+		Timeout: AzureIMDSTimeout,
+	}
+
+	// Create request with context
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, AzureIMDSURL, nil)
+	if err != nil {
+		return "", fmt.Errorf("failed to create IMDS request: %w", err)
+	}
+
+	// Azure IMDS requires Metadata header
+	req.Header.Set("Metadata", "true")
+
+	klog.V(4).Infof("Sending request to Azure IMDS: %s", AzureIMDSURL)
+
+	// Execute request
+	resp, err := client.Do(req)
+	if err != nil {
+		// A cancelled or expired caller context says nothing about whether
+		// we are in Azure, so surface it instead of reporting "not in Azure".
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return "", fmt.Errorf("IMDS request aborted: %w", ctxErr)
+		}
+		// Any other failure is expected when not running in Azure
+		klog.V(3).Infof("Azure IMDS not available (likely not running in Azure): %v", err)
+		return "", nil
+	}
+	defer resp.Body.Close()
+
+	// Check response status
+	if resp.StatusCode != http.StatusOK {
+		klog.V(2).Infof("Azure IMDS returned non-OK status: %d", resp.StatusCode)
+		return "", nil
+	}
+
+	// Read one byte past the cap so an oversized body can be told apart from
+	// one that exactly fits.
+	body, err := io.ReadAll(io.LimitReader(resp.Body, azureIMDSMaxResponseBytes+1))
+	if err != nil {
+		return "", fmt.Errorf("failed to read IMDS response: %w", err)
+	}
+	if len(body) > azureIMDSMaxResponseBytes {
+		klog.V(2).Infof("Azure IMDS response exceeds %d bytes, ignoring", azureIMDSMaxResponseBytes)
+		return "", nil
+	}
+
+	resourceID := strings.TrimSpace(string(body))
+	if resourceID == "" {
+		klog.V(2).Info("Azure IMDS returned empty resource ID")
+		return "", nil
+	}
+
+	// Only accept something shaped like a resource ID: whatever is returned
+	// here ends up in spec.providerID, so a 200 response from some other
+	// service on this address must not be passed through.
+	if !strings.HasPrefix(strings.ToLower(resourceID), azureResourceIDPrefix) {
+		klog.V(2).Infof("Azure IMDS returned unexpected resource ID %q, ignoring", resourceID)
+		return "", nil
+	}
+
+	// Format as Kubernetes provider ID
+	providerID := fmt.Sprintf("azure://%s", resourceID)
+
+	klog.V(2).Infof("Detected Azure provider ID: %s", providerID)
+
+	return providerID, nil
+}
diff --git a/pkg/node/updater.go b/pkg/node/updater.go
index b296f62..8d10017 100644
--- a/pkg/node/updater.go
+++ b/pkg/node/updater.go
@@ -147,3 +147,68 @@ func (u *Updater) RemoveTaint(ctx context.Context) error {
 func (u *Updater) GetNode(ctx context.Context) (*v1.Node, error) {
 	return u.client.CoreV1().Nodes().Get(ctx, u.nodeName, metav1.GetOptions{})
 }
+
+// UpdateProviderID sets spec.providerID on the node managed by this Updater.
+//
+// The node is fetched first: if its providerID already equals providerID, no
+// patch is sent and nil is returned; if it holds a different non-empty value,
+// a warning is logged before the patch is attempted. An error is returned when
+// the node cannot be fetched, the patch cannot be marshalled, or the patch
+// request fails.
+func (u *Updater) UpdateProviderID(ctx context.Context, providerID string) error {
+	klog.V(2).Infof("Updating providerID for node %s: %s", u.nodeName, providerID)
+
+	// Get current node to check if update is needed
+	node, err := u.GetNode(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get node: %w", err)
+	}
+
+	// Check if providerID is already set correctly
+	if node.Spec.ProviderID == providerID {
+		klog.V(3).Infof("ProviderID already set correctly on node %s, skipping update", u.nodeName)
+		return nil
+	}
+
+	// Log if we're overwriting an existing providerID.
+	// NOTE(review): the API server is believed to reject changes to an
+	// already-set providerID, so the patch below may fail here -- confirm.
+	if node.Spec.ProviderID != "" {
+		klog.Warningf("Overwriting existing providerID on node %s: %s -> %s",
+			u.nodeName, node.Spec.ProviderID, providerID)
+	}
+
+	// Build the JSON patch. "add" is used rather than "replace": an unset
+	// providerID is omitted from the serialized node, and a JSON Patch
+	// "replace" fails when its target member does not exist, whereas "add"
+	// creates the member or overwrites its existing value.
+	patch := []map[string]interface{}{
+		{
+			"op":    "add",
+			"path":  "/spec/providerID",
+			"value": providerID,
+		},
+	}
+
+	patchBytes, err := json.Marshal(patch)
+	if err != nil {
+		return fmt.Errorf("failed to marshal patch: %w", err)
+	}
+
+	klog.V(4).Infof("Applying providerID patch to node %s: %s", u.nodeName, string(patchBytes))
+
+	// Apply patch
+	_, err = u.client.CoreV1().Nodes().Patch(
+		ctx,
+		u.nodeName,
+		types.JSONPatchType,
+		patchBytes,
+		metav1.PatchOptions{},
+	)
+	if err != nil {
+		return fmt.Errorf("failed to patch node providerID: %w", err)
+	}
+
+	klog.Infof("Successfully updated providerID for node %s to %s", u.nodeName, providerID)
+	return nil
+}