cobaltcore-dev · umswmayj · Mar 25, 2026 · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026
@@ -36,6 +36,13 @@ const (
 	ReservationTypeLabelFailover          = "failover"
 )
 
+// Annotation keys for Reservation metadata.
+const (
+	// AnnotationCreatorRequestID is the annotation key that tracks the ID of the
+	// request that created this reservation. It is used for end-to-end
+	// traceability across API calls, controller reconciles, and scheduler
+	// invocations.
+	AnnotationCreatorRequestID = "reservations.cortex.cloud/creator-request-id"
+)
+
 // CommittedResourceAllocation represents a workload's assignment to a committed resource reservation slot.
 // The workload could be a VM (Nova/IronCore), Pod (Kubernetes), or other resource.
 type CommittedResourceAllocation struct {

@@ -340,7 +340,7 @@ func main() {
 		// Initialize commitments API for LIQUID interface (with Nova client for usage reporting)
 		commitmentsConfig := conf.GetConfigOrDie[commitments.Config]()
 		commitmentsAPI := commitments.NewAPIWithConfig(multiclusterClient, commitmentsConfig, novaClient)
-		commitmentsAPI.Init(mux, metrics.Registry)
+		commitmentsAPI.Init(mux, metrics.Registry, ctrl.Log.WithName("commitments-api"))
 
 		deschedulingsController := &nova.DetectorPipelineController{
 			Monitor: detectorPipelineMonitor,

@@ -0,0 +1,104 @@
+# Committed Resource Reservation System
+
+The committed resource reservation system manages capacity commitments, i.e. strict reservation guarantees usable by projects. 
+When customers pre-commit to resource usage, Cortex reserves capacity on hypervisors to guarantee availability.
+The system integrates with Limes (via the LIQUID protocol) to receive commitments, expose usage and capacity data, and provide acceptance/rejection feedback.
+
+## File Structure
+
+```text
+internal/scheduling/reservations/commitments/
+├── config.go                          # Configuration (intervals, API flags, secrets)
+├── controller.go                      # Reconciliation of reservations
+├── syncer.go                          # Periodic sync task with Limes, ensures local state matches Limes' commitments
+├── reservation_manager.go             # Reservation CRUD operations
+├── api.go                             # HTTP API initialization
+├── api_change_commitments.go          # Handles commitment changes from Limes and updates local reservations accordingly
+├── api_report_usage.go                # Report VM usage per project, accounting to commitments or PAYG
+├── api_report_capacity.go             # Report capacity per AZ
+├── api_info.go                        # Readiness endpoint with versioning (of underlying flavor group configuration)
+├── capacity.go                        # Capacity calculation from Hypervisor CRDs
+├── usage.go                           # VM-to-commitment assignment logic
+├── flavor_group_eligibility.go        # Validates VMs belong to correct flavor groups
+└── state.go                           # Commitment state helper functions
+```
+
+## Operations
+
+### Configuration
+
+| Helm Value | Description |
+|------------|-------------|
+| `committedResourceEnableChangeCommitmentsAPI` | Enable/disable the change-commitments endpoint |
+| `committedResourceEnableReportUsageAPI` | Enable/disable the usage reporting endpoint |
+| `committedResourceEnableReportCapacityAPI` | Enable/disable the capacity reporting endpoint |
+| `committedResourceRequeueIntervalActive` | How often to revalidate active reservations |
+| `committedResourceRequeueIntervalRetry` | Retry interval when knowledge is not ready |
+| `committedResourceChangeAPIWatchReservationsTimeout` | Timeout waiting for reservations to become ready while processing commitment changes via API |
+| `committedResourcePipelineDefault` | Default scheduling pipeline |
+| `committedResourceFlavorGroupPipelines` | Map of flavor group to pipeline name |
+| `committedResourceSyncInterval` | How often the syncer reconciles Limes commitments to Reservation CRDs |
+
+Each API endpoint can be disabled independently. The periodic sync task can be disabled by removing it (`commitments-sync-task`) from the list of enabled tasks in the `cortex-nova` Helm chart.
+
+### Observability
+
+Alerts and metrics are defined in `helm/bundles/cortex-nova/alerts/nova.alerts.yaml`. Key metric prefixes:
+- `cortex_committed_resource_change_api_*` - Change API metrics
+- `cortex_committed_resource_usage_api_*` - Usage API metrics
+- `cortex_committed_resource_capacity_api_*` - Capacity API metrics
+
+## Architecture Overview
+
+```mermaid
+flowchart LR
+    subgraph State
+        Res[(Reservation CRDs)]
+    end
+
+    ChangeAPI[Change API]
+    UsageAPI[Usage API]
+    Syncer[Syncer Task]
+    Controller[Controller]
+    Scheduler[Scheduler API]
+
+    ChangeAPI -->|CRUD| Res
+    Syncer -->|CRUD| Res
+    UsageAPI -->|read| Res
+    Res -->|watch| Controller
+    Controller -->|update spec/status| Res
+    Controller -->|placement request| Scheduler
+```
+
+Reservations are managed through the Change API, Syncer Task, and Controller reconciliation. The Usage API provides read-only access to report usage data back to Limes.
+
+### Change-Commitments API
+
+The change-commitments API receives batched commitment changes from Limes. A request can contain multiple commitment changes across different projects and flavor groups. The semantics are **all-or-nothing**: if any commitment in the batch cannot be fulfilled (e.g., insufficient capacity), the entire request is rejected and rolled back.
+
+Cortex performs CRUD operations on local Reservation CRDs to match the new desired state:
+- Creates new reservations for increased commitment amounts
+- Deletes existing reservations
+- Preserves existing reservations that already have VMs allocated when possible
+
+### Syncer Task
+
+The syncer task runs periodically and fetches all commitments from Limes. It syncs the local Reservation CRD state to match Limes' view of commitments.
+
+### Controller (Reconciliation)
+
+The controller watches Reservation CRDs and performs reconciliation:
+
+1. **For new reservations** (no target host assigned):
+   - Sends a placement request to the Cortex Scheduler API to find a suitable host
+   - Assigns the target host and marks the reservation as Ready
+
+2. **For existing reservations** (already have a target host):
+   - Validates that allocated VMs are still on the expected host
+   - Updates allocations if VMs have migrated or been deleted
+   - Requeues for periodic revalidation
+
+### Usage API
+
+For a given project, this API reports the total committed resources and the usage per flavor group. For each VM, it reports whether the VM is accounted to a specific commitment or to pay-as-you-go (PAYG). This assignment is deterministic and may differ from the actual Cortex internal assignment used for scheduling.
+
@@ -3,7 +3,7 @@ module github.com/cobaltcore-dev/cortex
 go 1.26
 
 require (
-	github.com/cobaltcore-dev/openstack-hypervisor-operator v1.0.1
+	github.com/cobaltcore-dev/openstack-hypervisor-operator v1.0.2-0.20260324155836-56b40c7ff846
 	github.com/go-gorp/gorp v2.2.0+incompatible
 	github.com/gophercloud/gophercloud/v2 v2.11.1
 	github.com/ironcore-dev/ironcore v0.2.4

@@ -22,6 +22,8 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cobaltcore-dev/openstack-hypervisor-operator v1.0.1 h1:wXolWfljyQQZbxNQ2pZVIw8wFz9BKiDIvLrECsqGDT8=
 github.com/cobaltcore-dev/openstack-hypervisor-operator v1.0.1/go.mod h1:b0KmJdxvRI8UXlGe8cRm5BD8Tm2WhF7zSKMSIRGyVL4=
+github.com/cobaltcore-dev/openstack-hypervisor-operator v1.0.2-0.20260324155836-56b40c7ff846 h1:Hg5+F1lOUpU9dZ8gVxeohodtYC4Z1fV/iqwYoF/RuNc=
+github.com/cobaltcore-dev/openstack-hypervisor-operator v1.0.2-0.20260324155836-56b40c7ff846/go.mod h1:j1SaxUTo0irugdC7aHuYDKEomIPZwCHoz+4kE8EBBGM=
 github.com/containerd/continuity v0.4.5 h1:ZRoN1sXq9u7V6QoHMcVWGhOwDFqZ4B9i5H6un1Wh0x4=
 github.com/containerd/continuity v0.4.5/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE=
 github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=

@@ -5,7 +5,7 @@ apiVersion: v2
 name: cortex-cinder
 description: A Helm chart deploying Cortex for Cinder.
 type: application
-version: 0.0.43
+version: 0.0.44
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex-postgres
@@ -16,12 +16,12 @@ dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
     alias: cortex-knowledge-controllers
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
     alias: cortex-scheduling-controllers
 
   # Owner info adds a configmap to the kubernetes cluster with information on

@@ -5,13 +5,13 @@ apiVersion: v2
 name: cortex-crds
 description: A Helm chart deploying Cortex CRDs.
 type: application
-version: 0.0.43
+version: 0.0.44
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
 
   # Owner info adds a configmap to the kubernetes cluster with information on
   # the service owner. This makes it easier to find out who to contact in case

@@ -5,13 +5,13 @@ apiVersion: v2
 name: cortex-ironcore
 description: A Helm chart deploying Cortex for IronCore.
 type: application
-version: 0.0.43
+version: 0.0.44
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
 
   # Owner info adds a configmap to the kubernetes cluster with information on
   # the service owner. This makes it easier to find out who to contact in case

@@ -5,7 +5,7 @@ apiVersion: v2
 name: cortex-manila
 description: A Helm chart deploying Cortex for Manila.
 type: application
-version: 0.0.43
+version: 0.0.44
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex-postgres
@@ -16,12 +16,12 @@ dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
     alias: cortex-knowledge-controllers
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
     alias: cortex-scheduling-controllers
 
   # Owner info adds a configmap to the kubernetes cluster with information on

@@ -5,7 +5,7 @@ apiVersion: v2
 name: cortex-nova
 description: A Helm chart deploying Cortex for Nova.
 type: application
-version: 0.0.43
+version: 0.0.44
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex-postgres
@@ -16,12 +16,12 @@ dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
     alias: cortex-knowledge-controllers
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
     alias: cortex-scheduling-controllers
 
   # Owner info adds a configmap to the kubernetes cluster with information on

@@ -85,6 +85,12 @@ spec:
         hypervisor resource. Note that hosts allowing all projects are still
         accessible and will not be filtered out. In this way some hypervisors
         are made accessible to some projects only.
+    - name: filter_aggregate_metadata
+      description: |
+        This step filters hosts based on metadata defined in their aggregates. For
+        example, if an aggregate has the metadata "filter_tenant_id": "<project_id>",
+        only hosts in that aggregate that match the project ID in the nova request
+        will pass this filter.
     - name: filter_live_migratable
       description: |
         This step ensures that the target host of a live migration can accept
@@ -125,6 +131,13 @@ spec:
         into the smallest gaps possible, it spreads the load to ensure
         workloads are balanced across hosts. In this pipeline, the balancing will
         focus on general purpose virtual machines.
+    - name: kvm_failover_evacuation
+      description: |
+        This weigher prefers hosts with active failover reservations during
+        evacuation requests. Hosts matching a failover reservation where the
+        VM is allocated get a higher weight, encouraging placement on
+        pre-reserved failover capacity. For non-evacuation requests, this
+        weigher has no effect.
 ---
 apiVersion: cortex.cloud/v1alpha1
 kind: Pipeline
@@ -211,6 +224,12 @@ spec:
         hypervisor resource. Note that hosts allowing all projects are still
         accessible and will not be filtered out. In this way some hypervisors
         are made accessible to some projects only.
+    - name: filter_aggregate_metadata
+      description: |
+        This step filters hosts based on metadata defined in their aggregates. For
+        example, if an aggregate has the metadata "filter_tenant_id": "<project_id>",
+        only hosts in that aggregate that match the project ID in the nova request
+        will pass this filter.
     - name: filter_live_migratable
       description: |
         This step ensures that the target host of a live migration can accept
@@ -248,6 +267,13 @@ spec:
         It pulls the requested vm into the smallest gaps possible, to ensure
         other hosts with less allocation stay free for bigger vms.
         In this pipeline, the binpacking will focus on hana virtual machines.
+    - name: kvm_failover_evacuation
+      description: |
+        This weigher prefers hosts with active failover reservations during
+        evacuation requests. Hosts matching a failover reservation where the
+        VM is allocated get a higher weight, encouraging placement on
+        pre-reserved failover capacity. For non-evacuation requests, this
+        weigher has no effect.
 ---
 apiVersion: cortex.cloud/v1alpha1
 kind: Pipeline
@@ -523,5 +549,12 @@ spec:
         This step will filter out hosts that do not meet the compute capabilities
         requested by the nova flavor extra specs, like `{"arch": "x86_64",
         "maxphysaddr:bits": 46, ...}`.
-  weighers: []
+  weighers:
+    - name: kvm_failover_evacuation
+      description: |
+        This weigher prefers hosts with active failover reservations during
+        evacuation requests. Hosts matching a failover reservation where the
+        VM is allocated get a higher weight, encouraging placement on
+        pre-reserved failover capacity. For non-evacuation requests, this
+        weigher has no effect.
 {{- end }}
@@ -164,6 +164,12 @@ cortex-scheduling-controllers:
     # Whether the change-commitments API endpoint is active
     # When false, the endpoint returns HTTP 503. The info endpoint remains available.
     committedResourceEnableChangeCommitmentsAPI: true
+    # Whether the report-usage API endpoint is active
+    # When false, the endpoint returns HTTP 503.
+    committedResourceEnableReportUsageAPI: true
+    # Whether the report-capacity API endpoint is active
+    # When false, the endpoint returns HTTP 503.
+    committedResourceEnableReportCapacityAPI: true
     # OvercommitMappings is a list of mappings that map hypervisor traits to
     # overcommit ratios. Note that this list is applied in order, so if there
     # are multiple mappings applying to the same hypervisors, the last mapping
@@ -177,13 +183,13 @@ cortex-scheduling-controllers:
     # Maps flavor name patterns (glob) to required failover count
     # Example: {"hana_*": 2, "m1.xlarge": 1}
     flavorFailoverRequirements:
-      "*": 1
+      "*": 2
     # How often to check for missing failover reservations (periodic bulk reconciliation)
-    reconcileInterval: 15m
+    reconcileInterval: 5m
     # Used for the first reconcile and whenever maxVMsToProcess limits processing, to allow faster catch-up
     shortReconcileInterval: 1m
     # Number of max VMs to process in one periodic reconciliation loop
-    maxVMsToProcess: 5
+    maxVMsToProcess: 25
     # Minimum successful reservations to use short interval
     minSuccessForShortInterval: 1
     # Maximum failures allowed to still use short interval

@@ -5,13 +5,13 @@ apiVersion: v2
 name: cortex-pods
 description: A Helm chart deploying Cortex for Pods.
 type: application
-version: 0.0.43
+version: 0.0.44
 appVersion: 0.1.0
 dependencies:
   # from: file://../../library/cortex
   - name: cortex
     repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
-    version: 0.0.30
+    version: 0.0.31
 
   # Owner info adds a configmap to the kubernetes cluster with information on
   # the service owner. This makes it easier to find out who to contact in case

@@ -2,7 +2,7 @@ apiVersion: v2
 name: cortex
 description: A Helm chart to distribute cortex.
 type: application
-version: 0.0.30
-appVersion: "sha-ca02a516"
+version: 0.0.31
+appVersion: "sha-88946405"
 icon: "https://example.com/icon.png"
 dependencies: []