1515 */
1616package io .javaoperatorsdk .operator .monitoring .micrometer ;
1717
18+ import java .time .Duration ;
1819import java .util .*;
1920import java .util .concurrent .ConcurrentHashMap ;
2021import java .util .concurrent .atomic .AtomicInteger ;
4041public class MicrometerMetricsV2 implements Metrics {
4142
4243 private static final String CONTROLLER_NAME = "controller.name" ;
44+ private static final String NAMESPACE = "namespace" ;
4345 private static final String EVENT = "event" ;
4446 private static final String ACTION = "action" ;
4547 private static final String EVENTS_RECEIVED = "events.received" ;
@@ -59,13 +61,6 @@ public class MicrometerMetricsV2 implements Metrics {
5961 RECONCILIATIONS + "retries" + TOTAL_SUFFIX ;
6062 private static final String RECONCILIATIONS_STARTED = RECONCILIATIONS + "started" + TOTAL_SUFFIX ;
6163
62- private static final String CONTROLLERS = "controllers." ;
63-
64- private static final String CONTROLLERS_SUCCESSFUL_EXECUTION =
65- CONTROLLERS + SUCCESS_SUFFIX + TOTAL_SUFFIX ;
66- private static final String CONTROLLERS_FAILED_EXECUTION =
67- CONTROLLERS + FAILURE_SUFFIX + TOTAL_SUFFIX ;
68-
6964 private static final String RECONCILIATIONS_EXECUTIONS_GAUGE = RECONCILIATIONS + "executions" ;
7065 private static final String RECONCILIATIONS_QUEUE_SIZE_GAUGE = RECONCILIATIONS + "active" ;
7166 private static final String NUMBER_OF_RESOURCE_GAUGE = "custom_resources" ;
@@ -77,6 +72,7 @@ public class MicrometerMetricsV2 implements Metrics {
7772 private final Map <String , AtomicInteger > gauges = new ConcurrentHashMap <>();
7873 private final Map <String , Timer > executionTimers = new ConcurrentHashMap <>();
7974 private final Function <Timer .Builder , Timer .Builder > timerConfig ;
75+ private final boolean includeNamespaceTag ;
8076
8177 /**
8278 * Creates a new builder to configure how the eventual MicrometerMetricsV2 instance will behave,
@@ -98,15 +94,34 @@ public static MicrometerMetricsV2Builder newPerResourceCollectingMicrometerMetri
 * @param timerConfig optional configuration for timers; when {@code null}, timers default to
 *     explicit service level objective buckets ranging from 10 ms to 30 s
10096 */
private MicrometerMetricsV2(
    MeterRegistry registry, Consumer<Timer.Builder> timerConfig, boolean includeNamespaceTag) {
  this.registry = registry;
  this.includeNamespaceTag = includeNamespaceTag;
  if (timerConfig != null) {
    // Adapt the caller-supplied Consumer to the Function shape kept in the field.
    this.timerConfig =
        builder -> {
          timerConfig.accept(builder);
          return builder;
        };
  } else {
    // Default: explicit SLO bucket boundaries instead of publishPercentileHistogram().
    // With OtlpMeterRegistry (Micrometer 1.12+), publishPercentileHistogram() emits Base2
    // Exponential Histograms over OTLP; the OTel collector exposes those as Prometheus native
    // histograms, which do not work with histogram_quantile() or classic _bucket queries.
    // Explicit boundaries force the EXPLICIT_BUCKET_HISTOGRAM format, which the collector
    // exposes as _bucket metrics.
    this.timerConfig =
        builder ->
            builder.serviceLevelObjectives(
                Duration.ofMillis(10),
                Duration.ofMillis(50),
                Duration.ofMillis(100),
                Duration.ofMillis(250),
                Duration.ofMillis(500),
                Duration.ofSeconds(1),
                Duration.ofSeconds(2),
                Duration.ofSeconds(5),
                Duration.ofSeconds(10),
                Duration.ofSeconds(30));
  }
}
111126
112127 @ Override
@@ -138,29 +153,18 @@ private String numberOfResourcesRefName(String name) {
138153 return NUMBER_OF_RESOURCE_GAUGE + name ;
139154 }
140155
141- // todo make the implementation more extensible, like easily add tags for namespace into metrics
142- // todo does it make sense to have both controller and reconciler execution counters?
143156 @ Override
144157 public <T > T timeControllerExecution (ControllerExecution <T > execution ) {
145158 final var name = execution .controllerName ();
146-
147159 final var timer = executionTimers .get (name );
148- try {
149- final var result =
150- timer .record (
151- () -> {
152- try {
153- return execution .execute ();
154- } catch (Exception e ) {
155- throw new OperatorException (e );
156- }
157- });
158- registry .counter (CONTROLLERS_SUCCESSFUL_EXECUTION , CONTROLLER_NAME , name ).increment ();
159- return result ;
160- } catch (Exception e ) {
161- registry .counter (CONTROLLERS_FAILED_EXECUTION , CONTROLLER_NAME , name ).increment ();
162- throw e ;
163- }
160+ return timer .record (
161+ () -> {
162+ try {
163+ return execution .execute ();
164+ } catch (Exception e ) {
165+ throw new OperatorException (e );
166+ }
167+ });
164168 }
165169
166170 @ Override
@@ -172,14 +176,17 @@ public void receivedEvent(Event event, Map<String, Object> metadata) {
172176 if (resourceEvent .getAction () == ResourceAction .DELETED ) {
173177 gauges .get (numberOfResourcesRefName (getControllerName (metadata ))).decrementAndGet ();
174178 }
179+ var namespace = resourceEvent .getRelatedCustomResourceID ().getNamespace ().orElse (null );
175180 incrementCounter (
176181 EVENTS_RECEIVED ,
182+ namespace ,
177183 metadata ,
178184 Tag .of (EVENT , event .getClass ().getSimpleName ()),
179185 Tag .of (ACTION , resourceEvent .getAction ().toString ()));
180186 } else {
181187 incrementCounter (
182188 EVENTS_RECEIVED ,
189+ null ,
183190 metadata ,
184191 Tag .of (EVENT , event .getClass ().getSimpleName ()),
185192 Tag .of (ACTION , UNKNOWN_ACTION ));
@@ -188,20 +195,20 @@ public void receivedEvent(Event event, Map<String, Object> metadata) {
188195
189196 @ Override
190197 public void cleanupDoneFor (ResourceID resourceID , Map <String , Object > metadata ) {
191- incrementCounter (EVENTS_DELETE , metadata );
198+ incrementCounter (EVENTS_DELETE , resourceID . getNamespace (). orElse ( null ), metadata );
192199 }
193200
194201 @ Override
195202 public void submittedForReconciliation (
196203 HasMetadata resource , RetryInfo retryInfoNullable , Map <String , Object > metadata ) {
197204 Optional <RetryInfo > retryInfo = Optional .ofNullable (retryInfoNullable );
198205
199- // Record the counter without retry tags
200- incrementCounter (RECONCILIATIONS_STARTED , metadata );
206+ var namespace = resource . getMetadata (). getNamespace ();
207+ incrementCounter (RECONCILIATIONS_STARTED , namespace , metadata );
201208
202209 int retryNumber = retryInfo .map (RetryInfo ::getAttemptCount ).orElse (0 );
203210 if (retryNumber > 0 ) {
204- incrementCounter (RECONCILIATIONS_RETRIES_NUMBER , metadata );
211+ incrementCounter (RECONCILIATIONS_RETRIES_NUMBER , namespace , metadata );
205212 }
206213
207214 var controllerQueueSize =
@@ -212,7 +219,7 @@ public void submittedForReconciliation(
212219 @ Override
213220 public void successfullyFinishedReconciliation (
214221 HasMetadata resource , Map <String , Object > metadata ) {
215- incrementCounter (RECONCILIATIONS_SUCCESS , metadata );
222+ incrementCounter (RECONCILIATIONS_SUCCESS , resource . getMetadata (). getNamespace (), metadata );
216223 }
217224
218225 @ Override
@@ -237,7 +244,7 @@ public void reconciliationExecutionFinished(
237244 @ Override
238245 public void failedReconciliation (
239246 HasMetadata resource , RetryInfo retry , Exception exception , Map <String , Object > metadata ) {
240- incrementCounter (RECONCILIATIONS_FAILED , metadata );
247+ incrementCounter (RECONCILIATIONS_FAILED , resource . getMetadata (). getNamespace (), metadata );
241248 }
242249
243250 private static void addTag (String name , String value , List <Tag > tags ) {
@@ -252,11 +259,17 @@ private static void addControllerNameTag(String name, List<Tag> tags) {
252259 addTag (CONTROLLER_NAME , name , tags );
253260 }
254261
255- private void incrementCounter (
256- String counterName , Map <String , Object > metadata , Tag ... additionalTags ) {
262+ private void addNamespaceTag (String namespace , List <Tag > tags ) {
263+ if (includeNamespaceTag && namespace != null && !namespace .isBlank ()) {
264+ tags .add (Tag .of (NAMESPACE , namespace ));
265+ }
266+ }
257267
258- final var tags = new ArrayList <Tag >(1 + additionalTags .length );
268+ private void incrementCounter (
269+ String counterName , String namespace , Map <String , Object > metadata , Tag ... additionalTags ) {
270+ final var tags = new ArrayList <Tag >(2 + additionalTags .length );
259271 addControllerNameTag (metadata , tags );
272+ addNamespaceTag (namespace , tags );
260273 if (additionalTags .length > 0 ) {
261274 tags .addAll (List .of (additionalTags ));
262275 }
@@ -278,6 +291,7 @@ public static String getControllerName(Map<String, Object> metadata) {
278291 public static class MicrometerMetricsV2Builder {
279292 protected final MeterRegistry registry ;
280293 protected Consumer <Timer .Builder > executionTimerConfig = null ;
294+ protected boolean includeNamespaceTag = false ;
281295
282296 public MicrometerMetricsV2Builder (MeterRegistry registry ) {
283297 this .registry = registry ;
@@ -297,8 +311,22 @@ public MicrometerMetricsV2Builder withExecutionTimerConfig(
297311 return this ;
298312 }
299313
314+ /**
315+ * When enabled, a {@code namespace} tag is added to all per-reconciliation counters (started,
316+ * success, failure, retries, events, deletes). Gauges remain controller-scoped because
317+ * namespaces are not known at controller registration time.
318+ *
319+ * <p>Disabled by default to avoid unexpected cardinality increases in existing deployments.
320+ *
321+ * @return this builder for method chaining
322+ */
323+ public MicrometerMetricsV2Builder withNamespaceAsTag () {
324+ this .includeNamespaceTag = true ;
325+ return this ;
326+ }
327+
300328 public MicrometerMetricsV2 build () {
301- return new MicrometerMetricsV2 (registry , executionTimerConfig );
329+ return new MicrometerMetricsV2 (registry , executionTimerConfig , includeNamespaceTag );
302330 }
303331 }
304332}
0 commit comments