Skip to content

Commit d8c6dde

Browse files
authored
rgw usage metrics cleanup (#28)
* prometheus interval support
  On-behalf-of: SAP <filipp.akinfiev@clyso.com>
  Signed-off-by: Filipp Akinfiev <filipp.akinfiev@clyso.com>
* removed all unreliable metrics
  On-behalf-of: SAP <filipp.akinfiev@clyso.com>
  Signed-off-by: Filipp Akinfiev <filipp.akinfiev@clyso.com>
---------
Signed-off-by: Filipp Akinfiev <filipp.akinfiev@clyso.com>
1 parent d8ff0e5 commit d8c6dde

10 files changed

Lines changed: 72 additions & 538 deletions

File tree

pkg/commands/producer_ops_log.go

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ var (
2626
opsPromEnabled bool
2727
opsPromPort int
2828
opsIgnoreAnonymousRequests bool
29+
opsPromIntervalSeconds int
2930

3031
// MetricsConfig-related flags
3132
opsTrackRequestsByIP bool
@@ -73,18 +74,19 @@ Then restart all RadosGW daemons:
7374
Following this configuration change, the RadosGW will log operations to the file /var/log/ceph/ceph-rgw-ops.json.log.`,
7475
Run: func(cmd *cobra.Command, args []string) {
7576
config := opslog.OpsLogConfig{
76-
LogFilePath: opsLogFilePath,
77-
TruncateLogOnStart: opsTruncateLogOnStart,
78-
SocketPath: opsSocketPath,
79-
NatsURL: opsNatsURL,
80-
NatsSubject: opsNatsSubject,
81-
NatsMetricsSubject: opsNatsMetricsSubject,
82-
LogToStdout: opsLogToStdout,
83-
LogRetentionDays: opsLogRetentionDays,
84-
MaxLogFileSize: opsMaxLogFileSize,
85-
Prometheus: opsPromEnabled,
86-
PrometheusPort: opsPromPort,
87-
IgnoreAnonymousRequests: opsIgnoreAnonymousRequests,
77+
LogFilePath: opsLogFilePath,
78+
TruncateLogOnStart: opsTruncateLogOnStart,
79+
SocketPath: opsSocketPath,
80+
NatsURL: opsNatsURL,
81+
NatsSubject: opsNatsSubject,
82+
NatsMetricsSubject: opsNatsMetricsSubject,
83+
LogToStdout: opsLogToStdout,
84+
LogRetentionDays: opsLogRetentionDays,
85+
MaxLogFileSize: opsMaxLogFileSize,
86+
Prometheus: opsPromEnabled,
87+
PrometheusPort: opsPromPort,
88+
IgnoreAnonymousRequests: opsIgnoreAnonymousRequests,
89+
PrometheusIntervalSeconds: opsPromIntervalSeconds,
8890
MetricsConfig: opslog.MetricsConfig{
8991
TrackRequestsByIP: opsTrackRequestsByIP,
9092
TrackBytesSentByIP: opsTrackBytesSentByIP,
@@ -192,6 +194,7 @@ func mergeOpsLogConfigWithEnv(cfg opslog.OpsLogConfig) opslog.OpsLogConfig {
192194
cfg.PrometheusPort = getEnvInt("PROMETHEUS_PORT", cfg.PrometheusPort)
193195
cfg.PodName = getEnv("POD_NAME", cfg.PodName)
194196
cfg.IgnoreAnonymousRequests = getEnvBool("IGNORE_ANONYMOUS_REQUESTS", cfg.IgnoreAnonymousRequests)
197+
cfg.PrometheusIntervalSeconds = getEnvInt("PROMETHEUS_INTERVAL", cfg.PrometheusIntervalSeconds)
195198

196199
// MetricsConfig environment variables
197200
cfg.MetricsConfig.TrackRequestsByIP = getEnvBool("TRACK_REQUESTS_BY_IP", cfg.MetricsConfig.TrackRequestsByIP)
@@ -233,6 +236,7 @@ func init() {
233236
opsLogCmd.Flags().BoolVar(&opsPromEnabled, "prometheus", false, "Enable Prometheus metrics")
234237
opsLogCmd.Flags().IntVar(&opsPromPort, "prometheus-port", 8080, "Prometheus metrics port")
235238
opsLogCmd.Flags().BoolVar(&opsIgnoreAnonymousRequests, "ignore-anonymous-requests", true, "Ignore anonymous requests")
239+
opsLogCmd.Flags().IntVar(&opsPromIntervalSeconds, "prometheus-interval", 60, "Prometheus metrics update interval in seconds")
236240

237241
// Metrics Tracking Flags (All Disabled by Default)
238242
opsLogCmd.Flags().BoolVar(&opsTrackRequestsByIP, "track-requests-by-ip", false, "Track requests by IP")

pkg/producers/opslog/config.go

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,22 @@
55
package opslog
66

77
type OpsLogConfig struct {
8-
LogFilePath string
9-
TruncateLogOnStart bool
10-
SocketPath string
11-
NatsURL string
12-
NatsSubject string
13-
NatsMetricsSubject string
14-
UseNats bool
15-
LogToStdout bool
16-
LogRetentionDays int // Number of days to keep old log files
17-
MaxLogFileSize int64 // Maximum log file size in bytes before rotation
18-
Prometheus bool
19-
PrometheusPort int
20-
PodName string
21-
IgnoreAnonymousRequests bool
22-
MetricsConfig MetricsConfig
8+
LogFilePath string
9+
TruncateLogOnStart bool
10+
SocketPath string
11+
NatsURL string
12+
NatsSubject string
13+
NatsMetricsSubject string
14+
UseNats bool
15+
LogToStdout bool
16+
LogRetentionDays int // Number of days to keep old log files
17+
MaxLogFileSize int64 // Maximum log file size in bytes before rotation
18+
Prometheus bool
19+
PrometheusPort int
20+
PodName string
21+
IgnoreAnonymousRequests bool
22+
PrometheusIntervalSeconds int
23+
MetricsConfig MetricsConfig
2324
}
2425

2526
type MetricsConfig struct {

pkg/producers/opslog/opslog.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ func StartFileOpsLogger(cfg OpsLogConfig) {
7878

7979
// Initialize metrics
8080
metrics := NewMetrics()
81-
ticker := time.NewTicker(10 * time.Second) // Aggregation interval
81+
interval := time.Duration(cfg.PrometheusIntervalSeconds) * time.Second
82+
ticker := time.NewTicker(interval)
8283
defer ticker.Stop()
8384

8485
watcher := createLogWatcher(cfg)

pkg/producers/radosgwusage/README.md

Lines changed: 1 addition & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -58,40 +58,7 @@ Configuration can also be set through environment variables:
5858
## Metrics Collected
5959

6060
The RadosGW Usage Exporter collects and exposes the following metrics:
61-
### Operation Metrics
62-
63-
- `radosgw_usage_ops_total`: Total number of operations across all buckets and users.
64-
- `radosgw_usage_successful_ops_total`: Total number of successful operations across all buckets and
65-
users.
66-
- `radosgw_user_ops_total`: Total operations performed by each user.
67-
- `radosgw_user_read_ops_total`: Total read operations performed by each user.
68-
- `radosgw_user_write_ops_total`: Total write operations performed by each user.
69-
- `radosgw_user_success_ops_total`: Total number of successful operations per user.
70-
- `radosgw_bucket_ops_total`: Total operations performed in each bucket.
71-
- `radosgw_user_ops_per_sec`: Current number of operations (reads/writes) per second for each user.
72-
- `radosgw_bucket_ops_per_sec`: Current number of operations per second for each bucket.
73-
- `radosgw_bucket_read_ops_total`: Total read operations in each bucket.
74-
- `radosgw_bucket_write_ops_total`: Total write operations in each bucket.
75-
- `radosgw_bucket_success_ops_total`: Total successful operations for each bucket.
76-
77-
### Byte Metrics
78-
79-
- `radosgw_usage_sent_bytes_total`: Total bytes sent by RadosGW.
80-
- `radosgw_usage_received_bytes_total`: Total bytes received by RadosGW.
81-
- `radosgw_user_bytes_sent_total`: Total bytes sent by each user (cumulative).
82-
- `radosgw_user_bytes_received_total`: Total bytes received by each user (cumulative).
83-
- `radosgw_user_bytes_sent_per_sec`: Bytes sent by each user per second (rate).
84-
- `radosgw_user_bytes_received_per_sec`: Bytes received by each user per second (rate).
85-
- `radosgw_user_throughput_bytes_total`: Total throughput for each user in bytes (read and write
86-
combined).
87-
- `radosgw_user_throughput_bytes_per_sec`: Current throughput in bytes per second for each user
88-
(read and write combined).
89-
- `radosgw_bucket_bytes_sent_total`: Total bytes sent from each bucket.
90-
- `radosgw_bucket_bytes_received_total`: Total bytes received by each bucket.
91-
- `radosgw_bucket_bytes_sent_per_sec`: Current bytes sent per second from each bucket.
92-
- `radosgw_bucket_bytes_received_per_sec`: Current bytes received per second by each bucket.
93-
- `radosgw_bucket_throughput_bytes_per_sec`: Current throughput in bytes per second for each bucket (read and write combined).
94-
- `radosgw_bucket_throughput_bytes_total`: Total throughput for each bucket in bytes (read and write combined).
61+
9562

9663
### Bucket Usage Metrics
9764

@@ -114,31 +81,6 @@ The RadosGW Usage Exporter collects and exposes the following metrics:
11481
- `radosgw_usage_bucket_shards`: Number of shards in the bucket.
11582
- `radosgw_user_metadata`: User metadata (e.g., display name, email, storage class).
11683

117-
### Cluster-Level Metrics
118-
119-
- `radosgw_cluster_ops_total`: Total operations performed in the cluster.
120-
- `radosgw_cluster_reads_per_sec`: Total read operations per second for the entire cluster.
121-
- `radosgw_cluster_writes_per_sec`: Total write operations per second for the entire cluster.
122-
- `radosgw_cluster_ops_per_sec`: Current number of operations per second for the cluster.
123-
- `radosgw_cluster_bytes_sent_total`: Total bytes sent in the cluster.
124-
- `radosgw_cluster_bytes_received_total`: Total bytes received in the cluster.
125-
- `radosgw_cluster_bytes_sent_per_sec`: Total bytes sent per second for the entire cluster.
126-
- `radosgw_cluster_bytes_received_per_sec`: Total bytes received per second for the entire cluster.
127-
- `radosgw_cluster_throughput_bytes_total`: Total throughput of the cluster in bytes (read and write combined).
128-
- `radosgw_cluster_throughput_bytes_per_sec`: Total throughput in bytes per second for the entire cluster.
129-
- `radosgw_cluster_error_rate`: Error rate (percentage) for the entire cluster.
130-
- `radosgw_cluster_capacity_usage_bytes`: Total capacity used across the entire cluster in bytes.
131-
- `radosgw_cluster_success_ops_total`: Total successful operations across the entire cluster.
132-
133-
### API Usage Metrics
134-
135-
- `radosgw_api_usage_per_user`: API usage per user and per category.
136-
- `radosgw_api_usage_per_user_per_sec`: API usage per second per user and category.
137-
- `radosgw_api_usage_per_user_total_per_sec`: Total API usage per second for each user.
138-
- `radosgw_bucket_api_usage_total`: Total number of API operations by category for each bucket.
139-
- `radosgw_bucket_api_usage_per_sec`: Current API usage rate (ops per second) for each bucket and category.
140-
- `radosgw_bucket_api_usage_total_per_sec`: Total API usage per second for each bucket.
141-
14284
### Miscellaneous Metrics
14385

14486
- `radosgw_usage_scrape_duration_seconds`: Amount of time each scrape takes.

pkg/producers/radosgwusage/nats.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ func ensureStream(js nats.JetStreamContext, streamName string) error {
6161

6262
// publishEvent(nc, "sync_users", "in_progress", nil, map[string]string{"sync_mode": "full"})
6363
func publishEvent(nc *nats.Conn, eventType string, status string, ids []string, metadata map[string]string) error {
64-
eventData := map[string]interface{}{
64+
eventData := map[string]any{
6565
"event": eventType,
6666
"status": status,
6767
"ids": ids,
@@ -76,7 +76,7 @@ func publishEvent(nc *nats.Conn, eventType string, status string, ids []string,
7676

7777
func listenForEvents(nc *nats.Conn) {
7878
sub, err := nc.Subscribe("notifications", func(msg *nats.Msg) {
79-
var event map[string]interface{}
79+
var event map[string]any
8080
if err := json.Unmarshal(msg.Data, &event); err != nil {
8181
log.Error().Err(err).Msg("Failed to parse event")
8282
return
@@ -119,7 +119,7 @@ func listenForEvents(nc *nats.Conn) {
119119

120120
func retryFailedEvents(nc *nats.Conn) {
121121
sub, err := nc.Subscribe("notifications", func(msg *nats.Msg) {
122-
var event map[string]interface{}
122+
var event map[string]any
123123
if err := json.Unmarshal(msg.Data, &event); err != nil {
124124
log.Error().Err(err).Msg("Failed to parse event")
125125
return

0 commit comments

Comments
 (0)