From 95d8a2bdf6af7c98aec8e8b0980bf70a129274e1 Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Tue, 3 Mar 2026 17:14:14 -0300 Subject: [PATCH 1/9] feat: integrate OpenTelemetry for enhanced observability - Added OpenTelemetry support for metrics collection and monitoring across services. - Implemented Prometheus metrics exporter and configured HTTP handlers for metrics exposure. - Updated Docker Compose to include Prometheus and OpenTelemetry Collector services for observability. - Enhanced middleware to record HTTP metrics, including request counts and durations. - Introduced telemetry context management for better integration with existing services. - Updated service configurations to enable metrics collection and reporting. --- cmd/api/main.go | 14 +++ cmd/setup/backoffice/httpserver.go | 8 +- cmd/setup/httpsvc/http_api.go | 10 +- cmd/setup/middleware/middleware.go | 72 +++++++++++- cmd/setup/pubsub/http_api.go | 2 +- cmd/setup/task/http_api.go | 2 +- deployment/app-pgsql/docker-compose.yaml | 39 +++++++ .../app-pgsql/otel-collector-config.yaml | 16 +++ .../app-pgsql/prometheus/prometheus.yml | 26 +++++ .../provisioning/dashboards/dashboards.yml | 10 ++ .../dashboards/metrics-overview.json | 92 +++++++++++++++ .../provisioning/datasources/prometheus.yml | 10 ++ example/example.md | 4 +- .../simulation/multiples_producer_pubsub.sh | 22 ++++ go.mod | 35 ++++-- go.sum | 82 +++++++++----- .../app/backofficeapp/get_event_handle.go | 30 +++++ .../{get_event.go => get_events_handle.go} | 22 ---- .../app/pubsubapp/publisher_handle_http.go | 9 ++ internal/app/taskapp/publisher_handle_http.go | 9 ++ internal/cfg/env.go | 3 + internal/cfg/metrics.go | 7 ++ internal/domain/event.go | 8 ++ pkg/telemetry/context.go | 29 +++++ pkg/telemetry/telemetry.go | 106 ++++++++++++++++++ 25 files changed, 595 insertions(+), 72 deletions(-) create mode 100644 deployment/app-pgsql/otel-collector-config.yaml create mode 100644 deployment/app-pgsql/prometheus/prometheus.yml create mode 100644 
deployment/app-pgsql/provisioning/dashboards/dashboards.yml create mode 100644 deployment/app-pgsql/provisioning/dashboards/metrics-overview.json create mode 100644 deployment/app-pgsql/provisioning/datasources/prometheus.yml create mode 100644 example/simulation/multiples_producer_pubsub.sh create mode 100644 internal/app/backofficeapp/get_event_handle.go rename internal/app/backofficeapp/{get_event.go => get_events_handle.go} (64%) create mode 100644 internal/cfg/metrics.go create mode 100644 pkg/telemetry/context.go create mode 100644 pkg/telemetry/telemetry.go diff --git a/cmd/api/main.go b/cmd/api/main.go index 2b82212..859fcb9 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -17,6 +17,7 @@ import ( "github.com/IsaacDSC/gqueue/internal/fetcher" "github.com/IsaacDSC/gqueue/internal/interstore" "github.com/IsaacDSC/gqueue/internal/storests" + "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/redis/go-redis/v9" ) @@ -58,6 +59,13 @@ func main() { scope := flag.String("scope", "all", "service to run") flag.Parse() + _, err := telemetry.New(telemetry.Config{ + Enabled: conf.MetricsEnabled, + }) + if err != nil { + panic(err) + } + redisClient := redis.NewClient(&redis.Options{Addr: conf.Cache.CacheAddr}) if err := redisClient.Ping(ctx).Err(); err != nil { panic(err) @@ -116,6 +124,12 @@ func main() { for _, closeFn := range closers { closeFn() } + + shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer shutdownCancel() + if err := telemetry.Shutdown(shutdownCtx); err != nil { + log.Printf("Error shutting down telemetry: %v", err) + } } func scopeOrAll(scope, expected string) bool { diff --git a/cmd/setup/backoffice/httpserver.go b/cmd/setup/backoffice/httpserver.go index f290ed0..cb6a465 100644 --- a/cmd/setup/backoffice/httpserver.go +++ b/cmd/setup/backoffice/httpserver.go @@ -13,6 +13,7 @@ import ( "github.com/IsaacDSC/gqueue/internal/domain" "github.com/IsaacDSC/gqueue/internal/interstore" 
"github.com/IsaacDSC/gqueue/pkg/httpadapter" + "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/redis/go-redis/v9" ) @@ -27,6 +28,9 @@ func Start( ) *http.Server { mux := http.NewServeMux() + // Rota de métricas para Prometheus. + mux.Handle("/metrics", telemetry.Handler()) + routes := []httpadapter.HttpHandle{ health.GetHealthCheckHandler(), backofficeapp.PatchConsumer(store), @@ -47,7 +51,9 @@ func Start( // config.ProjectID: config.SecretKey, // }) - handler := middleware.CORSMiddleware(middleware.LoggerMiddleware(mux)) + handler := middleware.CORSMiddleware( + middleware.MetricsMiddleware(cfg.BACKOFFICE_APP_NAME, middleware.LoggerMiddleware(mux)), + ) // h := authorization.Middleware(handler.ServeHTTP) env := cfg.Get() diff --git a/cmd/setup/httpsvc/http_api.go b/cmd/setup/httpsvc/http_api.go index 135a935..9704480 100644 --- a/cmd/setup/httpsvc/http_api.go +++ b/cmd/setup/httpsvc/http_api.go @@ -10,12 +10,16 @@ import ( "github.com/IsaacDSC/gqueue/internal/app/health" "github.com/IsaacDSC/gqueue/internal/cfg" "github.com/IsaacDSC/gqueue/pkg/httpadapter" + "github.com/IsaacDSC/gqueue/pkg/telemetry" ) -func StartHttpServer(ctx context.Context, env cfg.Config, routes []httpadapter.HttpHandle, port string) *http.Server { +func StartHttpServer(ctx context.Context, env cfg.Config, routes []httpadapter.HttpHandle, port string, serviceName string) *http.Server { mux := http.NewServeMux() + // Rota de métricas para Prometheus. 
+ mux.Handle("/metrics", telemetry.Handler()) + routes = append(routes, health.GetHealthCheckHandler()) for _, route := range routes { @@ -28,7 +32,9 @@ func StartHttpServer(ctx context.Context, env cfg.Config, routes []httpadapter.H // config.ProjectID: config.SecretKey, // }) - handler := middleware.CORSMiddleware(middleware.LoggerMiddleware(mux)) + handler := middleware.CORSMiddleware( + middleware.MetricsMiddleware(serviceName, middleware.LoggerMiddleware(mux)), + ) // h := authorization.Middleware(handler.ServeHTTP) server := &http.Server{ diff --git a/cmd/setup/middleware/middleware.go b/cmd/setup/middleware/middleware.go index f836e6c..5bfd644 100644 --- a/cmd/setup/middleware/middleware.go +++ b/cmd/setup/middleware/middleware.go @@ -9,8 +9,11 @@ import ( "github.com/IsaacDSC/gqueue/pkg/ctxlogger" "github.com/IsaacDSC/gqueue/pkg/logs" + "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/google/uuid" "github.com/hibiken/asynq" + "go.opentelemetry.io/otel/attribute" + api "go.opentelemetry.io/otel/metric" ) type CORSConfig struct { @@ -29,7 +32,7 @@ func DefaultCORSConfig() CORSConfig { AllowedHeaders: []string{"Content-Type", "Authorization", "X-Requested-With"}, ExposedHeaders: []string{}, AllowCredentials: false, - MaxAge: 86400, // 24 horas + MaxAge: 86400, // 24 hours } } @@ -145,3 +148,70 @@ func LoggerMiddleware(next http.Handler) http.Handler { next.ServeHTTP(w, r) }) } + +type statusRecorder struct { + http.ResponseWriter + statusCode int +} + +func (r *statusRecorder) WriteHeader(code int) { + r.statusCode = code + r.ResponseWriter.WriteHeader(code) +} + +// MetricsMiddleware records HTTP metrics per service using OpenTelemetry. +func MetricsMiddleware(serviceName string, next http.Handler) http.Handler { + // Create instruments once per middleware chain. 
+ meter := telemetry.Meter(serviceName) + + requestCounter, err := meter.Int64Counter( + "http_server_requests_total", + api.WithDescription("Total HTTP requests received"), + ) + if err != nil { + // If instrument creation fails, return the original handler. + return next + } + + requestDuration, err := meter.Float64Histogram( + "http_server_request_duration_seconds", + api.WithDescription("HTTP request duration in seconds"), + ) + if err != nil { + return next + } + + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + path := r.URL.Path + if path == "/metrics" || path == "/health" { + next.ServeHTTP(w, r) + return + } + + start := time.Now() + + rec := &statusRecorder{ + ResponseWriter: w, + statusCode: http.StatusOK, + } + + // Ensure the Meter is available in the request context. + ctxWithMeter := telemetry.WithMeter(r.Context(), meter) + r = r.WithContext(ctxWithMeter) + + next.ServeHTTP(rec, r) + + duration := time.Since(start).Seconds() + + attrs := []attribute.KeyValue{ + attribute.String("http.method", r.Method), + attribute.String("http.route", path), + attribute.Int("http.status_code", rec.statusCode), + attribute.String("service.name", serviceName), + } + + ctx := r.Context() + requestCounter.Add(ctx, 1, api.WithAttributes(attrs...)) + requestDuration.Record(ctx, duration, api.WithAttributes(attrs...)) + }) +} diff --git a/cmd/setup/pubsub/http_api.go b/cmd/setup/pubsub/http_api.go index f931b70..d511e34 100644 --- a/cmd/setup/pubsub/http_api.go +++ b/cmd/setup/pubsub/http_api.go @@ -15,5 +15,5 @@ func (s *Service) startHttpServer(ctx context.Context, env cfg.Config) *http.Ser pubsubapp.PublisherEvent(s.memStore, s.gcppublisher, s.insightsStore), } - return httpsvc.StartHttpServer(ctx, env, routes, env.PubsubApiPort.String()) + return httpsvc.StartHttpServer(ctx, env, routes, env.PubsubApiPort.String(), cfg.PUBSUB_APP_NAME) } diff --git a/cmd/setup/task/http_api.go b/cmd/setup/task/http_api.go index e2dfa61..cca126b 100644 --- 
a/cmd/setup/task/http_api.go +++ b/cmd/setup/task/http_api.go @@ -15,5 +15,5 @@ func (s *Service) startHttpServer(ctx context.Context, env cfg.Config) *http.Ser taskapp.PublisherEvent(s.memStore, s.asynqPublisher, s.insightsStore), } - return httpsvc.StartHttpServer(ctx, env, routes, env.TaskApiPort.String()) + return httpsvc.StartHttpServer(ctx, env, routes, env.TaskApiPort.String(), cfg.TASK_APP_NAME) } diff --git a/deployment/app-pgsql/docker-compose.yaml b/deployment/app-pgsql/docker-compose.yaml index 8e0c179..d4cef5c 100644 --- a/deployment/app-pgsql/docker-compose.yaml +++ b/deployment/app-pgsql/docker-compose.yaml @@ -40,6 +40,7 @@ services: profiles: - complete - gqueue + - observability pubsub: build: @@ -87,6 +88,7 @@ services: profiles: - complete - gqueue + - observability task: build: @@ -134,6 +136,7 @@ services: profiles: - complete - gqueue + - observability redis: image: redis:latest @@ -160,6 +163,7 @@ services: - complete - infra - gqueue + - observability postgres: image: postgres:15 @@ -191,6 +195,7 @@ services: - complete - infra - gqueue + - observability pgadmin: image: dpage/pgadmin4:latest @@ -243,6 +248,7 @@ services: - complete - infra - gqueue + - observability consumer: build: @@ -282,6 +288,39 @@ services: depends_on: - postgres - redis + - prometheus + profiles: + - observability + + prometheus: + image: prom/prometheus:latest + container_name: prometheus + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + command: + - "--config.file=/etc/prometheus/prometheus.yml" + ports: + - "9090:9090" + networks: + - app-network + restart: unless-stopped + profiles: + - observability + + otel-collector: + image: otel/opentelemetry-collector-contrib:latest + container_name: otel-collector + command: + - "--config=/etc/otel-collector-config.yaml" + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml + ports: + - "4317:4317" + - "4318:4318" + - "9464:9464" + networks: + - app-network + restart: 
unless-stopped profiles: - observability diff --git a/deployment/app-pgsql/otel-collector-config.yaml b/deployment/app-pgsql/otel-collector-config.yaml new file mode 100644 index 0000000..9a862e4 --- /dev/null +++ b/deployment/app-pgsql/otel-collector-config.yaml @@ -0,0 +1,16 @@ +receivers: + otlp: + protocols: + grpc: + http: + +exporters: + prometheus: + endpoint: "0.0.0.0:9464" + +service: + pipelines: + metrics: + receivers: [otlp] + exporters: [prometheus] + diff --git a/deployment/app-pgsql/prometheus/prometheus.yml b/deployment/app-pgsql/prometheus/prometheus.yml new file mode 100644 index 0000000..20bf0c2 --- /dev/null +++ b/deployment/app-pgsql/prometheus/prometheus.yml @@ -0,0 +1,26 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "gqueue-backoffice" + metrics_path: /metrics + static_configs: + - targets: ["backoffice:8081"] + + - job_name: "gqueue-pubsub" + metrics_path: /metrics + static_configs: + - targets: ["pubsub:8082"] + + - job_name: "gqueue-task" + metrics_path: /metrics + static_configs: + - targets: ["task:8083"] + + # Optional: collect metrics from the otel-collector + - job_name: "otel-collector" + metrics_path: /metrics + static_configs: + - targets: ["otel-collector:9464"] + diff --git a/deployment/app-pgsql/provisioning/dashboards/dashboards.yml b/deployment/app-pgsql/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..11f7fa6 --- /dev/null +++ b/deployment/app-pgsql/provisioning/dashboards/dashboards.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +providers: + - name: gqueue-metrics + type: file + disableDeletion: false + editable: true + options: + path: /etc/grafana/provisioning/dashboards + diff --git a/deployment/app-pgsql/provisioning/dashboards/metrics-overview.json b/deployment/app-pgsql/provisioning/dashboards/metrics-overview.json new file mode 100644 index 0000000..0240590 --- /dev/null +++ b/deployment/app-pgsql/provisioning/dashboards/metrics-overview.json @@ -0,0 
+1,92 @@ +{ + "id": null, + "uid": "gqueue-metrics", + "title": "gqueue HTTP & Task Metrics", + "tags": ["gqueue", "metrics"], + "timezone": "browser", + "schemaVersion": 39, + "version": 1, + "refresh": "15s", + "time": { + "from": "now-1h", + "to": "now" + }, + "panels": [ + { + "id": 1, + "type": "timeseries", + "title": "HTTP RPS por serviço", + "datasource": "Prometheus", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "targets": [ + { + "refId": "A", + "expr": "sum by (service_name) (rate(http_server_requests_total[5m]))" + } + ], + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + } + }, + "fieldConfig": { + "defaults": { + "unit": "req/s" + }, + "overrides": [] + } + }, + { + "id": 2, + "type": "timeseries", + "title": "HTTP p95 duration por serviço (s)", + "datasource": "Prometheus", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "targets": [ + { + "refId": "A", + "expr": "histogram_quantile(0.95, sum by (le, service_name) (rate(http_server_request_duration_seconds_bucket[5m])))" + } + ], + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + } + }, + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + } + }, + { + "id": 3, + "type": "timeseries", + "title": "Task publisher RPS", + "datasource": "Prometheus", + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(task_publisher_requests_total[5m]))" + } + ], + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom" + } + }, + "fieldConfig": { + "defaults": { + "unit": "req/s" + }, + "overrides": [] + } + } + ] +} + diff --git a/deployment/app-pgsql/provisioning/datasources/prometheus.yml b/deployment/app-pgsql/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..f4644c0 --- /dev/null +++ b/deployment/app-pgsql/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: 
prometheus + access: proxy + url: http://prometheus:9090 + isDefault: false + editable: true + diff --git a/example/example.md b/example/example.md index 3f06e44..e64658e 100644 --- a/example/example.md +++ b/example/example.md @@ -11,11 +11,11 @@ curl -X PATCH \ curl -X GET \ -H "Authorization: Basic YWRtaW46cGFzc3dvcmQ=" \ - http://localhost:8080/api/v1/events/{{eventName}} | jq + http://localhost:8081/api/v1/events/payment.charged1 | jq curl -X GET \ -H "Authorization: Basic YWRtaW46cGFzc3dvcmQ=" \ - http://localhost:8080/api/v1/events + http://localhost:8081/api/v1/events curl -X GET \ -H "Authorization: Basic YWRtaW46cGFzc3dvcmQ=" \ diff --git a/example/simulation/multiples_producer_pubsub.sh b/example/simulation/multiples_producer_pubsub.sh new file mode 100644 index 0000000..b62ec6f --- /dev/null +++ b/example/simulation/multiples_producer_pubsub.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +URL="http://localhost:8082/api/v1/pubsub" +AUTH="Basic YWRtaW46cGFzc3dvcmQ=" +DATA_FILE="example/publisher_data.json" + +while true; do + # 10 chamadas em paralelo + for i in {1..10}; do + curl -s -X POST \ + "$URL" \ + -H "Content-Type: application/json" \ + -H "Authorization: $AUTH" \ + -d @"$DATA_FILE" & + done + + # espera as 10 terminarem (opcional, mas recomendado) + wait + + # espera 100ms + sleep 0.1 +done diff --git a/go.mod b/go.mod index 27dd232..1ea3e54 100644 --- a/go.mod +++ b/go.mod @@ -13,9 +13,14 @@ require ( github.com/hibiken/asynq v0.25.1 github.com/ilyakaznacheev/cleanenv v1.5.0 github.com/lib/pq v1.10.9 + github.com/prometheus/client_golang v1.23.2 github.com/redis/go-redis/v9 v9.12.0 - github.com/stretchr/testify v1.10.0 + github.com/stretchr/testify v1.11.1 github.com/tsenart/vegeta/v12 v12.12.0 + go.opentelemetry.io/otel v1.41.0 + go.opentelemetry.io/otel/exporters/prometheus v0.63.0 + go.opentelemetry.io/otel/metric v1.41.0 + go.opentelemetry.io/otel/sdk/metric v1.41.0 go.uber.org/mock v0.5.2 google.golang.org/grpc v1.74.2 ) @@ -27,6 +32,7 @@ 
require ( cloud.google.com/go/compute/metadata v0.8.0 // indirect cloud.google.com/go/iam v1.5.2 // indirect github.com/BurntSushi/toml v1.5.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect @@ -39,30 +45,35 @@ require ( github.com/joho/godotenv v1.5.1 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.67.5 // indirect + github.com/prometheus/otlptranslator v1.0.0 // indirect + github.com/prometheus/procfs v0.19.2 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect github.com/spf13/cast v1.7.0 // indirect go.einride.tech/aip v0.73.0 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect - go.opentelemetry.io/otel v1.36.0 // indirect - go.opentelemetry.io/otel/metric v1.36.0 // indirect - go.opentelemetry.io/otel/trace v1.36.0 // indirect - golang.org/x/crypto v0.41.0 // indirect - golang.org/x/net v0.43.0 // indirect - golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sync v0.16.0 // indirect - golang.org/x/sys v0.35.0 // indirect - golang.org/x/text v0.28.0 // indirect + go.opentelemetry.io/otel/sdk v1.41.0 // indirect + go.opentelemetry.io/otel/trace v1.41.0 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + golang.org/x/crypto v0.46.0 // indirect + golang.org/x/net 
v0.48.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.41.0 // indirect + golang.org/x/text v0.32.0 // indirect golang.org/x/time v0.12.0 // indirect google.golang.org/api v0.247.0 // indirect google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250811230008-5f3141c8851a // indirect - google.golang.org/protobuf v1.36.7 // indirect + google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect olympos.io/encoding/edn v0.0.0-20201019073823-d3554ca0b0a3 // indirect ) diff --git a/go.sum b/go.sum index 35d110f..61f5bc8 100644 --- a/go.sum +++ b/go.sum @@ -21,6 +21,8 @@ github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/IsaacDSC/clienthttp v1.0.1 h1:FIptKZ1ZjJrLGn0cB9S8nzUBErE2eq2QaBzUiJOmBPI= github.com/IsaacDSC/clienthttp v1.0.1/go.mod h1:TFzAThW6KUDOugso4Fq4GQdtpOhFRL+ByH3YMFaVhbM= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bmizerany/perks v0.0.0-20230307044200-03f9df79da1e h1:mWOqoK5jV13ChKf/aF3plwQ96laasTJgZi4f1aSOu+M= github.com/bmizerany/perks v0.0.0-20230307044200-03f9df79da1e/go.mod h1:ac9efd0D1fsDb3EJvhqgXRbFx7bs2wqZ10HQPeU8U/Q= github.com/brianvoe/gofakeit/v6 v6.28.0 h1:Xib46XXuQfmlLS2EXRuJpqcw8St6qSZz75OUo0tgAW4= @@ -97,23 +99,39 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod 
h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common 
v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/otlptranslator v1.0.0 h1:s0LJW/iN9dkIH+EnhiD3BlkkP5QVIUVEoIwkU+A6qos= +github.com/prometheus/otlptranslator v1.0.0/go.mod h1:vRYWnXvI6aWGpsdY/mOT/cbeVRBlPWtBNDb7kGR3uKM= +github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= +github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= github.com/redis/go-redis/v9 v9.12.0 h1:XlVPGlflh4nxfhsNXPA8Qp6EmEfTo0rp8oaBzPipXnU= github.com/redis/go-redis/v9 v9.12.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= -github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= -github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8= github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA= github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= @@ -126,38 +144,42 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 
-github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/tsenart/vegeta/v12 v12.12.0 h1:FKMMNomd3auAElO/TtbXzRFXAKGee6N/GKCGweFVm2U= github.com/tsenart/vegeta/v12 v12.12.0/go.mod h1:gpdfR++WHV9/RZh4oux0f6lNPhsOH8pCjIGUlcPQe1M= go.einride.tech/aip v0.73.0 h1:bPo4oqBo2ZQeBKo4ZzLb1kxYXTY1ysJhpvQyfuGzvps= go.einride.tech/aip v0.73.0/go.mod h1:Mj7rFbmXEgw0dq1dqJ7JGMvYCZZVxmGOR3S4ZcV5LvQ= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 h1:q4XOmH/0opmeuJtPsbFNivyl7bCt7yRBbeEm2sC/XtQ= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0/go.mod h1:snMWehoOh2wsEwnvvwtDyFCxVeDAODenXHtn5vzrKjo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= -go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg= -go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E= -go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE= -go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs= 
-go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs= -go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY= -go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis= -go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4= -go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w= -go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= +go.opentelemetry.io/otel v1.41.0 h1:YlEwVsGAlCvczDILpUXpIpPSL/VPugt7zHThEMLce1c= +go.opentelemetry.io/otel v1.41.0/go.mod h1:Yt4UwgEKeT05QbLwbyHXEwhnjxNO6D8L5PQP51/46dE= +go.opentelemetry.io/otel/exporters/prometheus v0.63.0 h1:OLo1FNb0pBZykLqbKRZolKtGZd0Waqlr240YdMEnhhg= +go.opentelemetry.io/otel/exporters/prometheus v0.63.0/go.mod h1:8yeQAdhrK5xsWuFehO13Dk/Xb9FuhZoVpJfpoNCfJnw= +go.opentelemetry.io/otel/metric v1.41.0 h1:rFnDcs4gRzBcsO9tS8LCpgR0dxg4aaxWlJxCno7JlTQ= +go.opentelemetry.io/otel/metric v1.41.0/go.mod h1:xPvCwd9pU0VN8tPZYzDZV/BMj9CM9vs00GuBjeKhJps= +go.opentelemetry.io/otel/sdk v1.41.0 h1:YPIEXKmiAwkGl3Gu1huk1aYWwtpRLeskpV+wPisxBp8= +go.opentelemetry.io/otel/sdk v1.41.0/go.mod h1:ahFdU0G5y8IxglBf0QBJXgSe7agzjE4GiTJ6HT9ud90= +go.opentelemetry.io/otel/sdk/metric v1.41.0 h1:siZQIYBAUd1rlIWQT2uCxWJxcCO7q3TriaMlf08rXw8= +go.opentelemetry.io/otel/sdk/metric v1.41.0/go.mod h1:HNBuSvT7ROaGtGI50ArdRLUnvRTRGniSUZbxiWxSO8Y= +go.opentelemetry.io/otel/trace v1.41.0 h1:Vbk2co6bhj8L59ZJ6/xFTskY+tGAbOnCtQGVVa9TIN0= +go.opentelemetry.io/otel/trace v1.41.0/go.mod h1:U1NU4ULCoxeDKc09yCWdWe+3QoyweJcISEVa1RBzOis= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko= go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o= 
+go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= -golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= +golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= +golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= @@ -171,26 +193,26 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= -golang.org/x/oauth2 v0.30.0/go.mod 
h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= -golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text 
v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -232,8 +254,8 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= -google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/internal/app/backofficeapp/get_event_handle.go b/internal/app/backofficeapp/get_event_handle.go new file mode 100644 index 0000000..f503395 --- /dev/null +++ b/internal/app/backofficeapp/get_event_handle.go @@ -0,0 +1,30 @@ +package backofficeapp + +import ( + "encoding/json" + "net/http" + + "github.com/IsaacDSC/gqueue/pkg/httpadapter" +) + +func GetEvent(repo Repository) httpadapter.HttpHandle { + return httpadapter.HttpHandle{ + Path: "GET /api/v1/events/{event_name}", + Handler: func(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + eventName := 
r.PathValue("event_name") + + event, err := repo.GetInternalEvent(ctx, eventName) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusOK) + if err := json.NewEncoder(w).Encode(event); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + }, + } +} diff --git a/internal/app/backofficeapp/get_event.go b/internal/app/backofficeapp/get_events_handle.go similarity index 64% rename from internal/app/backofficeapp/get_event.go rename to internal/app/backofficeapp/get_events_handle.go index 68eeab7..7e267df 100644 --- a/internal/app/backofficeapp/get_event.go +++ b/internal/app/backofficeapp/get_events_handle.go @@ -9,28 +9,6 @@ import ( "github.com/IsaacDSC/gqueue/pkg/queryparser" ) -func GetEvent(repo Repository) httpadapter.HttpHandle { - return httpadapter.HttpHandle{ - Path: "GET /api/v1/events/{event_name}", - Handler: func(w http.ResponseWriter, r *http.Request) { - ctx := r.Context() - eventName := r.PathValue("event_name") - - event, err := repo.GetInternalEvent(ctx, eventName) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusOK) - if err := json.NewEncoder(w).Encode(event); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - }, - } -} - func GetEvents(repo Repository) httpadapter.HttpHandle { return httpadapter.HttpHandle{ Path: "GET /api/v1/events", diff --git a/internal/app/pubsubapp/publisher_handle_http.go b/internal/app/pubsubapp/publisher_handle_http.go index f8fa86b..1fa99ed 100644 --- a/internal/app/pubsubapp/publisher_handle_http.go +++ b/internal/app/pubsubapp/publisher_handle_http.go @@ -12,6 +12,7 @@ import ( "github.com/IsaacDSC/gqueue/pkg/ctxlogger" "github.com/IsaacDSC/gqueue/pkg/httpadapter" "github.com/IsaacDSC/gqueue/pkg/pubadapter" + "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/IsaacDSC/gqueue/pkg/topicutils" ) @@ -71,6 
+72,14 @@ func PublisherEvent( ctx := r.Context() l := ctxlogger.GetLogger(ctx) + if meter := telemetry.MeterFromContext(ctx); meter != nil { + if counter, err := meter.Int64Counter("task_publisher_requests_total"); err == nil { + counter.Add(ctx, 1) + } + }else{ + l.Warn("meter not found in context") + } + var payload InternalPayload defer r.Body.Close() diff --git a/internal/app/taskapp/publisher_handle_http.go b/internal/app/taskapp/publisher_handle_http.go index 12fb5ee..75a8691 100644 --- a/internal/app/taskapp/publisher_handle_http.go +++ b/internal/app/taskapp/publisher_handle_http.go @@ -12,6 +12,7 @@ import ( "github.com/IsaacDSC/gqueue/pkg/ctxlogger" "github.com/IsaacDSC/gqueue/pkg/httpadapter" "github.com/IsaacDSC/gqueue/pkg/pubadapter" + "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/IsaacDSC/gqueue/pkg/topicutils" ) @@ -83,6 +84,14 @@ func PublisherEvent( ctx := r.Context() l := ctxlogger.GetLogger(ctx) + if meter := telemetry.MeterFromContext(ctx); meter != nil { + if counter, err := meter.Int64Counter("task_publisher_requests_total"); err == nil { + counter.Add(ctx, 1) + } + } else { + l.Warn("meter not found in context") + } + var payload InternalPayload defer r.Body.Close() diff --git a/internal/cfg/env.go b/internal/cfg/env.go index f34559b..dac2356 100644 --- a/internal/cfg/env.go +++ b/internal/cfg/env.go @@ -63,6 +63,9 @@ type Config struct { TaskApiPort ServerPort `env:"TASK_API_PORT" env-default:"8082"` BackofficeApiPort ServerPort `env:"BACKOFFICE_API_PORT" env-default:"8081"` ShutdownTimeout time.Duration `env:"SHUTDOWN_TIMEOUT" env-default:"30s"` //TODO: porque não está sendo usado? 
+ + MetricsEnabled bool `env:"METRICS_ENABLED" env-default:"true"` + OTELExporterOTLPEndpoint string `env:"OTEL_EXPORTER_OTLP_ENDPOINT" env-default:""` } var cfg Config diff --git a/internal/cfg/metrics.go b/internal/cfg/metrics.go new file mode 100644 index 0000000..dac12ac --- /dev/null +++ b/internal/cfg/metrics.go @@ -0,0 +1,7 @@ +package cfg + +const ( + PUBSUB_APP_NAME = "pubsub" + BACKOFFICE_APP_NAME = "backoffice" + TASK_APP_NAME = "task" +) diff --git a/internal/domain/event.go b/internal/domain/event.go index 0bd4f74..eb69dd6 100644 --- a/internal/domain/event.go +++ b/internal/domain/event.go @@ -30,6 +30,14 @@ func (e *Event) Validate() error { return fmt.Errorf("invalid event option: %w", err) } + if len(e.Consumers) == 0 { + return fmt.Errorf("at least one consumer is required") + } + + if len(e.Consumers) > 10 { + return fmt.Errorf("consumers must be less than 10") + } + return nil } diff --git a/pkg/telemetry/context.go b/pkg/telemetry/context.go new file mode 100644 index 0000000..2a7965e --- /dev/null +++ b/pkg/telemetry/context.go @@ -0,0 +1,29 @@ +package telemetry + +import ( + "context" + + "go.opentelemetry.io/otel/metric" +) + +type meterKeyType struct{} + +var meterKey = meterKeyType{} + +// WithMeter injects the Meter into the context. +func WithMeter(ctx context.Context, m metric.Meter) context.Context { + return context.WithValue(ctx, meterKey, m) +} + +// MeterFromContext retrieves the Meter from the context, or a default Meter if absent. 
+func MeterFromContext(ctx context.Context) metric.Meter { + if ctx == nil { + return Meter("default") + } + + if m, ok := ctx.Value(meterKey).(metric.Meter); ok && m != nil { + return m + } + + return Meter("default") +} diff --git a/pkg/telemetry/telemetry.go b/pkg/telemetry/telemetry.go new file mode 100644 index 0000000..13c3fda --- /dev/null +++ b/pkg/telemetry/telemetry.go @@ -0,0 +1,106 @@ +package telemetry + +import ( + "context" + "net/http" + "sync" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + + otelprom "go.opentelemetry.io/otel/exporters/prometheus" + + promclient "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +// Config encapsulates metric provider initialization options. +type Config struct { + Enabled bool +} + +var ( + mu sync.RWMutex + meterProvider *sdkmetric.MeterProvider + metricsHandler http.Handler = http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("# metrics not initialized\n")) + }) +) + +// New initializes the global MeterProvider and the HTTP metrics handler. +// It should be called once at application startup. +func New(cfg Config) (http.Handler, error) { + mu.Lock() + defer mu.Unlock() + + // If already initialized, just return the current handler. + if meterProvider != nil { + return metricsHandler, nil + } + + var mp *sdkmetric.MeterProvider + + if !cfg.Enabled { + // No-op provider: no metrics will be exported. + mp = sdkmetric.NewMeterProvider() + metricsHandler = http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("# metrics disabled\n")) + }) + } else { + // Prometheus exporter: explicitly use the client_golang default registry + // so promhttp.Handler() serves the same metrics. 
+ exp, err := otelprom.New(otelprom.WithRegisterer(promclient.DefaultRegisterer)) + if err != nil { + return nil, err + } + + // Use the exporter as a reader for the MeterProvider. + mp = sdkmetric.NewMeterProvider( + sdkmetric.WithReader(exp), + ) + + // Prometheus default handler to expose registered metrics. + metricsHandler = promhttp.Handler() + } + + otel.SetMeterProvider(mp) + meterProvider = mp + + return metricsHandler, nil +} + +// Shutdown stops the global MeterProvider and releases resources. +func Shutdown(ctx context.Context) error { + mu.RLock() + mp := meterProvider + mu.RUnlock() + + if mp == nil { + return nil + } + + return mp.Shutdown(ctx) +} + +// Meter returns a Meter from the global provider. +func Meter(name string) metric.Meter { + mu.RLock() + defer mu.RUnlock() + + if meterProvider == nil { + return otel.Meter(name) + } + + return meterProvider.Meter(name) +} + +// Handler returns the current HTTP handler that exposes metrics. +func Handler() http.Handler { + mu.RLock() + defer mu.RUnlock() + + return metricsHandler +} From e4e0f9da6fbc457b5dcfaac5fd8e4fae9383433f Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Wed, 4 Mar 2026 08:27:51 -0300 Subject: [PATCH 2/9] refactor: enhance telemetry metrics integration across services - Removed legacy metric implementations in favor of a centralized telemetry package for better consistency. - Updated middleware and handlers to utilize new telemetry metrics for HTTP requests and consumer durations. - Improved error handling and logging for telemetry events, ensuring better observability. - Adjusted service configurations to reflect new metric definitions and enhance monitoring capabilities. 
--- cmd/setup/middleware/middleware.go | 22 +------- internal/app/pubsubapp/consumer_handle.go | 27 +++++++++ .../app/pubsubapp/publisher_handle_http.go | 9 --- internal/app/taskapp/consumer_handle.go | 27 +++++++++ internal/app/taskapp/publisher_handle_http.go | 9 --- internal/cfg/env.go | 2 +- internal/fetcher/notification.go | 14 +++++ internal/interstore/mem_store.go | 9 +++ pkg/asyncadapter/gbubsub_adapter.go | 7 ++- pkg/pubadapter/pub_sub_asynq.go | 10 +++- pkg/pubadapter/pub_sub_google.go | 9 ++- pkg/telemetry/context.go | 5 ++ pkg/telemetry/metrics.go | 55 +++++++++++++++++++ 13 files changed, 163 insertions(+), 42 deletions(-) create mode 100644 pkg/telemetry/metrics.go diff --git a/cmd/setup/middleware/middleware.go b/cmd/setup/middleware/middleware.go index 5bfd644..13182ff 100644 --- a/cmd/setup/middleware/middleware.go +++ b/cmd/setup/middleware/middleware.go @@ -13,7 +13,6 @@ import ( "github.com/google/uuid" "github.com/hibiken/asynq" "go.opentelemetry.io/otel/attribute" - api "go.opentelemetry.io/otel/metric" ) type CORSConfig struct { @@ -164,23 +163,6 @@ func MetricsMiddleware(serviceName string, next http.Handler) http.Handler { // Create instruments once per middleware chain. meter := telemetry.Meter(serviceName) - requestCounter, err := meter.Int64Counter( - "http_server_requests_total", - api.WithDescription("Total HTTP requests received"), - ) - if err != nil { - // If instrument creation fails, return the original handler. 
- return next - } - - requestDuration, err := meter.Float64Histogram( - "http_server_request_duration_seconds", - api.WithDescription("HTTP request duration in seconds"), - ) - if err != nil { - return next - } - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { path := r.URL.Path if path == "/metrics" || path == "/health" { @@ -211,7 +193,7 @@ func MetricsMiddleware(serviceName string, next http.Handler) http.Handler { } ctx := r.Context() - requestCounter.Add(ctx, 1, api.WithAttributes(attrs...)) - requestDuration.Record(ctx, duration, api.WithAttributes(attrs...)) + telemetry.HTTPServerRequests.Increment(ctx, attrs...) + telemetry.HTTPServerRequestDuration.Record(ctx, duration, attrs...) }) } diff --git a/internal/app/pubsubapp/consumer_handle.go b/internal/app/pubsubapp/consumer_handle.go index 557b556..e83ee66 100644 --- a/internal/app/pubsubapp/consumer_handle.go +++ b/internal/app/pubsubapp/consumer_handle.go @@ -5,10 +5,13 @@ import ( "fmt" "time" + "github.com/IsaacDSC/gqueue/internal/cfg" "github.com/IsaacDSC/gqueue/internal/domain" "github.com/IsaacDSC/gqueue/internal/notifyopt" "github.com/IsaacDSC/gqueue/pkg/asyncadapter" "github.com/IsaacDSC/gqueue/pkg/ctxlogger" + "github.com/IsaacDSC/gqueue/pkg/telemetry" + "go.opentelemetry.io/otel/attribute" ) type Fetcher interface { @@ -51,12 +54,36 @@ func GetRequestHandle(fetch Fetcher, insights ConsumerInsights) asyncadapter.Han headers := payload.mergeHeaders(payload.Consumer.Headers) if err := fetch.Notify(ctx, payload.Data, headers, payload.Consumer, notifyopt.HighThroughput); err != nil { insertInsights(ctx, payload, started, false) + recordDuration(ctx, started, payload, err) return fmt.Errorf("fetch consumer: %w", err) } insertInsights(ctx, payload, started, true) + recordDuration(ctx, started, payload, nil) return nil }, } } + +func recordDuration(ctx context.Context, started time.Time, payload RequestPayload, err error) { + attrs := []attribute.KeyValue{ + 
attribute.String("consumer.app_name", cfg.PUBSUB_APP_NAME), + attribute.String("consumer.base_url", payload.Consumer.BaseUrl), + attribute.String("consumer.path", payload.Consumer.Path), + attribute.String("consumer.service_name", payload.Consumer.ServiceName), + } + + if err != nil { + attrs = append(attrs, attribute.Bool("success", false)) + attrs = append(attrs, attribute.String("error", err.Error())) + } else { + attrs = append(attrs, attribute.Bool("success", true)) + } + + duration := time.Since(started).Seconds() + telemetry.PubSubConsumerDuration.Record( + ctx, duration, + attrs..., + ) +} diff --git a/internal/app/pubsubapp/publisher_handle_http.go b/internal/app/pubsubapp/publisher_handle_http.go index 1fa99ed..f8fa86b 100644 --- a/internal/app/pubsubapp/publisher_handle_http.go +++ b/internal/app/pubsubapp/publisher_handle_http.go @@ -12,7 +12,6 @@ import ( "github.com/IsaacDSC/gqueue/pkg/ctxlogger" "github.com/IsaacDSC/gqueue/pkg/httpadapter" "github.com/IsaacDSC/gqueue/pkg/pubadapter" - "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/IsaacDSC/gqueue/pkg/topicutils" ) @@ -72,14 +71,6 @@ func PublisherEvent( ctx := r.Context() l := ctxlogger.GetLogger(ctx) - if meter := telemetry.MeterFromContext(ctx); meter != nil { - if counter, err := meter.Int64Counter("task_publisher_requests_total"); err == nil { - counter.Add(ctx, 1) - } - }else{ - l.Warn("meter not found in context") - } - var payload InternalPayload defer r.Body.Close() diff --git a/internal/app/taskapp/consumer_handle.go b/internal/app/taskapp/consumer_handle.go index 7350938..4fde6a2 100644 --- a/internal/app/taskapp/consumer_handle.go +++ b/internal/app/taskapp/consumer_handle.go @@ -5,10 +5,13 @@ import ( "fmt" "time" + "github.com/IsaacDSC/gqueue/internal/cfg" "github.com/IsaacDSC/gqueue/internal/domain" "github.com/IsaacDSC/gqueue/internal/notifyopt" "github.com/IsaacDSC/gqueue/pkg/asyncadapter" "github.com/IsaacDSC/gqueue/pkg/ctxlogger" + "github.com/IsaacDSC/gqueue/pkg/telemetry" 
+ "go.opentelemetry.io/otel/attribute" ) type Fetcher interface { @@ -55,12 +58,36 @@ func GetRequestHandle(fetch Fetcher, insights ConsumerInsights) asyncadapter.Han headers := payload.mergeHeaders(payload.Consumer.Headers) if err := fetch.Notify(ctx, payload.Data, headers, payload.Consumer, notifyopt.LongRunning); err != nil { insertInsights(ctx, payload, started, false) + recordDuration(ctx, started, payload, err) return fmt.Errorf("fetch consumer: %w", err) } insertInsights(ctx, payload, started, true) + recordDuration(ctx, started, payload, nil) return nil }, } } + +func recordDuration(ctx context.Context, started time.Time, payload RequestPayload, err error) { + attrs := []attribute.KeyValue{ + attribute.String("consumer.app_name", cfg.TASK_APP_NAME), + attribute.String("consumer.base_url", payload.Consumer.BaseUrl), + attribute.String("consumer.path", payload.Consumer.Path), + attribute.String("consumer.service_name", payload.Consumer.ServiceName), + } + + if err != nil { + attrs = append(attrs, attribute.Bool("success", false)) + attrs = append(attrs, attribute.String("error", err.Error())) + } else { + attrs = append(attrs, attribute.Bool("success", true)) + } + + duration := time.Since(started).Seconds() + telemetry.PubSubConsumerDuration.Record( + ctx, duration, + attrs..., + ) +} diff --git a/internal/app/taskapp/publisher_handle_http.go b/internal/app/taskapp/publisher_handle_http.go index 75a8691..12fb5ee 100644 --- a/internal/app/taskapp/publisher_handle_http.go +++ b/internal/app/taskapp/publisher_handle_http.go @@ -12,7 +12,6 @@ import ( "github.com/IsaacDSC/gqueue/pkg/ctxlogger" "github.com/IsaacDSC/gqueue/pkg/httpadapter" "github.com/IsaacDSC/gqueue/pkg/pubadapter" - "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/IsaacDSC/gqueue/pkg/topicutils" ) @@ -84,14 +83,6 @@ func PublisherEvent( ctx := r.Context() l := ctxlogger.GetLogger(ctx) - if meter := telemetry.MeterFromContext(ctx); meter != nil { - if counter, err := 
meter.Int64Counter("task_publisher_requests_total"); err == nil { - counter.Add(ctx, 1) - } - } else { - l.Warn("meter not found in context") - } - var payload InternalPayload defer r.Body.Close() diff --git a/internal/cfg/env.go b/internal/cfg/env.go index dac2356..9ef6e38 100644 --- a/internal/cfg/env.go +++ b/internal/cfg/env.go @@ -60,7 +60,7 @@ type Config struct { InternalServiceName string `env:"INTERNAL_SERVICE_NAME"` PubsubApiPort ServerPort `env:"PUBSUB_API_PORT" env-default:"8082"` - TaskApiPort ServerPort `env:"TASK_API_PORT" env-default:"8082"` + TaskApiPort ServerPort `env:"TASK_API_PORT" env-default:"8083"` BackofficeApiPort ServerPort `env:"BACKOFFICE_API_PORT" env-default:"8081"` ShutdownTimeout time.Duration `env:"SHUTDOWN_TIMEOUT" env-default:"30s"` //TODO: porque não está sendo usado? diff --git a/internal/fetcher/notification.go b/internal/fetcher/notification.go index 81f76b6..b0e76e2 100644 --- a/internal/fetcher/notification.go +++ b/internal/fetcher/notification.go @@ -6,11 +6,14 @@ import ( "encoding/json" "fmt" "net/http" + "time" "github.com/IsaacDSC/clienthttp" "github.com/IsaacDSC/gqueue/internal/domain" "github.com/IsaacDSC/gqueue/internal/notifyopt" "github.com/IsaacDSC/gqueue/pkg/httpclient" + "github.com/IsaacDSC/gqueue/pkg/telemetry" + "go.opentelemetry.io/otel/attribute" ) type Notification struct{} @@ -44,6 +47,7 @@ func (n Notification) NotifyScheduler(ctx context.Context, url string, data any, } func fetch(ctx context.Context, url string, data any, headers map[string]string, settings ...clienthttp.Option) error { + start := time.Now() payload, err := json.Marshal(data) if err != nil { return fmt.Errorf("marshal data: %w", err) @@ -70,6 +74,16 @@ func fetch(ctx context.Context, url string, data any, headers map[string]string, } defer resp.Body.Close() + attrs := []attribute.KeyValue{ + attribute.String("http.method", req.Method), + attribute.String("http.url", req.URL.String()), + attribute.Int("http.status_code", 
resp.StatusCode), + } + + duration := time.Since(start).Seconds() + telemetry.HTTPClientRequests.Increment(ctx, attrs...) + telemetry.HTTPClientRequestDuration.Record(ctx, duration, attrs...) + if resp.StatusCode > 299 { return fmt.Errorf("unexpected status code: %d", resp.StatusCode) } diff --git a/internal/interstore/mem_store.go b/internal/interstore/mem_store.go index 033daad..35f58b6 100644 --- a/internal/interstore/mem_store.go +++ b/internal/interstore/mem_store.go @@ -8,6 +8,8 @@ import ( "github.com/IsaacDSC/gqueue/internal/domain" "github.com/IsaacDSC/gqueue/pkg/ctxlogger" + "github.com/IsaacDSC/gqueue/pkg/telemetry" + "go.opentelemetry.io/otel/attribute" ) type PersistentStore interface { @@ -52,6 +54,13 @@ func (ms *MemStore) GetEvent(ctx context.Context, eventName string) (domain.Even event, exists := eventsMap[eventName] if !exists { l.Warn("Event not found", "event_name", eventName, "tag", ms.tag) + + telemetry.MemStoreEventNotFound.Increment( + ctx, + attribute.String("event_name", eventName), + attribute.String("error", domain.EventNotFound.Error()), + ) + return domain.Event{}, domain.EventNotFound } diff --git a/pkg/asyncadapter/gbubsub_adapter.go b/pkg/asyncadapter/gbubsub_adapter.go index d792337..f6b9416 100644 --- a/pkg/asyncadapter/gbubsub_adapter.go +++ b/pkg/asyncadapter/gbubsub_adapter.go @@ -9,7 +9,9 @@ import ( "github.com/IsaacDSC/gqueue/internal/domain" "github.com/IsaacDSC/gqueue/pkg/gpubsub" "github.com/IsaacDSC/gqueue/pkg/pubadapter" + "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/IsaacDSC/gqueue/pkg/topicutils" + "go.opentelemetry.io/otel/attribute" ) func (h Handle[T]) ToGPubSubHandler(pub pubadapter.GenericPublisher) gpubsub.Handle { @@ -26,6 +28,7 @@ func (h Handle[T]) ToGPubSubHandler(pub pubadapter.GenericPublisher) gpubsub.Han retryable := func(ctx context.Context, msg *pubsub.Message) { defer msg.Ack() + topic := msg.Attributes["topic"] strRetryCount, ok := msg.Attributes["retry_count"] if !ok { @@ -45,13 +48,15 @@ 
func (h Handle[T]) ToGPubSubHandler(pub pubadapter.GenericPublisher) gpubsub.Han } if retryCount >= maxRetryAttempts { + telemetry.PubSubConsumerArchived.Increment(ctx, attribute.String("topic", topic)) archivedMsg(ctx, msg) return } + telemetry.PubSubConsumerRetries.Increment(ctx, attribute.String("topic", topic)) + retryCount++ msg.Attributes["retry_count"] = strconv.Itoa(retryCount) - topic := msg.Attributes["topic"] // Wait respecting the context select { diff --git a/pkg/pubadapter/pub_sub_asynq.go b/pkg/pubadapter/pub_sub_asynq.go index d57d278..59524fd 100644 --- a/pkg/pubadapter/pub_sub_asynq.go +++ b/pkg/pubadapter/pub_sub_asynq.go @@ -7,7 +7,9 @@ import ( "time" "github.com/IsaacDSC/gqueue/pkg/ctxlogger" + "github.com/IsaacDSC/gqueue/pkg/telemetry" "github.com/hibiken/asynq" + "go.opentelemetry.io/otel/attribute" ) type Task struct { @@ -37,10 +39,16 @@ func (t *Task) Publish(ctx context.Context, eventName string, payload any, opts task := asynq.NewTask(eventName, p) info, err := t.client.Enqueue(task, definedOpts...) 
if err != nil { + telemetry.TaskPublisherRequests.Increment( + ctx, + attribute.String("event_name", eventName), + attribute.String("error", err.Error()), + ) + return fmt.Errorf("could not schedule task: %v", err) } - l.Info("enqueued task", "id", info.ID, "queue", info.Queue) + l.Debug("enqueued task", "id", info.ID, "queue", info.Queue) return nil } diff --git a/pkg/pubadapter/pub_sub_google.go b/pkg/pubadapter/pub_sub_google.go index bb8c370..ddab9e0 100644 --- a/pkg/pubadapter/pub_sub_google.go +++ b/pkg/pubadapter/pub_sub_google.go @@ -7,6 +7,8 @@ import ( "cloud.google.com/go/pubsub" "github.com/IsaacDSC/gqueue/pkg/ctxlogger" + "github.com/IsaacDSC/gqueue/pkg/telemetry" + "go.opentelemetry.io/otel/attribute" ) type PubSubGoogle struct { @@ -21,7 +23,6 @@ func NewPubSubGoogle(client *pubsub.Client) *PubSubGoogle { func (p *PubSubGoogle) Publish(ctx context.Context, topicName string, payload any, opts Opts) error { l := ctxlogger.GetLogger(ctx) - l.Info("[*] Publisher msg to topic", "topic", topicName) bytesPayload, err := json.Marshal(payload) if err != nil { @@ -44,6 +45,12 @@ func (p *PubSubGoogle) Publish(ctx context.Context, topicName string, payload an id, err := result.Get(ctx) if err != nil { + telemetry.PubSubPublisherRequests.Increment( + ctx, + attribute.String("topic", topicName), + attribute.String("error", err.Error()), + ) + return fmt.Errorf("could not publish message: %v", err) } diff --git a/pkg/telemetry/context.go b/pkg/telemetry/context.go index 2a7965e..29fcb1d 100644 --- a/pkg/telemetry/context.go +++ b/pkg/telemetry/context.go @@ -3,6 +3,7 @@ package telemetry import ( "context" + "github.com/IsaacDSC/gqueue/pkg/ctxlogger" "go.opentelemetry.io/otel/metric" ) @@ -17,7 +18,10 @@ func WithMeter(ctx context.Context, m metric.Meter) context.Context { // MeterFromContext retrieves the Meter from the context, or a default Meter if absent. 
func MeterFromContext(ctx context.Context) metric.Meter { + l := ctxlogger.GetLogger(ctx) + if ctx == nil { + l.Warn("context is nil when getting meter from context") return Meter("default") } @@ -25,5 +29,6 @@ func MeterFromContext(ctx context.Context) metric.Meter { return m } + l.Warn("meter not found in context") return Meter("default") } diff --git a/pkg/telemetry/metrics.go b/pkg/telemetry/metrics.go new file mode 100644 index 0000000..ef507c7 --- /dev/null +++ b/pkg/telemetry/metrics.go @@ -0,0 +1,55 @@ +package telemetry + +import ( + "context" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" +) + +type Metric struct { + Name string + Description string +} + +var ( + // Mem Store + MemStoreEventNotFound = Metric{Name: "mem_store_event_not_found_total", Description: "Total of events not found in the memory store"} + // HTTP Server + HTTPServerRequests = Metric{Name: "http_server_requests_total", Description: "Total of requests to the HTTP server"} + HTTPServerRequestDuration = Metric{Name: "http_server_request_duration_seconds", Description: "Duration of requests to the HTTP server"} + // HTTP Client + HTTPClientRequests = Metric{Name: "http_client_requests_total", Description: "Total of requests to the HTTP client"} + HTTPClientRequestDuration = Metric{Name: "http_client_request_duration_seconds", Description: "Duration of requests to the HTTP client"} + // PubSub + PubSubPublisherRequests = Metric{Name: "pubsub_publisher_requests_total", Description: "Total of requests to the pubsub publisher"} + PubSubConsumerRetries = Metric{Name: "pubsub_consumer_retries_total", Description: "Total of retries for a consumer"} + PubSubConsumerArchived = Metric{Name: "pubsub_consumer_archived_total", Description: "Total of archived messages for a consumer"} + PubSubConsumerDuration = Metric{Name: "pubsub_consumer_duration_seconds", Description: "Duration of a consumer"} + // Task + TaskPublisherRequests = Metric{Name: 
"task_publisher_requests_total", Description: "Total of requests to the task publisher"} + TaskConsumerRetries = Metric{Name: "task_consumer_retries_total", Description: "Total of retries for a consumer"} + TaskConsumerArchived = Metric{Name: "task_consumer_archived_total", Description: "Total of archived messages for a consumer"} + TaskConsumerDuration = Metric{Name: "task_consumer_duration_seconds", Description: "Duration of a consumer"} +) + +func (m Metric) Increment(ctx context.Context, attrs ...attribute.KeyValue) { + meter := MeterFromContext(ctx) + if counter, err := meter.Int64Counter(m.Name, metric.WithDescription(m.Description)); err == nil { + counter.Add(ctx, 1, metric.WithAttributes(attrs...)) + } +} + +func (m Metric) Count(ctx context.Context, value int64, attrs ...attribute.KeyValue) { + meter := MeterFromContext(ctx) + if counter, err := meter.Int64Counter(m.Name, metric.WithDescription(m.Description)); err == nil { + counter.Add(ctx, value, metric.WithAttributes(attrs...)) + } +} + +func (m Metric) Record(ctx context.Context, value float64, attrs ...attribute.KeyValue) { + meter := MeterFromContext(ctx) + if histogram, err := meter.Float64Histogram(m.Name, metric.WithDescription(m.Description)); err == nil { + histogram.Record(ctx, value, metric.WithAttributes(attrs...)) + } +} From 192409c7bd0556289b743d52f3d06401c701cbf8 Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Wed, 4 Mar 2026 09:13:23 -0300 Subject: [PATCH 3/9] feat: add publishedAt field and consumer lag telemetry - Introduced a new `PublishedAt` field in the `RequestPayload` struct to track message publication time. - Implemented consumer lag telemetry to measure the time difference between message publication and processing start. - Enhanced tests to validate the handling of the new `PublishedAt` field and its impact on consumer lag metrics. - Updated telemetry metrics to include `PubSubConsumerLagSeconds` for improved observability of processing delays. 
--- internal/app/pubsubapp/consumer_handle.go | 8 ++++ .../app/pubsubapp/consumer_handle_test.go | 39 +++++++++++++++++++ .../app/pubsubapp/publisher_handle_http.go | 25 ++++++++---- pkg/asyncadapter/gbubsub_adapter.go | 2 +- pkg/telemetry/metrics.go | 9 +++-- 5 files changed, 70 insertions(+), 13 deletions(-) diff --git a/internal/app/pubsubapp/consumer_handle.go b/internal/app/pubsubapp/consumer_handle.go index e83ee66..55188ed 100644 --- a/internal/app/pubsubapp/consumer_handle.go +++ b/internal/app/pubsubapp/consumer_handle.go @@ -11,6 +11,7 @@ import ( "github.com/IsaacDSC/gqueue/pkg/asyncadapter" "github.com/IsaacDSC/gqueue/pkg/ctxlogger" "github.com/IsaacDSC/gqueue/pkg/telemetry" + "github.com/IsaacDSC/gqueue/pkg/topicutils" "go.opentelemetry.io/otel/attribute" ) @@ -58,6 +59,13 @@ func GetRequestHandle(fetch Fetcher, insights ConsumerInsights) asyncadapter.Han return fmt.Errorf("fetch consumer: %w", err) } + publishedTime := time.UnixMilli(payload.PublishedAt) + lag := started.Sub(publishedTime).Seconds() + topic := topicutils.BuildTopicName(domain.ProjectID, domain.EventQueueRequestToExternal) + telemetry.PubSubConsumerLagSeconds.Record(ctx, lag, + attribute.String("topic", topic), + attribute.String("consumer.service_name", payload.Consumer.ServiceName)) + insertInsights(ctx, payload, started, true) recordDuration(ctx, started, payload, nil) diff --git a/internal/app/pubsubapp/consumer_handle_test.go b/internal/app/pubsubapp/consumer_handle_test.go index 027408d..74abbb8 100644 --- a/internal/app/pubsubapp/consumer_handle_test.go +++ b/internal/app/pubsubapp/consumer_handle_test.go @@ -7,6 +7,7 @@ import ( "net/http/httptest" "os" "testing" + "time" "github.com/IsaacDSC/gqueue/internal/app/pubsubapp" "github.com/IsaacDSC/gqueue/internal/domain" @@ -173,6 +174,44 @@ func TestGetRequestHandle_Handler_InvalidPayload(t *testing.T) { assert.Contains(t, err.Error(), "unmarshal payload:") } +func TestGetRequestHandle_Handler_WithPublishedAtInBody_RecordsLag(t 
*testing.T) { + testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer testServer.Close() + + // published_at 2 seconds in the past (Unix ms) in the body + publishedAtMs := time.Now().Add(-2 * time.Second).UnixMilli() + payload := pubsubapp.RequestPayload{ + EventName: "user.created", + PublishedAt: publishedAtMs, + Consumer: domain.Consumer{ + ServiceName: "user-service", + BaseUrl: testServer.URL, + Path: "/webhook", + Headers: map[string]string{}, + }, + Data: map[string]any{"key": "value"}, + } + taskPayload, err := json.Marshal(payload) + require.NoError(t, err) + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockFetcher := mockpubsubapp.NewMockFetcher(ctrl) + mockInsights := mockpubsubapp.NewMockConsumerInsights(ctrl) + mockFetcher.EXPECT(). + Notify(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), notifyopt.HighThroughput). + Return(nil).Times(1) + mockInsights.EXPECT().Consumed(gomock.Any(), gomock.Any()).Return(nil).Times(1) + + handle := pubsubapp.GetRequestHandle(mockFetcher, mockInsights) + asyncCtx := asyncadapter.NewAsyncCtx[pubsubapp.RequestPayload](context.Background(), taskPayload) + + err = handle.Handler(asyncCtx) + assert.NoError(t, err) +} + func TestRequestPayload_mergeHeaders_Integration(t *testing.T) { tests := []struct { name string diff --git a/internal/app/pubsubapp/publisher_handle_http.go b/internal/app/pubsubapp/publisher_handle_http.go index f8fa86b..792ff0d 100644 --- a/internal/app/pubsubapp/publisher_handle_http.go +++ b/internal/app/pubsubapp/publisher_handle_http.go @@ -24,10 +24,11 @@ type Store interface { } type RequestPayload struct { - EventName string `json:"event_name"` - Consumer domain.Consumer `json:"consumer"` - Data map[string]any `json:"data"` - Headers map[string]string `json:"headers,omitempty"` + EventName string `json:"event_name"` + Consumer domain.Consumer `json:"consumer"` + Data map[string]any 
`json:"data"` + Headers map[string]string `json:"headers,omitempty"` + PublishedAt int64 `json:"published_at,omitempty"` } func (p RequestPayload) mergeHeaders(headers map[string]string) map[string]string { @@ -109,10 +110,12 @@ func PublisherEvent( config := event.Option.ToAsynqOptions() for _, consumer := range event.Consumers { + nowMs := time.Now().UnixMilli() input := RequestPayload{ - EventName: event.Name, - Data: payload.Data, - Headers: payload.Metadata.Headers, + EventName: event.Name, + Data: payload.Data, + Headers: payload.Metadata.Headers, + PublishedAt: nowMs, Consumer: domain.Consumer{ ServiceName: consumer.ServiceName, BaseUrl: consumer.BaseUrl, @@ -122,7 +125,13 @@ func PublisherEvent( } topic := topicutils.BuildTopicName(domain.ProjectID, domain.EventQueueRequestToExternal) - opts := pubadapter.Opts{Attributes: make(map[string]string), AsynqOpts: config} + opts := pubadapter.Opts{ + Attributes: map[string]string{ + "topic": topic, + "max_retries": "1", + }, + AsynqOpts: config, + } if err = adaptpub.Publish(ctx, topic, input, opts); err != nil { err = fmt.Errorf("publish event: %w", err) l.Error("failed to publish event", "error", err.Error()) diff --git a/pkg/asyncadapter/gbubsub_adapter.go b/pkg/asyncadapter/gbubsub_adapter.go index f6b9416..6bdb845 100644 --- a/pkg/asyncadapter/gbubsub_adapter.go +++ b/pkg/asyncadapter/gbubsub_adapter.go @@ -48,7 +48,7 @@ func (h Handle[T]) ToGPubSubHandler(pub pubadapter.GenericPublisher) gpubsub.Han } if retryCount >= maxRetryAttempts { - telemetry.PubSubConsumerArchived.Increment(ctx, attribute.String("topic", topic)) + telemetry.PubSubConsumerDlq.Increment(ctx, attribute.String("topic", topic)) archivedMsg(ctx, msg) return } diff --git a/pkg/telemetry/metrics.go b/pkg/telemetry/metrics.go index ef507c7..05a9a8b 100644 --- a/pkg/telemetry/metrics.go +++ b/pkg/telemetry/metrics.go @@ -22,10 +22,11 @@ var ( HTTPClientRequests = Metric{Name: "http_client_requests_total", Description: "Total of requests to the 
HTTP client"} HTTPClientRequestDuration = Metric{Name: "http_client_request_duration_seconds", Description: "Duration of requests to the HTTP client"} // PubSub - PubSubPublisherRequests = Metric{Name: "pubsub_publisher_requests_total", Description: "Total of requests to the pubsub publisher"} - PubSubConsumerRetries = Metric{Name: "pubsub_consumer_retries_total", Description: "Total of retries for a consumer"} - PubSubConsumerArchived = Metric{Name: "pubsub_consumer_archived_total", Description: "Total of archived messages for a consumer"} - PubSubConsumerDuration = Metric{Name: "pubsub_consumer_duration_seconds", Description: "Duration of a consumer"} + PubSubPublisherRequests = Metric{Name: "pubsub_publisher_requests_total", Description: "Total of requests to the pubsub publisher"} + PubSubConsumerRetries = Metric{Name: "pubsub_consumer_retries_total", Description: "Total of retries for a consumer"} + PubSubConsumerDlq = Metric{Name: "pubsub_consumer_dlq_total", Description: "Total of archived messages for a consumer"} + PubSubConsumerDuration = Metric{Name: "pubsub_consumer_duration_seconds", Description: "Duration of a consumer"} + PubSubConsumerLagSeconds = Metric{Name: "pubsub_consumer_lag_seconds", Description: "Time in seconds between message publish and consumer processing start"} // Task TaskPublisherRequests = Metric{Name: "task_publisher_requests_total", Description: "Total of requests to the task publisher"} TaskConsumerRetries = Metric{Name: "task_consumer_retries_total", Description: "Total of retries for a consumer"} From e5d4cd0bb68658f6d0b1283eae4bfa64c42bd43f Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Fri, 6 Mar 2026 18:07:48 -0300 Subject: [PATCH 4/9] feat: enhance telemetry metrics for task processing and memory store synchronization - Added telemetry metrics for task consumer processing, including total processing, success, and failure counts. - Integrated memory store activity duration tracking to monitor refresh performance. 
- Updated middleware to utilize new metrics for improved observability of task handling. - Refactored logging to include more detailed context during memory store synchronization. --- cmd/setup/memstore/task_sync.go | 5 +++ cmd/setup/middleware/middleware.go | 25 +++++++++++++- cmd/setup/task/consumer.go | 1 + internal/app/taskapp/consumer_handle.go | 3 +- internal/app/taskapp/publisher_handle_http.go | 9 ++--- internal/fetcher/notification.go | 9 ++--- internal/notifyopt/notify_opts.go | 4 +++ pkg/logs/slog.go | 2 +- pkg/telemetry/metrics.go | 33 +++++++++++++------ 9 files changed, 69 insertions(+), 22 deletions(-) diff --git a/cmd/setup/memstore/task_sync.go b/cmd/setup/memstore/task_sync.go index d3f3073..9b9b814 100644 --- a/cmd/setup/memstore/task_sync.go +++ b/cmd/setup/memstore/task_sync.go @@ -6,6 +6,7 @@ import ( "github.com/IsaacDSC/gqueue/internal/interstore" "github.com/IsaacDSC/gqueue/pkg/ctxlogger" + "github.com/IsaacDSC/gqueue/pkg/telemetry" ) func SyncMemStore(ctx context.Context, memStore *interstore.MemStore) { @@ -14,11 +15,15 @@ func SyncMemStore(ctx context.Context, memStore *interstore.MemStore) { for { select { case <-trigger.C: + start := time.Now() if err := memStore.LoadInMemStore(ctx); err != nil { l.Error("Error refreshing mem store with events from persistent store", "error", err) continue } + duration := time.Since(start).Seconds() + telemetry.MemActivityDuration.Record(ctx, duration) + l.Debug("Executed periodic refresh of mem store with events from persistent store", "scope", "pubsub") case <-ctx.Done(): trigger.Stop() diff --git a/cmd/setup/middleware/middleware.go b/cmd/setup/middleware/middleware.go index 13182ff..10ed332 100644 --- a/cmd/setup/middleware/middleware.go +++ b/cmd/setup/middleware/middleware.go @@ -130,6 +130,29 @@ func AsynqLogger(h asynq.Handler) asynq.Handler { }) } +func AsynqMetrics(next asynq.Handler) asynq.Handler { + return asynq.HandlerFunc(func(ctx context.Context, t *asynq.Task) error { + meter := 
telemetry.Meter("task-consumer") + ctx = telemetry.WithMeter(ctx, meter) + + attrs := []attribute.KeyValue{ + attribute.String("task.event_name", t.Type()), + } + + telemetry.TaskConsumerTotalProcessing.Increment(ctx, attrs...) + defer telemetry.TaskConsumerTotalProcessing.Decrement(ctx, attrs...) + + if err := next.ProcessTask(ctx, t); err != nil { + telemetry.TaskConsumerTotalFailure.Count(ctx, 1, attrs...) + return err + } + + telemetry.TaskConsumerTotalSuccess.Count(ctx, 1, attrs...) + + return nil + }) +} + func LoggerMiddleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") @@ -188,7 +211,7 @@ func MetricsMiddleware(serviceName string, next http.Handler) http.Handler { attrs := []attribute.KeyValue{ attribute.String("http.method", r.Method), attribute.String("http.route", path), - attribute.Int("http.status_code", rec.statusCode), + attribute.Int("http.response_code", rec.statusCode), attribute.String("service.name", serviceName), } diff --git a/cmd/setup/task/consumer.go b/cmd/setup/task/consumer.go index 7858058..10e96da 100644 --- a/cmd/setup/task/consumer.go +++ b/cmd/setup/task/consumer.go @@ -17,6 +17,7 @@ func (s *Service) consumer(ctx context.Context, env cfg.Config, asynqCfg asynq.C mux := asynq.NewServeMux() mux.Use(middleware.AsynqLogger) + mux.Use(middleware.AsynqMetrics) events := []asynqsvc.AsynqHandle{ taskapp.GetRequestHandle(s.fetch, s.insightsStore).ToAsynqHandler(), diff --git a/internal/app/taskapp/consumer_handle.go b/internal/app/taskapp/consumer_handle.go index 4fde6a2..cbee2ad 100644 --- a/internal/app/taskapp/consumer_handle.go +++ b/internal/app/taskapp/consumer_handle.go @@ -64,7 +64,6 @@ func GetRequestHandle(fetch Fetcher, insights ConsumerInsights) asyncadapter.Han insertInsights(ctx, payload, started, true) recordDuration(ctx, started, payload, nil) - return nil }, } @@ -86,7 +85,7 @@ func recordDuration(ctx 
context.Context, started time.Time, payload RequestPaylo } duration := time.Since(started).Seconds() - telemetry.PubSubConsumerDuration.Record( + telemetry.TaskConsumerDuration.Record( ctx, duration, attrs..., ) diff --git a/internal/app/taskapp/publisher_handle_http.go b/internal/app/taskapp/publisher_handle_http.go index 12fb5ee..9906046 100644 --- a/internal/app/taskapp/publisher_handle_http.go +++ b/internal/app/taskapp/publisher_handle_http.go @@ -24,10 +24,11 @@ type Store interface { } type RequestPayload struct { - EventName string `json:"event_name"` - Consumer domain.Consumer `json:"consumer"` - Data map[string]any `json:"data"` - Headers map[string]string `json:"headers,omitempty"` + EventName string `json:"event_name"` + Consumer domain.Consumer `json:"consumer"` + Data map[string]any `json:"data"` + Headers map[string]string `json:"headers,omitempty"` + PublishedAt int64 `json:"published_at,omitempty"` } func (p RequestPayload) Validate() error { diff --git a/internal/fetcher/notification.go b/internal/fetcher/notification.go index b0e76e2..941b2b8 100644 --- a/internal/fetcher/notification.go +++ b/internal/fetcher/notification.go @@ -35,18 +35,18 @@ func (n Notification) Notify(ctx context.Context, data map[string]any, headers m settings = append(settings, httpclient.HighThroughputSettings()...) } - return fetch(ctx, url, data, headers, settings...) + return fetch(ctx, url, data, headers, opt, settings...) 
} func (n Notification) NotifyConsumer(ctx context.Context, url string, data map[string]any, headers map[string]string) error { - return fetch(ctx, url, data, headers) + return fetch(ctx, url, data, headers, notifyopt.LongRunning) } func (n Notification) NotifyScheduler(ctx context.Context, url string, data any, headers map[string]string) error { - return fetch(ctx, url, data, headers) + return fetch(ctx, url, data, headers, notifyopt.LongRunning) } -func fetch(ctx context.Context, url string, data any, headers map[string]string, settings ...clienthttp.Option) error { +func fetch(ctx context.Context, url string, data any, headers map[string]string, opt notifyopt.Kind, settings ...clienthttp.Option) error { start := time.Now() payload, err := json.Marshal(data) if err != nil { @@ -75,6 +75,7 @@ func fetch(ctx context.Context, url string, data any, headers map[string]string, defer resp.Body.Close() attrs := []attribute.KeyValue{ + attribute.String("http.service_name", opt.String()), attribute.String("http.method", req.Method), attribute.String("http.url", req.URL.String()), attribute.Int("http.status_code", resp.StatusCode), diff --git a/internal/notifyopt/notify_opts.go b/internal/notifyopt/notify_opts.go index 9d94b14..0c1dd47 100644 --- a/internal/notifyopt/notify_opts.go +++ b/internal/notifyopt/notify_opts.go @@ -2,6 +2,10 @@ package notifyopt type Kind string +func (k Kind) String() string { + return string(k) +} + const ( Default Kind = "default" HighThroughput Kind = "high_throughput" diff --git a/pkg/logs/slog.go b/pkg/logs/slog.go index 0f201a5..89a17e1 100644 --- a/pkg/logs/slog.go +++ b/pkg/logs/slog.go @@ -94,7 +94,7 @@ func (l LogLevel) GetLevel() slog.Level { // New creates a new configured logger func New(opts ...LogOption) *Logger { config := &logConfig{ - level: LevelInfo, + level: LevelDebug, output: os.Stdout, addSource: false, jsonFormat: true, // Default to JSON format diff --git a/pkg/telemetry/metrics.go b/pkg/telemetry/metrics.go index 
05a9a8b..8eff846 100644 --- a/pkg/telemetry/metrics.go +++ b/pkg/telemetry/metrics.go @@ -15,8 +15,10 @@ type Metric struct { var ( // Mem Store MemStoreEventNotFound = Metric{Name: "mem_store_event_not_found_total", Description: "Total of events not found in the memory store"} + MemActivityDuration = Metric{Name: "mem_store_activity_duration", Description: "Activity of the memory store"} + // HTTP Server - HTTPServerRequests = Metric{Name: "http_server_requests_total", Description: "Total of requests to the HTTP server"} + HTTPServerRequests = Metric{Name: "http_server_requests_total", Description: "Total of requests to the HTTP server"} // Filter by http.response_code HTTPServerRequestDuration = Metric{Name: "http_server_request_duration_seconds", Description: "Duration of requests to the HTTP server"} // HTTP Client HTTPClientRequests = Metric{Name: "http_client_requests_total", Description: "Total of requests to the HTTP client"} @@ -28,23 +30,34 @@ var ( PubSubConsumerDuration = Metric{Name: "pubsub_consumer_duration_seconds", Description: "Duration of a consumer"} PubSubConsumerLagSeconds = Metric{Name: "pubsub_consumer_lag_seconds", Description: "Time in seconds between message publish and consumer processing start"} // Task - TaskPublisherRequests = Metric{Name: "task_publisher_requests_total", Description: "Total of requests to the task publisher"} - TaskConsumerRetries = Metric{Name: "task_consumer_retries_total", Description: "Total of retries for a consumer"} - TaskConsumerArchived = Metric{Name: "task_consumer_archived_total", Description: "Total of archived messages for a consumer"} - TaskConsumerDuration = Metric{Name: "task_consumer_duration_seconds", Description: "Duration of a consumer"} + TaskPublisherRequests = Metric{Name: "task_publisher_requests_total", Description: "Total of requests to the task publisher"} + TaskConsumerRetries = Metric{Name: "task_consumer_retries_total", Description: "Total of retries for a consumer"} + 
TaskConsumerArchived = Metric{Name: "task_consumer_archived_total", Description: "Total of archived messages for a consumer"} + TaskConsumerDuration = Metric{Name: "task_consumer_duration_seconds", Description: "Duration of a consumer"} + TaskConsumerLagSeconds = Metric{Name: "task_consumer_lag_seconds", Description: "Time in seconds between message publish and consumer processing start"} + TaskConsumerTotalProcessing = Metric{Name: "task_consumer_total_processing", Description: "Total of tasks being consumed"} // Filter by task.event_name + TaskConsumerTotalFailure = Metric{Name: "task_consumer_total_failure", Description: "Total of tasks being consumed with failure"} // Filter by task.event_name + TaskConsumerTotalSuccess = Metric{Name: "task_consumer_total_success", Description: "Total of tasks being consumed with success"} // Filter by task.event_name ) -func (m Metric) Increment(ctx context.Context, attrs ...attribute.KeyValue) { +func (m Metric) Count(ctx context.Context, value int64, attrs ...attribute.KeyValue) { meter := MeterFromContext(ctx) if counter, err := meter.Int64Counter(m.Name, metric.WithDescription(m.Description)); err == nil { - counter.Add(ctx, 1, metric.WithAttributes(attrs...)) + counter.Add(ctx, value, metric.WithAttributes(attrs...)) } } -func (m Metric) Count(ctx context.Context, value int64, attrs ...attribute.KeyValue) { +func (m Metric) Increment(ctx context.Context, attrs ...attribute.KeyValue) { meter := MeterFromContext(ctx) - if counter, err := meter.Int64Counter(m.Name, metric.WithDescription(m.Description)); err == nil { - counter.Add(ctx, value, metric.WithAttributes(attrs...)) + if upDown, err := meter.Int64UpDownCounter(m.Name, metric.WithDescription(m.Description)); err == nil { + upDown.Add(ctx, 1, metric.WithAttributes(attrs...)) + } +} + +func (m Metric) Decrement(ctx context.Context, attrs ...attribute.KeyValue) { + meter := MeterFromContext(ctx) + if upDown, err := meter.Int64UpDownCounter(m.Name, 
metric.WithDescription(m.Description)); err == nil { + upDown.Add(ctx, -1, metric.WithAttributes(attrs...)) } } From 6f79e6af7c95bff7dfa72124ef519fd56ef56a92 Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Fri, 6 Mar 2026 19:25:14 -0300 Subject: [PATCH 5/9] feat: introduce new Grafana dashboards for observability - Added new Grafana dashboards for backoffice, pubsub, and task services to enhance monitoring capabilities. - Removed legacy dashboard files and replaced them with updated JSON configurations for better performance and usability. - Updated deployment scripts to support the new dashboard structure and ensure proper import into Grafana. - Integrated PostgreSQL exporter for improved metrics collection related to database performance. --- backup.dashboard.grafana.json | 948 ++++++ deployment/app-pgsql/docker-compose.yaml | 24 + .../app-pgsql/prometheus/prometheus.yml | 5 + .../provisioning/dashboards/dashboards.yml | 2 +- deployment/grafana/backoffice_dashboard.json | 948 ++++++ deployment/grafana/pubsub_dashboard.json | 1166 ++++++++ deployment/grafana/task_dashboard.json | 1376 +++++++++ deployment/grafana_dashboard.json | 2557 ----------------- deployment/grafana_dashboard_temp.json | 1 - deployment/import_dashboard.sh | 178 +- 10 files changed, 4591 insertions(+), 2614 deletions(-) create mode 100644 backup.dashboard.grafana.json create mode 100644 deployment/grafana/backoffice_dashboard.json create mode 100644 deployment/grafana/pubsub_dashboard.json create mode 100644 deployment/grafana/task_dashboard.json delete mode 100644 deployment/grafana_dashboard.json delete mode 100644 deployment/grafana_dashboard_temp.json diff --git a/backup.dashboard.grafana.json b/backup.dashboard.grafana.json new file mode 100644 index 0000000..5177bab --- /dev/null +++ b/backup.dashboard.grafana.json @@ -0,0 +1,948 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + 
"iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 * sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_compose_service=\"postgres\"}[5m]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "New panel", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, 
+ "y": 8 + }, + "id": 20, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Backoffice message visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Backoffice ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 * sum(rate(process_cpu_seconds_total{job=\"gqueue-backoffice\"}[5m])) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service CPU", + 
"type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_bytes{job=\"gqueue-backoffice\"}) by (job, instance) / 1024 / 1024", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service Mem (Mb)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(go_goroutines{job=\"gqueue-backoffice\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service Goroutines", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "O que a métrica representa na prática\nSe go_threads = 18, significa que o runtime Go está usando 18 threads do SO para executar:\n\ngoroutines da aplicação\ntrabalho de GC\noperações de rede / syscalls\nscheduler interno do runtime", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + 
"tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(go_threads{job=\"gqueue-backoffice\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service Threads Go process", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * rate(go_gc_duration_seconds_count{job=\"gqueue-backoffice\"}[1m])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice service GC activity (min)", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 33, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Database visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Task ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Total de conexões abertas Painel stat ou timeseries", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 40 + }, + "id": 41, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + 
"values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(pg_stat_database_numbackends{job=\"postgres\", datname=\"gqueue\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Total Conn ", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Tamanho do banco em GB Painel stat", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 7, + "y": 40 + }, + "id": 42, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "pg_database_size_bytes{job=\"postgres\", datname=\"gqueue\"} / 1024 / 1024 / 1024", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "GB disc", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 13, + "y": 
40 + }, + "id": 43, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 *\nsum(rate(pg_stat_database_blks_hit{job=\"postgres\", datname=\"gqueue\"}[5m]))\n/\n(\n sum(rate(pg_stat_database_blks_hit{job=\"postgres\", datname=\"gqueue\"}[5m]))\n +\n sum(rate(pg_stat_database_blks_read{job=\"postgres\", datname=\"gqueue\"}[5m]))\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Cache hit ratio Painel", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 40 + }, + "id": 44, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(pg_stat_database_blks_read{job=\"postgres\", datname=\"gqueue\"}[5m])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Disc 
Reading", + "type": "stat" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Gqueue - Backoffice Service Dashboard", + "uid": "adxctbtbackoffice", + "version": 4, + "weekStart": "" +} \ No newline at end of file diff --git a/deployment/app-pgsql/docker-compose.yaml b/deployment/app-pgsql/docker-compose.yaml index d4cef5c..2f62d43 100644 --- a/deployment/app-pgsql/docker-compose.yaml +++ b/deployment/app-pgsql/docker-compose.yaml @@ -197,6 +197,29 @@ services: - gqueue - observability + postgres-exporter: + image: prometheuscommunity/postgres-exporter:latest + container_name: postgres-exporter + environment: + - DATA_SOURCE_URI=postgres:5432/gqueue?sslmode=disable + - DATA_SOURCE_USER=idsc + - DATA_SOURCE_PASS=admin + ports: + - "9187:9187" + depends_on: + postgres: + condition: service_healthy + networks: + - app-network + restart: unless-stopped + deploy: + resources: + limits: + cpus: "0.25" + memory: 128M + profiles: + - observability + pgadmin: image: dpage/pgadmin4:latest ports: @@ -280,6 +303,7 @@ services: volumes: - grafana_data:/var/lib/grafana - ./provisioning:/etc/grafana/provisioning + - ../grafana:/etc/grafana/dashboards:ro ports: - "3000:3000" networks: diff --git a/deployment/app-pgsql/prometheus/prometheus.yml b/deployment/app-pgsql/prometheus/prometheus.yml index 20bf0c2..0cb7faf 100644 --- a/deployment/app-pgsql/prometheus/prometheus.yml +++ b/deployment/app-pgsql/prometheus/prometheus.yml @@ -24,3 +24,8 @@ scrape_configs: static_configs: - targets: ["otel-collector:9464"] + - job_name: "postgres" + metrics_path: /metrics + static_configs: + - targets: ["postgres-exporter:9187"] + diff --git a/deployment/app-pgsql/provisioning/dashboards/dashboards.yml b/deployment/app-pgsql/provisioning/dashboards/dashboards.yml index 11f7fa6..e82912b 100644 --- 
a/deployment/app-pgsql/provisioning/dashboards/dashboards.yml +++ b/deployment/app-pgsql/provisioning/dashboards/dashboards.yml @@ -6,5 +6,5 @@ providers: disableDeletion: false editable: true options: - path: /etc/grafana/provisioning/dashboards + path: /etc/grafana/dashboards diff --git a/deployment/grafana/backoffice_dashboard.json b/deployment/grafana/backoffice_dashboard.json new file mode 100644 index 0000000..5177bab --- /dev/null +++ b/deployment/grafana/backoffice_dashboard.json @@ -0,0 +1,948 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 45, + "options": { + "legend": { + "calcs": [], + 
"displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 * sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_compose_service=\"postgres\"}[5m]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "New panel", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 20, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Backoffice message visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Backoffice ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 * sum(rate(process_cpu_seconds_total{job=\"gqueue-backoffice\"}[5m])) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_bytes{job=\"gqueue-backoffice\"}) by (job, instance) / 1024 / 1024", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service Mem (Mb)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 36, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(go_goroutines{job=\"gqueue-backoffice\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service Goroutines", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "O que a métrica representa na prática\nSe go_threads = 18, significa que o runtime Go está usando 18 threads do SO para executar:\n\ngoroutines da aplicação\ntrabalho de GC\noperações de rede / syscalls\nscheduler interno do runtime", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(go_threads{job=\"gqueue-backoffice\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice Service Threads Go process", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { 
+ "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * rate(go_gc_duration_seconds_count{job=\"gqueue-backoffice\"}[1m])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Backoffice service GC activity (min)", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 33, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Database visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Task ", + "type": "text" + }, + { + "datasource": 
{ + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Total de conexões abertas Painel stat ou timeseries", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 40 + }, + "id": 41, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(pg_stat_database_numbackends{job=\"postgres\", datname=\"gqueue\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Total Conn ", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Tamanho do banco em GB Painel stat", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 7, + "y": 40 + }, + "id": 42, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "pg_database_size_bytes{job=\"postgres\", datname=\"gqueue\"} / 1024 / 1024 / 1024", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "GB disc", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 13, + "y": 40 + }, + "id": 43, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 *\nsum(rate(pg_stat_database_blks_hit{job=\"postgres\", datname=\"gqueue\"}[5m]))\n/\n(\n sum(rate(pg_stat_database_blks_hit{job=\"postgres\", datname=\"gqueue\"}[5m]))\n +\n sum(rate(pg_stat_database_blks_read{job=\"postgres\", datname=\"gqueue\"}[5m]))\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Cache hit ratio Painel", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + 
"x": 18, + "y": 40 + }, + "id": 44, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "rate(pg_stat_database_blks_read{job=\"postgres\", datname=\"gqueue\"}[5m])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Disc Reading", + "type": "stat" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Gqueue - Backoffice Service Dashboard", + "uid": "adxctbtbackoffice", + "version": 4, + "weekStart": "" +} \ No newline at end of file diff --git a/deployment/grafana/pubsub_dashboard.json b/deployment/grafana/pubsub_dashboard.json new file mode 100644 index 0000000..1eb50fd --- /dev/null +++ b/deployment/grafana/pubsub_dashboard.json @@ -0,0 +1,1166 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 41, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Publihser and Consumer Metrics\n\nData visualization, 
monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Pubsub ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "P95 last 1m\n\nExemplo prático:\n- Se a query retornar `0.420` para `http_route=\"/api/v1/task\"`, significa que:\n - nos últimos 1 minuto,\n - 95% das requests dessa rota\n - responderam em até `420 ms`.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 4 + }, + "id": 21, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(\n 0.95,\n sum(rate(http_server_request_duration_seconds_bucket{http_route=\"/api/v1/pubsub\", service_name=\"pubsub\"}[1m])) by (le, http_route, service_name)\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Publisher Latency p95(s)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 6, + "y": 4 + }, + "id": 22, + "options": { + "colorMode": 
"value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(http_server_request_duration_seconds_count{service_name=\"pubsub\"}[1m])) by (http_route, service_name)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Pubsub Publisher RPM ", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 11, + "y": 4 + }, + "id": 23, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(pubsub_consumer_duration_seconds_count[1m])) by (consumer_service_name, consumer_path)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Pubsub Consumer RPM ", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(http_client_request_duration_seconds_count{job=\"gqueue-pubsub\"}[1m])) by (http_url, service_name, job)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "RPM - Pubsub Client HTTP Delivery ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99,\n sum(rate(pubsub_consumer_lag_seconds_bucket[5m])) by (le, topic, consumer_service_name)\n) and on(topic, consumer_service_name)\nsum(rate(pubsub_consumer_lag_seconds_count[5m])) by (topic, consumer_service_name) > 0", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Pubsub Consumer Lag (s)", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 40, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Task memStore Metrics\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Pubsub ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 43, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(mem_store_activity_duration_count{job=\"gqueue-pubsub\"}[1m])) by (instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task MemStrore Sync per minute", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 44, + "options": { + "colorMode": "value", + "graphMode": 
"area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "1000 *\n(\n sum(rate(mem_store_activity_duration_sum{job=\"gqueue-pubsub\"}[5m])) by (job)\n /\n sum(rate(mem_store_activity_duration_count{job=\"gqueue-pubsub\"}[5m])) by (job)\n)\nand on(job)\nsum(rate(mem_store_activity_duration_count{job=\"gqueue-pubsub\"}[5m])) by (job) > 0", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Sync MemStore Time execution (Ms)", + "type": "stat" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 42, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Pubsub Service Metrics Visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Pubsub ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": 
false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 * sum(rate(process_cpu_seconds_total{job=\"gqueue-pubsub\"}[5m])) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "CPU pubsub", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 
+ }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_bytes{job=\"gqueue-pubsub\"}) by (job, instance) / 1024 / 1024", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Pubsub Mem (Mb)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + 
"expr": "sum(go_goroutines{job=\"gqueue-pubsub\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Pubsub Goroutines", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "O que a métrica representa na prática\nSe go_threads = 18, significa que o runtime Go está usando 18 threads do SO para executar:\n\ngoroutines da aplicação\ntrabalho de GC\noperações de rede / syscalls\nscheduler interno do runtime", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(go_threads{job=\"gqueue-pubsub\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + 
"refId": "A" + } + ], + "title": "Pubsub Threads Go process", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 39, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * rate(go_gc_duration_seconds_count{job=\"gqueue-pubsub\"}[1m])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Pubsub service GC activity (min)", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "5s", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Gqueue - Pubsub 
Service Dashboard", + "uid": "adhqlpfpubsub", + "version": 3, + "weekStart": "" +} \ No newline at end of file diff --git a/deployment/grafana/task_dashboard.json b/deployment/grafana/task_dashboard.json new file mode 100644 index 0000000..093c91e --- /dev/null +++ b/deployment/grafana/task_dashboard.json @@ -0,0 +1,1376 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 33, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Tasks message visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Task ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (task_event_name) (task_consumer_total_processing)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Processing consumer task", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": 
false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (task_event_name) (task_consumer_success_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Success consumer task", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (task_event_name) (task_consumer_failure_total)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + 
"title": "Failure consumer task", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "P95 last 1m\n\nExemplo prático:\n- Se a query retornar `0.420` para `http_route=\"/api/v1/task\"`, significa que:\n - nos últimos 1 minuto,\n - 95% das requests dessa rota\n - responderam em até `420 ms`.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 5, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(\n 0.95,\n sum(rate(http_server_request_duration_seconds_bucket{job=\"gqueue-task\", http_route=\"/api/v1/task\"}[1m])) by (le, http_route, service_name)\n)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Publisher Latency p95(s)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 2000 + }, + { + "color": "red", + "value": 3000 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 6, + "y": 12 + }, + "id": 18, + "options": { + 
"colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(http_server_request_duration_seconds_count{job=\"gqueue-task\"}[1m])) by (http_route, service_name)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task Publisher RPM ", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "#EAB839", + "value": 2000 + }, + { + "color": "red", + "value": 3000 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 11, + "y": 12 + }, + "id": 17, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(task_consumer_success_total{job=\"gqueue-task\"}[1m])) by (task_event_name)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task Consumer RPM ", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(http_client_request_duration_seconds_count{job=\"gqueue-task\"}[1m])) by (http_url, service_name, job)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "RPM - Task Client HTTP Delivery ", + "type": "timeseries" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 15, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# MemStore task visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + 
"pluginVersion": "12.4.0", + "title": "Scope Pubsub ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 40, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * sum(rate(mem_store_activity_duration_count{job=\"gqueue-task\"}[1m])) by (instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task MemStrore Sync per minute", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + 
"color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 41, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "1000 *\n(\n sum(rate(mem_store_activity_duration_sum{job=\"gqueue-task\"}[5m])) by (job)\n /\n sum(rate(mem_store_activity_duration_count{job=\"gqueue-task\"}[5m])) by (job)\n)\nand on(job)\nsum(rate(mem_store_activity_duration_count{job=\"gqueue-task\"}[5m])) by (job) > 0", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Sync MemStore Time execution (Ms)", + "type": "stat" + }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 39, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Task Service Metrics Visualization\n\nData visualization, monitoring activity\n", + "mode": "markdown" + }, + "pluginVersion": "12.4.0", + "title": "Scope Pubsub ", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 * sum(rate(process_cpu_seconds_total{job=\"gqueue-task\"}[5m])) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task Service CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(process_resident_memory_bytes{job=\"gqueue-task\"}) by (job, instance) / 1024 / 1024", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task Service Mem (Mb)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 31, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + 
"hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(go_goroutines{job=\"gqueue-task\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task Service Goroutines", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "O que a métrica representa na prática\nSe go_threads = 18, significa que o runtime Go está usando 18 threads do SO para executar:\n\ngoroutines da aplicação\ntrabalho de GC\noperações de rede / syscalls\nscheduler interno do runtime", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 32, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(go_threads{job=\"gqueue-task\"}) by (job, instance)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task Service Threads Go process", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 38, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "60 * rate(go_gc_duration_seconds_count{job=\"gqueue-task\"}[1m])", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Task service GC activity (min)", + "type": "timeseries" + } + ], + "preload": false, + 
"refresh": "5s", + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Gqueue - Task Service Dashboard", + "uid": "adxctbttask", + "version": 3, + "weekStart": "" +} \ No newline at end of file diff --git a/deployment/grafana_dashboard.json b/deployment/grafana_dashboard.json deleted file mode 100644 index 09b8501..0000000 --- a/deployment/grafana_dashboard.json +++ /dev/null @@ -1,2557 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 0, - "links": [], - "panels": [ - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 0 - }, - "id": 1, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Total Published Messages", - 
"transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "total_published", - "path": "total_published" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 0 - }, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Total Consumed Messages", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "total_consumed", - "path": "total_consumed" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - 
"color": "yellow", - "value": 1 - }, - { - "color": "red", - "value": 5 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 0 - }, - "id": 3, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Publishing Errors", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "published_errors", - "path": "total_published_with_err" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 1 - }, - { - "color": "red", - "value": 5 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 0 - }, - "id": 4, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": 
"12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Consumer Errors", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "consumer_errors", - "path": "total_consumed_with_err" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisGridShow": true, - "axisLabel": "Messages/Minute", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 6, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "always", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "red", - "value": 10 - }, - { - "color": "yellow", - "value": 15 - }, - { - "color": "green", - "value": 20 - } - ] - }, - "unit": "rpm" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 6 - }, - "id": 5, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": 
"redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Publisher RPM (Time Series)", - "transformations": [ - { - "id": "extractFields", - "options": { - "format": "json", - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "pattern": "rpm_publisher.*" - } - } - } - ], - "type": "timeseries", - "description": "Publisher RPM over time from array structure" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisGridShow": true, - "axisLabel": "Messages/Minute", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 2, - "pointSize": 6, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "always", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "red", - "value": 10 - }, - { - "color": "yellow", - "value": 15 - }, - { - "color": "green", - "value": 20 - } - ] - }, - "unit": "rpm" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - 
"keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Consumer RPM (Time Series)", - "transformations": [ - { - "id": "extractFields", - "options": { - "format": "json", - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "pattern": "rpm_consumer.*" - } - } - } - ], - "type": "timeseries", - "description": "Consumer RPM over time from array structure" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 50 - }, - { - "color": "red", - "value": 100 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 14 - }, - "id": 7, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Avg Publishing Duration", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "durations", - "path": "segmentation_published.payment\\.processed[*].TimeDurationMs" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "reduce", - "options": { - 
"includeTimeField": false, - "mode": "reduceFields", - "reducers": [ - "mean" - ] - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 10 - }, - { - "color": "red", - "value": 50 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 14 - }, - "id": 8, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Avg Consumer Duration", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "durations", - "path": "segmentation_consumed.payment\\.processed\\.consumer-1[*].TimeDurationMs" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "reduce", - "options": { - "includeTimeField": false, - "mode": "reduceFields", - "reducers": [ - "mean" - ] - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"red", - "value": null - }, - { - "color": "yellow", - "value": 90 - }, - { - "color": "green", - "value": 99 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 14 - }, - "id": 9, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Publisher Success Rate", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "total_published", - "path": "total_published" - }, - { - "alias": "total_errors", - "path": "total_published_with_err" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "calculateField", - "options": { - "alias": "success_rate", - "binary": { - "left": "total_published", - "operator": "/", - "reducer": "sum", - "right": "total_published" - }, - "mode": "binary", - "reduce": { - "reducer": "sum" - } - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, 
- "w": 6, - "x": 18, - "y": 14 - }, - "id": 10, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Topic Segmentation", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "topic_published", - "path": "total_segmentation_published" - }, - { - "alias": "topic_consumed", - "path": "total_segmentation_consumed" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - } - }, - "mappings": [] - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 20 - }, - "id": 11, - "options": { - "reduceOptions": { - "values": false, - "calcs": [ - "lastNotNull" - ], - "fields": "" - }, - "pieType": "pie", - "tooltip": { - "mode": "single", - "sort": "none" - }, - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "values": [] - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Message Distribution by Topic", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - 
"jsonPaths": [ - { - "alias": "topic_distribution", - "path": "total_segmentation_published" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "piechart" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "0": { - "color": "red", - "index": 0, - "text": "UNHEALTHY" - }, - "1": { - "color": "green", - "index": 1, - "text": "HEALTHY" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 20 - }, - "id": 12, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "value_and_name", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "System Health Status", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "total_published", - "path": "total_published" - }, - { - "alias": "total_consumed", - "path": "total_consumed" - }, - { - "alias": "pub_errors", - "path": "total_published_with_err" - }, - { - "alias": "cons_errors", - "path": "total_consumed_with_err" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - 
} - }, - { - "id": "calculateField", - "options": { - "alias": "health_status", - "mode": "reduceRow", - "reduce": { - "include": [ - "pub_errors", - "cons_errors" - ], - "reducer": "sum" - } - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisGridShow": true, - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 28 - }, - "id": 13, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Total Messages Published", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "total_published", - "path": "total_published" - } - ], - "keepTime": true, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "timeseries" - }, - 
{ - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisGridShow": true, - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 20, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 28 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Total Messages Consumed", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "total_consumed", - "path": "total_consumed" - } - ], - "keepTime": true, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "timeseries" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - 
"inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Metric" - }, - "properties": [ - { - "id": "custom.width", - "value": 200 - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 34 - }, - "id": 15, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Current Metrics Summary", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "Published", - "path": "total_published" - }, - { - "alias": "Consumed", - "path": "total_consumed" - }, - { - "alias": "Pub_Errors", - "path": "total_published_with_err" - }, - { - "alias": "Cons_Errors", - "path": "total_consumed_with_err" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 16, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - 
"showHeader": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Publisher RPM Data (payment.processed)", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "rpm_publisher_data", - "path": "rpm_publisher" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "table", - "description": "Shows RPM values by timestamp for payment.processed topic" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 17, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "Consumer RPM Data (consumer-1)", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "rpm_consumer_data", - "path": "rpm_consumer" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } 
- ], - "type": "table", - "description": "Shows RPM values by timestamp for consumer-1" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" - }, - "custom": { - "axisGridShow": true, - "axisLabel": "Messages/Min", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 3, - "pointSize": 8, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "always", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "red", - "value": 10 - }, - { - "color": "yellow", - "value": 15 - }, - { - "color": "green", - "value": 20 - } - ] - }, - "unit": "rpm" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Publisher RPM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 46 - }, - "id": 18, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Publisher RPM Trend (Live)", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ 
- { - "alias": "total_pub", - "path": "total_published" - } - ], - "keepTime": true, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": {}, - "renameByName": { - "total_pub": "Publisher Rate" - } - } - } - ], - "type": "timeseries" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "custom": { - "axisGridShow": true, - "axisLabel": "Messages/Min", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 3, - "pointSize": 8, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "always", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "red", - "value": 10 - }, - { - "color": "yellow", - "value": 15 - }, - { - "color": "green", - "value": 20 - } - ] - }, - "unit": "rpm" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Consumer RPM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 46 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": 
"gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Consumer RPM Trend (Live)", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "total_cons", - "path": "total_consumed" - } - ], - "keepTime": true, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": {}, - "renameByName": { - "total_cons": "Consumer Rate" - } - } - } - ], - "type": "timeseries" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisGridShow": true, - "axisLabel": "Messages/Minute", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "vis": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineWidth": 3, - "pointSize": 8, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "always", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "transparent", - "value": null - }, - { - "color": "red", - "value": 8 - }, - { - "color": "yellow", - "value": 12 - }, - { - "color": "green", - "value": 16 - } - ] - }, - "unit": "rpm" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Publisher RPM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Consumer RPM" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - 
} - ] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 60 - }, - "id": 21, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max", - "min" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "RPM Comparison - Publisher vs Consumer", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "publisher_data", - "path": "rpm_publisher[\"payment.processed\"]" - }, - { - "alias": "consumer_data", - "path": "rpm_consumer[\"payment.processed:consumer-1\"]" - } - ], - "keepTime": true, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - }, - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "pub_time", - "path": "publisher_data[*].time" - }, - { - "alias": "Publisher RPM", - "path": "publisher_data[*].value" - } - ], - "keepTime": false, - "replace": false, - "source": "publisher_data" - } - }, - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "cons_time", - "path": "consumer_data[*].time" - }, - { - "alias": "Consumer RPM", - "path": "consumer_data[*].value" - } - ], - "keepTime": false, - "replace": false, - "source": "consumer_data" - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "time", - "targetField": "pub_time" - }, - { - "destinationType": "time", - "targetField": "cons_time" - } - ] - } - } - ], - "type": "timeseries" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 10 - }, - { - "color": "green", - "value": 15 - } - ] - }, - "unit": "rpm" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 70 - }, - "id": 22, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "value_and_name", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Publisher RPM Raw Data", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "rpm_publisher_raw", - "path": "rpm_publisher" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "yellow", - "value": 10 - }, - { - "color": "green", - "value": 15 - } - ] - }, - "unit": "rpm" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 70 - }, - "id": 23, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "center", - "orientation": "horizontal", - "percentChangeColorMode": 
"standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "value_and_name", - "wideLayout": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": true, - "type": "command" - } - ], - "title": "Consumer RPM Raw Data", - "transformations": [ - { - "id": "extractFields", - "options": { - "delimiter": ",", - "format": "json", - "jsonPaths": [ - { - "alias": "rpm_consumer_raw", - "path": "rpm_consumer" - } - ], - "keepTime": false, - "replace": false, - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 70 - }, - "id": 24, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "command": "get", - "datasource": { - "type": "redis-datasource", - "uid": "redis-main" - }, - "keyName": "gqueue:metric.insights:collector", - "query": "", - "refId": "A", - "streaming": false, - "type": "command" - } - ], - "title": "All Available Fields (Debug)", - "transformations": [ - { - "id": "extractFields", - "options": { - "format": "json", - "source": "gqueue:metric.insights:collector" - } - } - ], - "type": "table" - } - ], - "preload": false, - "schemaVersion": 42, - 
"tags": [ - "gqueue", - "messaging" - ], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "GQueue Monitoring Dashboard", - "uid": "gqueue-dashboard", - "version": 1, - "refresh": "5s" -} \ No newline at end of file diff --git a/deployment/grafana_dashboard_temp.json b/deployment/grafana_dashboard_temp.json deleted file mode 100644 index fb7e855..0000000 --- a/deployment/grafana_dashboard_temp.json +++ /dev/null @@ -1 +0,0 @@ -{"panels":[]} diff --git a/deployment/import_dashboard.sh b/deployment/import_dashboard.sh index 6f143ed..fdaf4d0 100644 --- a/deployment/import_dashboard.sh +++ b/deployment/import_dashboard.sh @@ -9,7 +9,7 @@ set -e GRAFANA_URL="${GRAFANA_URL:-http://localhost:3000}" GRAFANA_USER="${GRAFANA_USER:-admin}" GRAFANA_PASSWORD="${GRAFANA_PASSWORD:-admin}" -DASHBOARD_FILE="$(dirname "$0")/grafana_dashboard.json" +DASHBOARD_DIR="$(dirname "$0")/grafana" REDIS_DATASOURCE_NAME="Redis Main" REDIS_URL="${REDIS_URL:-redis://localhost:6379}" @@ -67,22 +67,29 @@ check_grafana() { log_success "Grafana is accessible" } -# Check if dashboard file exists -check_dashboard_file() { - log_info "Checking dashboard file..." +# Check if dashboard files exist +check_dashboard_files() { + log_info "Checking dashboard files..." - if [ ! -f "$DASHBOARD_FILE" ]; then - log_error "Dashboard file not found: $DASHBOARD_FILE" - exit 1 - fi + local dashboard_files=( + "$DASHBOARD_DIR/backoffice_dashboard.json" + "$DASHBOARD_DIR/pubsub_dashboard.json" + "$DASHBOARD_DIR/task_dashboard.json" + ) - # Validate JSON - if ! jq . "$DASHBOARD_FILE" > /dev/null 2>&1; then - log_error "Dashboard file contains invalid JSON" - exit 1 - fi + for dashboard_file in "${dashboard_files[@]}"; do + if [ ! -f "$dashboard_file" ]; then + log_error "Dashboard file not found: $dashboard_file" + exit 1 + fi + + if ! jq . 
"$dashboard_file" > /dev/null 2>&1; then + log_error "Dashboard file contains invalid JSON: $dashboard_file" + exit 1 + fi + done - log_success "Dashboard file is valid" + log_success "Dashboard files are valid" } # Create or update Redis datasource @@ -166,41 +173,46 @@ setup_redis_datasource() { rm -f /tmp/datasource_check.json /tmp/datasource_update.json /tmp/datasource_create.json } -# Import dashboard -import_dashboard() { - log_info "Importing GQueue dashboard..." - - # Prepare dashboard JSON for import - DASHBOARD_JSON=$(jq '{ - dashboard: ., - overwrite: true, - inputs: [], - folderId: 0 - }' "$DASHBOARD_FILE") - - # Import dashboard - IMPORT_RESULT=$(curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \ - -H "Content-Type: application/json" \ - -X POST \ - "${GRAFANA_URL}/api/dashboards/db" \ - -d "$DASHBOARD_JSON" \ - -w "%{http_code}" -o /tmp/dashboard_import.json) +# Import dashboards +import_dashboards() { + log_info "Importing GQueue dashboards..." + + local dashboard_files=( + "$DASHBOARD_DIR/backoffice_dashboard.json" + "$DASHBOARD_DIR/pubsub_dashboard.json" + "$DASHBOARD_DIR/task_dashboard.json" + ) + + for dashboard_file in "${dashboard_files[@]}"; do + DASHBOARD_JSON=$(jq '{ + dashboard: ., + overwrite: true, + inputs: [], + folderId: 0 + }' "$dashboard_file") + + IMPORT_RESULT=$(curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \ + -H "Content-Type: application/json" \ + -X POST \ + "${GRAFANA_URL}/api/dashboards/db" \ + -d "$DASHBOARD_JSON" \ + -w "%{http_code}" -o /tmp/dashboard_import.json) - HTTP_CODE=$(echo "$IMPORT_RESULT" | tail -n1) + HTTP_CODE=$(echo "$IMPORT_RESULT" | tail -n1) - if [ "$HTTP_CODE" = "200" ]; then - DASHBOARD_URL=$(jq -r '.url' /tmp/dashboard_import.json) - log_success "Dashboard imported successfully!" 
- log_info "Dashboard URL: ${GRAFANA_URL}${DASHBOARD_URL}" - else - log_error "Failed to import dashboard (HTTP $HTTP_CODE)" - if [ -f /tmp/dashboard_import.json ]; then - log_error "Response: $(cat /tmp/dashboard_import.json)" + if [ "$HTTP_CODE" = "200" ]; then + DASHBOARD_URL=$(jq -r '.url' /tmp/dashboard_import.json) + log_success "Dashboard imported successfully: $(basename "$dashboard_file")" + log_info "Dashboard URL: ${GRAFANA_URL}${DASHBOARD_URL}" + else + log_error "Failed to import dashboard $(basename "$dashboard_file") (HTTP $HTTP_CODE)" + if [ -f /tmp/dashboard_import.json ]; then + log_error "Response: $(cat /tmp/dashboard_import.json)" + fi + exit 1 fi - exit 1 - fi + done - # Clean up temp file rm -f /tmp/dashboard_import.json } @@ -209,18 +221,27 @@ test_dashboard() { log_info "Testing dashboard accessibility..." # Get dashboard by UID - DASHBOARD_TEST=$(curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \ - "${GRAFANA_URL}/api/dashboards/uid/gqueue-dashboard" \ - -w "%{http_code}" -o /tmp/dashboard_test.json) + local dashboard_uids=( + "adxctbtbackoffice" + "adhqlpfpubsub" + "adxctbttask" + ) - HTTP_CODE=$(echo "$DASHBOARD_TEST" | tail -n1) + for dashboard_uid in "${dashboard_uids[@]}"; do + DASHBOARD_TEST=$(curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \ + "${GRAFANA_URL}/api/dashboards/uid/${dashboard_uid}" \ + -w "%{http_code}" -o /tmp/dashboard_test.json) - if [ "$HTTP_CODE" = "200" ]; then - log_success "Dashboard is accessible and working" - else - log_warning "Dashboard may not be accessible (HTTP $HTTP_CODE)" - fi + HTTP_CODE=$(echo "$DASHBOARD_TEST" | tail -n1) + + if [ "$HTTP_CODE" = "200" ]; then + log_success "Dashboard is accessible and working: ${dashboard_uid}" + else + log_warning "Dashboard may not be accessible: ${dashboard_uid} (HTTP $HTTP_CODE)" + fi + done + # Clean up temp file # Clean up temp file rm -f /tmp/dashboard_test.json } @@ -233,4 +254,51 @@ usage() { echo " -h, --help Show this help message" echo " -u, --url URL 
Grafana URL (default: http://localhost:3000)" echo " --user USER Grafana username (default: admin)" - echo " --password PASSWORD Grafana + echo " --password PASSWORD Grafana password (default: admin)" + echo " --redis-url URL Redis URL (default: redis://localhost:6379)" +} + +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + usage + exit 0 + ;; + -u|--url) + GRAFANA_URL="$2" + shift 2 + ;; + --user) + GRAFANA_USER="$2" + shift 2 + ;; + --password) + GRAFANA_PASSWORD="$2" + shift 2 + ;; + --redis-url) + REDIS_URL="$2" + shift 2 + ;; + *) + log_error "Unknown option: $1" + usage + exit 1 + ;; + esac + done +} + +main() { + parse_args "$@" + check_dependencies + check_grafana + check_dashboard_files + setup_redis_datasource + import_dashboards + test_dashboard + log_success "Grafana setup completed" +} + +main "$@" From d7ab986129e6da9b125fd37d58b7d6cd00e4c777 Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Fri, 6 Mar 2026 19:30:59 -0300 Subject: [PATCH 6/9] feat: update event payloads and introduce task consumer configuration - Replaced the publisher data file with specific event payloads for pubsub and task services. - Added new JSON files for pubsub and task event payloads to standardize event data structure. - Created a new task consumer configuration file to define the consumer behavior for the 'payment.charged' event. - Updated simulation scripts to utilize the new event payload files for testing purposes. 
--- example/example.md | 4 ++-- example/pubsub_event_payload.json | 11 ++++++++++ .../simulation/multiples_producer_pubsub.sh | 2 +- example/simulation/multiples_producer_task.sh | 22 +++++++++++++++++++ example/task_consumer.json | 22 +++++++++++++++++++ ...sher_data.json => task_event_payload.json} | 0 6 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 example/pubsub_event_payload.json create mode 100644 example/simulation/multiples_producer_task.sh create mode 100644 example/task_consumer.json rename example/{publisher_data.json => task_event_payload.json} (100%) diff --git a/example/example.md b/example/example.md index e64658e..961c704 100644 --- a/example/example.md +++ b/example/example.md @@ -27,13 +27,13 @@ curl -X POST \ http://localhost:8082/api/v1/pubsub \ -H "Content-Type: application/json" \ -H "Authorization: Basic YWRtaW46cGFzc3dvcmQ=" \ --d @example/publisher_data.json +-d @example/pubsub_event_payload.json curl -X POST \ http://localhost:8083/api/v1/task \ -H "Content-Type: application/json" \ -H "Authorization: Basic YWRtaW46cGFzc3dvcmQ=" \ --d @example/publisher_data.json +-d @example/task_event_payload.json ### DELETE event diff --git a/example/pubsub_event_payload.json b/example/pubsub_event_payload.json new file mode 100644 index 0000000..52e8065 --- /dev/null +++ b/example/pubsub_event_payload.json @@ -0,0 +1,11 @@ +{ + "service_name": "my-app", + "event_name": "payment.charged", + "data": { + "key": "value", + "w-queue":"pubsub" + }, + "metadata": { + "correlation_id": "5e4dd662-9eba-4321-9c97-0b4ee0942f8b" + } +} diff --git a/example/simulation/multiples_producer_pubsub.sh b/example/simulation/multiples_producer_pubsub.sh index b62ec6f..502179a 100644 --- a/example/simulation/multiples_producer_pubsub.sh +++ b/example/simulation/multiples_producer_pubsub.sh @@ -2,7 +2,7 @@ URL="http://localhost:8082/api/v1/pubsub" AUTH="Basic YWRtaW46cGFzc3dvcmQ=" -DATA_FILE="example/publisher_data.json" 
+DATA_FILE="example/pubsub_event_payload.json" while true; do # 10 chamadas em paralelo diff --git a/example/simulation/multiples_producer_task.sh b/example/simulation/multiples_producer_task.sh new file mode 100644 index 0000000..f3c45aa --- /dev/null +++ b/example/simulation/multiples_producer_task.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +URL="http://localhost:8083/api/v1/task" +AUTH="Basic YWRtaW46cGFzc3dvcmQ=" +DATA_FILE="example/task_event_payload.json" + +while true; do + # 10 chamadas em paralelo + for i in {1..10}; do + curl -s -X POST \ + "$URL" \ + -H "Content-Type: application/json" \ + -H "Authorization: $AUTH" \ + -d @"$DATA_FILE" & + done + + # espera as 10 terminarem (opcional, mas recomendado) + wait + + # espera 1s + sleep 1 +done diff --git a/example/task_consumer.json b/example/task_consumer.json new file mode 100644 index 0000000..082c2b9 --- /dev/null +++ b/example/task_consumer.json @@ -0,0 +1,22 @@ +{ + "name": "payment.charged", + "type": "external", + "option": { + "wq_type": "low_throughput", + "max_retries": 3, + "retention": "168h", + "unique_ttl": "60s", + "schedule_in": "100ms" + }, + "consumers": [ + { + "service_name": "external-service", + "type": "persistent", + "host": "http://consumer:3333", + "path": "/payment/charged", + "headers": { + "Content-Type": "application/json" + } + } + ] +} \ No newline at end of file diff --git a/example/publisher_data.json b/example/task_event_payload.json similarity index 100% rename from example/publisher_data.json rename to example/task_event_payload.json From d0ff5688fda5c40aaa73481220fcc52e0f9b1143 Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Sat, 7 Mar 2026 15:10:33 -0300 Subject: [PATCH 7/9] chore: update Go toolchain and add PostgreSQL queries documentation - Updated Go toolchain version from 1.25.7 to 1.26.1 in go.mod and CI workflows. - Added a new queries.txt file containing useful PostgreSQL queries for monitoring and performance analysis. 
--- .github/workflows/ci.yml | 10 ++-- .github/workflows/retest.yml | 2 +- go.mod | 2 +- queries.txt | 109 +++++++++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 queries.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 91426e9..03c9c38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go-version: ["1.24.13", "1.25.7"] + go-version: ["1.25.8", "1.26.1"] services: redis: @@ -80,7 +80,7 @@ jobs: WQ_CONCURRENCY: 32 - name: Check coverage for new code - if: github.event_name == 'pull_request' && matrix.go-version == '1.25.7' + if: github.event_name == 'pull_request' && matrix.go-version == '1.26.1' run: | # Setup Python for diff-cover python -m pip install --upgrade pip @@ -147,7 +147,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.25.7" + go-version: "1.26.1" cache: true - name: Download dependencies @@ -181,7 +181,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.25.7" + go-version: "1.26.1" cache: true - name: Install govulncheck @@ -208,7 +208,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.25.7" + go-version: "1.26.1" cache: true - name: Build application diff --git a/.github/workflows/retest.yml b/.github/workflows/retest.yml index 18faff6..23bc404 100644 --- a/.github/workflows/retest.yml +++ b/.github/workflows/retest.yml @@ -63,7 +63,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: "1.25.7" + go-version: "1.26.1" cache: true - name: Generate mocks diff --git a/go.mod b/go.mod index 1ea3e54..92ab6b6 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,7 @@ module github.com/IsaacDSC/gqueue go 1.24.0 -toolchain go1.25.7 +toolchain go1.26.1 require ( cloud.google.com/go/pubsub v1.49.0 diff --git a/queries.txt b/queries.txt new file mode 100644 index 0000000..6c35086 --- /dev/null +++ b/queries.txt @@ 
-0,0 +1,109 @@ +Queries úteis +1. Status do Postgres Painel stat + +max(pg_up{job="postgres"}) +2. Total de conexões abertas Painel stat ou timeseries + +sum(pg_stat_database_numbackends{job="postgres", datname="gqueue"}) +3. Conexões por estado Painel timeseries ou pie chart + +sum by (state) (pg_stat_activity_count{job="postgres", datname="gqueue"}) +4. Tamanho do banco em GB Painel stat + +pg_database_size_bytes{job="postgres", datname="gqueue"} / 1024 / 1024 / 1024 +5. TPS de commits Painel timeseries + +rate(pg_stat_database_xact_commit{job="postgres", datname="gqueue"}[5m]) +6. TPS de rollbacks Painel timeseries + +rate(pg_stat_database_xact_rollback{job="postgres", datname="gqueue"}[5m]) +7. Cache hit ratio Painel stat ou gauge + +100 * +sum(rate(pg_stat_database_blks_hit{job="postgres", datname="gqueue"}[5m])) +/ +( + sum(rate(pg_stat_database_blks_hit{job="postgres", datname="gqueue"}[5m])) + + + sum(rate(pg_stat_database_blks_read{job="postgres", datname="gqueue"}[5m])) +) +8. Leituras em disco por segundo Painel timeseries + +rate(pg_stat_database_blks_read{job="postgres", datname="gqueue"}[5m]) + +9. Escritas temporárias em bytes/s Painel timeseries + +rate(pg_stat_database_temp_bytes{job="postgres", datname="gqueue"}[5m]) + +10. Deadlocks Painel stat ou timeseries + +increase(pg_stat_database_deadlocks{job="postgres", datname="gqueue"}[15m]) + +Queries boas para tabelas +11. Top 10 tabelas com mais linhas vivas Painel bar chart + +topk(10, pg_stat_user_tables_n_live_tup{job="postgres", datname="gqueue"}) + +12. Top 10 tabelas com mais dead tuples Painel bar chart + +topk(10, pg_stat_user_tables_n_dead_tup{job="postgres", datname="gqueue"}) + +13. Seq scan por tabela Painel bar chart + +topk(10, rate(pg_stat_user_tables_seq_scan{job="postgres", datname="gqueue"}[5m])) + +14. Index scan por tabela Painel bar chart + +topk(10, rate(pg_stat_user_tables_idx_scan{job="postgres", datname="gqueue"}[5m])) + +15. 
Inserts por segundo Painel timeseries + +sum(rate(pg_stat_database_tup_inserted{job="postgres", datname="gqueue"}[5m])) + +16. Updates por segundo Painel timeseries + +sum(rate(pg_stat_database_tup_updated{job="postgres", datname="gqueue"}[5m])) + +17. Deletes por segundo Painel timeseries + +sum(rate(pg_stat_database_tup_deleted{job="postgres", datname="gqueue"}[5m])) + +Queries de background writer +18. Buffers escritos por checkpoints Painel timeseries + +rate(pg_stat_bgwriter_buffers_checkpoint{job="postgres"}[5m]) + +19. Buffers escritos pelo backend Painel timeseries + +rate(pg_stat_bgwriter_buffers_backend{job="postgres"}[5m]) + +20. Checkpoints solicitados Painel timeseries + +rate(pg_stat_bgwriter_checkpoints_req{job="postgres"}[5m]) + + + +Se você usar cAdvisor +As queries mais comuns no Grafana ficam assim. + +CPU do container Postgres +sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_compose_service="postgres"}[5m])) + +CPU em percentual + +100 * sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_compose_service="postgres"}[5m])) +Memória em uso + +container_memory_working_set_bytes{container_label_com_docker_compose_service="postgres"} / 1024 / 1024 +Memória total / RSS + +container_memory_rss{container_label_com_docker_compose_service="postgres"} / 1024 / 1024 +Limite de memória + +container_spec_memory_limit_bytes{container_label_com_docker_compose_service="postgres"} / 1024 / 1024 +Percentual de memória usada + +100 * +container_memory_working_set_bytes{container_label_com_docker_compose_service="postgres"} +/ +container_spec_memory_limit_bytes{container_label_com_docker_compose_service="postgres"} From 2362abc88ad7f50d1d9ba1418f1bdcb39ebfc412 Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Sat, 7 Mar 2026 16:53:58 -0300 Subject: [PATCH 8/9] feat: add metrics documentation for gqueue services - Introduced a comprehensive metrics manual for gqueue, detailing metrics exposed via OpenTelemetry in Prometheus format. 
- Documented service-specific metrics endpoints, usage instructions for starting the observability stack, and load simulation commands. - Included a reference section for metrics categorized by service (Backoffice, PubSub, Task) to enhance monitoring and observability. --- docs/metrics.md | 247 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 docs/metrics.md diff --git a/docs/metrics.md b/docs/metrics.md new file mode 100644 index 0000000..a5a7787 --- /dev/null +++ b/docs/metrics.md @@ -0,0 +1,247 @@ +# gqueue Metrics and Usage Manual + +This document describes the metrics exposed by gqueue via OpenTelemetry (OTel) in Prometheus format, and provides a **usage manual** with commands to run services, example consumers, and load simulations. + +## Overview + +- **Format:** Prometheus exposition format. +- **Endpoint:** Each HTTP service exposes `GET /metrics` on the same port as the API. +- **Configuration:** Metrics can be disabled with `METRICS_ENABLED=false` (default: `true`). +- **Stack:** Prometheus scrapes the endpoints; Grafana uses the Prometheus datasource for visualization (Docker Compose profile `observability`). + +## Metric endpoints by service + +| Service | Port | Metrics URL | +|----------|------|-----------------------------| +| Backoffice | 8081 | http://localhost:8081/metrics | +| PubSub API | 8082 | http://localhost:8082/metrics | +| Task API | 8083 | http://localhost:8083/metrics | + +The `/metrics` and `/health` routes do **not** generate HTTP metrics (they are excluded by the middleware to avoid noise). + +--- + +## Usage manual + +### Start the stack with observability + +Start all services (Backoffice, PubSub, Task, PostgreSQL, Redis, etc.) plus Prometheus and Grafana: + +```sh +docker compose -f deployment/app-pgsql/docker-compose.yaml --profile observability up -d --build +``` + +- **Grafana:** http://localhost:3000 (Prometheus datasource is provisioned). 
+- **Prometheus:** scrapes Backoffice (8081), PubSub (8082), and Task (8083) `/metrics`. + +### Start the example consumer + +Run the example consumer (e.g. to process PubSub or Task events) in the foreground: + +```sh +docker compose -f deployment/app-pgsql/docker-compose.yaml --profile example up +``` + +### Generate load (simulate producers) + +From the **project root**, run the simulation scripts to send traffic to the APIs. + +**PubSub producers (POST to PubSub API):** + +```sh +example/simulation/multiples_producer_pubsub.sh +``` + +**Task producers (POST to Task API):** + +```sh +example/simulation/multiples_producer_task.sh +``` + +Ensure the stack is up (`observability` or at least the API services) before running these scripts. + +--- + +## Metrics reference by category + +### Backoffice metrics + +| Metric / concept | Description | +|----------------------------|-------------| +| **CPU** | Process CPU usage (e.g. from Go runtime / Prometheus `process_*` or host metrics). | +| **Memory** | Process memory usage (RSS, heap, etc.). | +| **Number of goroutines** | Active goroutines (e.g. `go_goroutines` if exposed). | +| **Number of threads** | OS threads used by the process. | +| **GC activity** | Garbage collection activity (allocations, GC pauses). | +| **Total connections (Pg)** | Total number of PostgreSQL connections used by the Backoffice. | +| **Total GB on disk** | Disk usage (e.g. PostgreSQL data directory or host disk). | +| **Cache DB ratio** | Database cache hit ratio (e.g. PostgreSQL `cache hit ratio`). | +| **Disk DB reading** | Disk read I/O for the database. | + +HTTP metrics for the Backoffice (request count, duration histogram) are exposed via the metrics middleware; see [HTTP metrics](#http-metrics-middleware) below. + +--- + +### PubSub metrics + +| Metric / concept | Description | +|-----------------------------|-------------| +| **P95 publisher msg** | 95th percentile latency for publishing a message (publisher path). 
| +| **RPM publisher msg** | Requests per minute for the publisher (messages published per minute). | +| **RPM consumer msg** | Messages consumed per minute (consumer throughput). | +| **RPM client HTTP sent msg**| Rate of messages sent via HTTP client (publish API calls per minute). | +| **Lag consumer msg** | Time between message publish and consumer processing (consumer lag). | +| **Mem store sync activity** | Mem store sync operations (e.g. flushes to persistent storage). | +| **Mem store time execution (ms)** | Mem store operation duration in milliseconds. | +| **CPU** | Process CPU usage. | +| **Memory** | Process memory usage. | +| **Number of goroutines** | Active goroutines. | +| **Number of threads** | OS threads. | +| **GC activity** | Garbage collection activity. | + +Consumer lag is exposed as `pubsub_consumer_lag_seconds` (histogram); see [PubSub consumer lag](#pubsub_consumer_lag_seconds) for PromQL examples. + +--- + +### Task metrics + +| Metric / concept | Description | +|-----------------------------|-------------| +| **Total processing task** | Total tasks currently being processed (or total processed over time). | +| **Total success consumer task** | Total tasks consumed successfully. | +| **Total failure consumer task** | Total tasks that failed consumption (e.g. retries or dead-letter). | +| **P95 publisher msg** | 95th percentile latency for task publish (API). | +| **RPM task publisher** | Task publish requests per minute. | +| **RPM task consumer** | Tasks consumed per minute. | +| **RPM client HTTP sent msg**| Task API publish calls per minute. | +| **CPU** | Process CPU usage. | +| **Memory** | Process memory usage. | +| **Number of goroutines** | Active goroutines. | +| **Number of threads** | OS threads. | +| **GC activity** | Garbage collection activity. | + +Task queue gauges (`task_queue_pending`, `task_queue_active`, etc.) and consumer counters are described below. 
+ +--- + +## HTTP metrics (middleware) + +Recorded automatically by the metrics middleware on all services (Backoffice, PubSub, Task). They follow Prometheus and OTel HTTP semantic conventions. + +### `http_server_requests_total` + +- **Type:** counter (`Int64Counter`). +- **Description:** Total HTTP requests received. +- **Labels:** `http.method`, `http.route`, `http.status_code`, `service.name` (`backoffice`, `pubsub`, `task`). + +### `http_server_request_duration_seconds` + +- **Type:** histogram (`Float64Histogram`). +- **Description:** Request duration in seconds. +- **Labels:** same as above. In PromQL the dots in label names appear as underscores (e.g. `http_route`, `service_name`), as in the example below. + +In Prometheus, the histogram is exposed as: + +- `http_server_request_duration_seconds_bucket` +- `http_server_request_duration_seconds_sum` +- `http_server_request_duration_seconds_count` + +**Example (p95 by route and service):** + +```promql +histogram_quantile( + 0.95, + sum(rate(http_server_request_duration_seconds_bucket[5m])) by (le, http_route, service_name) +) +``` + +--- + +## Domain metrics + +### Task: publisher and queue + +- **`task_publisher_requests_total`** — Counter of requests to the task publish endpoint (Task API, `POST /api/v1/task`). + +Queue metrics (Asynq / Task) represent real backlog in Redis; labels include `queue` and `service.name` = `task`: + +| Metric | Type | Description | +|----------------------|-------|-------------| +| `task_queue_pending` | gauge | Tasks waiting to be processed. | +| `task_queue_active` | gauge | Tasks currently being processed. | +| `task_queue_scheduled` | gauge | Tasks scheduled for the future. | +| `task_queue_retry` | gauge | Tasks waiting for retry. | +| `task_queue_archived`| gauge | Tasks archived (e.g. dead-letter). | +| `task_queue_size` | gauge | Total tasks in the queue. | +| `task_queue_latency_seconds` | gauge | Queue latency in seconds (age of oldest pending task). 
| + +**Task consumer totals (dashboard queries):** + +Success count by event name: + +```promql +sum by (task_event_name) (task_consumer_success_total) +``` + +Failure count by event name: + +```promql +sum by (task_event_name) (task_consumer_failure_total) +``` + +Processing (e.g. in-flight or total processed) by event name: + +```promql +sum by (task_event_name) (task_consumer_total_processing) +``` + +--- + +### `pubsub_consumer_lag_seconds` + +- **Type:** histogram (`Float64Histogram`). +- **Description:** Time in seconds between message publish and the start of processing by the consumer (message age at consume time). +- **Service:** PubSub API (consumer). +- **Labels:** `topic`, `consumer.service_name` (or `consumer_service_name` depending on exporter). + +**Example (p99 lag by topic):** + +```promql +histogram_quantile( + 0.99, + sum(rate(pubsub_consumer_lag_seconds_bucket[5m])) by (le, topic, consumer_service_name) +) +``` + +**Example (p95 by topic):** + +```promql +histogram_quantile(0.95, sum(rate(pubsub_consumer_lag_seconds_bucket[5m])) by (le, topic)) +``` + +**Example (throughput — messages/second with lag):** + +```promql +sum(rate(pubsub_consumer_lag_seconds_count[5m])) by (topic, consumer_service_name) +``` + +--- + +## Prometheus scrape configuration + +In the Docker Compose deployment (profile `observability`), Prometheus is configured to scrape: + +- **Backoffice:** `backoffice:8081/metrics` +- **PubSub:** `pubsub:8082/metrics` +- **Task:** `task:8083/metrics` + +Configuration file: `deployment/app-pgsql/prometheus/prometheus.yml`. + +--- + +## References + +- Implementation spec: [docs/specs/opentelemetry-metrics.md](specs/opentelemetry-metrics.md). +- Metrics middleware: `cmd/setup/middleware/middleware.go` (MetricsMiddleware). +- Telemetry package: `pkg/telemetry`. +- [OpenTelemetry semantic conventions — HTTP](https://opentelemetry.io/docs/specs/semconv/http/). 
From e9b2553f40399c37926657aec062a6e1e516daaf Mon Sep 17 00:00:00 2001 From: IsaacDSC Date: Sun, 8 Mar 2026 09:50:36 -0300 Subject: [PATCH 9/9] chore: remove obsolete Grafana dashboard and PostgreSQL queries files - Deleted the backup.dashboard.grafana.json file as it was no longer needed. - Removed the queries.txt file containing PostgreSQL queries to streamline the project and eliminate outdated documentation. - Updated related configurations to reflect these deletions and maintain project cleanliness. --- backup.dashboard.grafana.json | 948 ------------------------------- cmd/api/main.go | 2 +- internal/cfg/env.go | 6 + internal/domain/event.go | 3 +- internal/fetcher/notification.go | 4 +- pkg/logs/slog.go | 4 +- pkg/telemetry/telemetry.go | 116 ++-- queries.txt | 109 ---- 8 files changed, 71 insertions(+), 1121 deletions(-) delete mode 100644 backup.dashboard.grafana.json delete mode 100644 queries.txt diff --git a/backup.dashboard.grafana.json b/backup.dashboard.grafana.json deleted file mode 100644 index 5177bab..0000000 --- a/backup.dashboard.grafana.json +++ /dev/null @@ -1,948 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "links": [], - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 45, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "100 * sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_compose_service=\"postgres\"}[5m]))", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "New panel", - "type": "timeseries" - }, - { - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 20, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "# Backoffice message visualization\n\nData visualization, monitoring activity\n", - "mode": "markdown" - }, - "pluginVersion": "12.4.0", - "title": "Scope Backoffice ", - "type": "text" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - 
"fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 12 - }, - "id": 34, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "100 * sum(rate(process_cpu_seconds_total{job=\"gqueue-backoffice\"}[5m])) by (job, instance)", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Backoffice Service CPU", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 12 - }, - "id": 35, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(process_resident_memory_bytes{job=\"gqueue-backoffice\"}) by (job, instance) / 1024 / 1024", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Backoffice Service Mem (Mb)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 20 - }, - "id": 36, - 
"options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(go_goroutines{job=\"gqueue-backoffice\"}) by (job, instance)", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Backoffice Service Goroutines", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "O que a métrica representa na prática\nSe go_threads = 18, significa que o runtime Go está usando 18 threads do SO para executar:\n\ngoroutines da aplicação\ntrabalho de GC\noperações de rede / syscalls\nscheduler interno do runtime", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 20 - }, - "id": 37, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - 
"tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(go_threads{job=\"gqueue-backoffice\"}) by (job, instance)", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Backoffice Service Threads Go process", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 28 - }, - "id": 40, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "60 * rate(go_gc_duration_seconds_count{job=\"gqueue-backoffice\"}[1m])", - "instant": false, - 
"legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Backoffice service GC activity (min)", - "type": "timeseries" - }, - { - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 36 - }, - "id": 33, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, - "showMiniMap": false - }, - "content": "# Database visualization\n\nData visualization, monitoring activity\n", - "mode": "markdown" - }, - "pluginVersion": "12.4.0", - "title": "Scope Task ", - "type": "text" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Total de conexões abertas Painel stat ou timeseries", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 7, - "x": 0, - "y": 40 - }, - "id": 41, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(pg_stat_database_numbackends{job=\"postgres\", datname=\"gqueue\"})", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Total Conn ", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "Tamanho do banco em GB Painel stat", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 7, - "y": 40 - }, - "id": 42, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "pg_database_size_bytes{job=\"postgres\", datname=\"gqueue\"} / 1024 / 1024 / 1024", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "GB disc", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 5, - "x": 13, - "y": 40 - }, - "id": 43, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "100 *\nsum(rate(pg_stat_database_blks_hit{job=\"postgres\", datname=\"gqueue\"}[5m]))\n/\n(\n sum(rate(pg_stat_database_blks_hit{job=\"postgres\", datname=\"gqueue\"}[5m]))\n +\n 
sum(rate(pg_stat_database_blks_read{job=\"postgres\", datname=\"gqueue\"}[5m]))\n)", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Cache hit ratio Painel", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 40 - }, - "id": 44, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "12.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "rate(pg_stat_database_blks_read{job=\"postgres\", datname=\"gqueue\"}[5m])", - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Disc Reading", - "type": "stat" - } - ], - "preload": false, - "refresh": "5s", - "schemaVersion": 42, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Gqueue - Backoffice Service Dashboard", - "uid": "adxctbtbackoffice", - "version": 4, - "weekStart": "" -} \ No newline at end of file diff --git a/cmd/api/main.go b/cmd/api/main.go index 859fcb9..e2bb9be 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -125,7 +125,7 @@ func main() { closeFn() } - shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + shutdownCtx, shutdownCancel := 
context.WithTimeout(ctx, 5*time.Second) defer shutdownCancel() if err := telemetry.Shutdown(shutdownCtx); err != nil { log.Printf("Error shutting down telemetry: %v", err) diff --git a/internal/cfg/env.go b/internal/cfg/env.go index 9ef6e38..d26c0f1 100644 --- a/internal/cfg/env.go +++ b/internal/cfg/env.go @@ -66,6 +66,8 @@ type Config struct { MetricsEnabled bool `env:"METRICS_ENABLED" env-default:"true"` OTELExporterOTLPEndpoint string `env:"OTEL_EXPORTER_OTLP_ENDPOINT" env-default:""` + MaxConsumers int `env:"MAX_CONSUMERS" env-default:"10"` + LogLevel int `env:"LOG_LEVEL" env-default:"2"` // 0: debug, 1: info, 2: warn, 3: error } var cfg Config @@ -80,6 +82,10 @@ func Get() Config { panic(err) } + if cfg.LogLevel < 0 || cfg.LogLevel > 3 { + panic("invalid log level") + } + return cfg } diff --git a/internal/domain/event.go b/internal/domain/event.go index eb69dd6..b90f678 100644 --- a/internal/domain/event.go +++ b/internal/domain/event.go @@ -5,6 +5,7 @@ import ( "strings" "time" + "github.com/IsaacDSC/gqueue/internal/cfg" "github.com/IsaacDSC/gqueue/pkg/intertime" "github.com/IsaacDSC/gqueue/pkg/pubadapter" "github.com/google/uuid" @@ -34,7 +35,7 @@ func (e *Event) Validate() error { return fmt.Errorf("at least one consumer is required") } - if len(e.Consumers) > 10 { + if len(e.Consumers) > cfg.Get().MaxConsumers { return fmt.Errorf("consumers must be less than 10") } diff --git a/internal/fetcher/notification.go b/internal/fetcher/notification.go index 941b2b8..e28a285 100644 --- a/internal/fetcher/notification.go +++ b/internal/fetcher/notification.go @@ -39,11 +39,11 @@ func (n Notification) Notify(ctx context.Context, data map[string]any, headers m } func (n Notification) NotifyConsumer(ctx context.Context, url string, data map[string]any, headers map[string]string) error { - return fetch(ctx, url, data, headers, notifyopt.LongRunning) + return fetch(ctx, url, data, headers, notifyopt.Default) } func (n Notification) NotifyScheduler(ctx context.Context, 
url string, data any, headers map[string]string) error { - return fetch(ctx, url, data, headers, notifyopt.LongRunning) + return fetch(ctx, url, data, headers, notifyopt.Default) } func fetch(ctx context.Context, url string, data any, headers map[string]string, opt notifyopt.Kind, settings ...clienthttp.Option) error { diff --git a/pkg/logs/slog.go b/pkg/logs/slog.go index 89a17e1..22cf215 100644 --- a/pkg/logs/slog.go +++ b/pkg/logs/slog.go @@ -6,6 +6,8 @@ import ( "log/slog" "os" "sync" + + "github.com/IsaacDSC/gqueue/internal/cfg" ) var ( @@ -94,7 +96,7 @@ func (l LogLevel) GetLevel() slog.Level { // New creates a new configured logger func New(opts ...LogOption) *Logger { config := &logConfig{ - level: LevelDebug, + level: LogLevel(cfg.Get().LogLevel), output: os.Stdout, addSource: false, jsonFormat: true, // Default to JSON format diff --git a/pkg/telemetry/telemetry.go b/pkg/telemetry/telemetry.go index 13c3fda..7f0dde4 100644 --- a/pkg/telemetry/telemetry.go +++ b/pkg/telemetry/telemetry.go @@ -4,6 +4,7 @@ import ( "context" "net/http" "sync" + "sync/atomic" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/metric" @@ -20,87 +21,84 @@ type Config struct { Enabled bool } +// state holds the current meter provider and metrics handler. +// It is published via atomic.Value after initialization so all reads are lock-free. +// Alternative design: dependency injection — New could return a *Telemetry struct +// (with Handler, Meter, Shutdown methods) and the application would pass it explicitly +// instead of using package-level globals; that would avoid any sync primitive. 
+type state struct { + provider *sdkmetric.MeterProvider + handler http.Handler + initErr error // non-nil if New failed during init +} + var ( - mu sync.RWMutex - meterProvider *sdkmetric.MeterProvider - metricsHandler http.Handler = http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte("# metrics not initialized\n")) - }) + once sync.Once + stateVal atomic.Value ) +func init() { + stateVal.Store(&state{ + handler: http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("# metrics not initialized\n")) + }), + }) +} + // New initializes the global MeterProvider and the HTTP metrics handler. -// It should be called once at application startup. +// It should be called once at application startup. Initialization runs at most once; +// subsequent calls return the same handler and any initial error. func New(cfg Config) (http.Handler, error) { - mu.Lock() - defer mu.Unlock() - - // If already initialized, just return the current handler. - if meterProvider != nil { - return metricsHandler, nil - } - - var mp *sdkmetric.MeterProvider - - if !cfg.Enabled { - // No-op provider: no metrics will be exported. - mp = sdkmetric.NewMeterProvider() - metricsHandler = http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.WriteHeader(http.StatusOK) - _, _ = w.Write([]byte("# metrics disabled\n")) - }) - } else { - // Prometheus exporter: explicitly use the client_golang default registry - // so promhttp.Handler() serves the same metrics. 
- exp, err := otelprom.New(otelprom.WithRegisterer(promclient.DefaultRegisterer)) - if err != nil { - return nil, err + once.Do(func() { + var s state + if !cfg.Enabled { + s.provider = sdkmetric.NewMeterProvider() + s.handler = http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte("# metrics disabled\n")) + }) + } else { + exp, err := otelprom.New(otelprom.WithRegisterer(promclient.DefaultRegisterer)) + if err != nil { + s.initErr = err + s.handler = stateVal.Load().(*state).handler // keep placeholder + stateVal.Store(&s) + return + } + s.provider = sdkmetric.NewMeterProvider(sdkmetric.WithReader(exp)) + s.handler = promhttp.Handler() } + otel.SetMeterProvider(s.provider) + stateVal.Store(&s) + }) - // Use the exporter as a reader for the MeterProvider. - mp = sdkmetric.NewMeterProvider( - sdkmetric.WithReader(exp), - ) - - // Prometheus default handler to expose registered metrics. - metricsHandler = promhttp.Handler() + cur := stateVal.Load().(*state) + if cur.initErr != nil { + return nil, cur.initErr } - - otel.SetMeterProvider(mp) - meterProvider = mp - - return metricsHandler, nil + return cur.handler, nil } // Shutdown stops the global MeterProvider and releases resources. func Shutdown(ctx context.Context) error { - mu.RLock() - mp := meterProvider - mu.RUnlock() - - if mp == nil { + cur := stateVal.Load().(*state) + if cur.provider == nil { return nil } - - return mp.Shutdown(ctx) + return cur.provider.Shutdown(ctx) } // Meter returns a Meter from the global provider. func Meter(name string) metric.Meter { - mu.RLock() - defer mu.RUnlock() - - if meterProvider == nil { + cur := stateVal.Load().(*state) + if cur.provider == nil { return otel.Meter(name) } - - return meterProvider.Meter(name) + return cur.provider.Meter(name) } // Handler returns the current HTTP handler that exposes metrics. 
func Handler() http.Handler { - mu.RLock() - defer mu.RUnlock() - - return metricsHandler + return stateVal.Load().(*state).handler } diff --git a/queries.txt b/queries.txt deleted file mode 100644 index 6c35086..0000000 --- a/queries.txt +++ /dev/null @@ -1,109 +0,0 @@ -Queries úteis -1. Status do Postgres Painel stat - -max(pg_up{job="postgres"}) -2. Total de conexões abertas Painel stat ou timeseries - -sum(pg_stat_database_numbackends{job="postgres", datname="gqueue"}) -3. Conexões por estado Painel timeseries ou pie chart - -sum by (state) (pg_stat_activity_count{job="postgres", datname="gqueue"}) -4. Tamanho do banco em GB Painel stat - -pg_database_size_bytes{job="postgres", datname="gqueue"} / 1024 / 1024 / 1024 -5. TPS de commits Painel timeseries - -rate(pg_stat_database_xact_commit{job="postgres", datname="gqueue"}[5m]) -6. TPS de rollbacks Painel timeseries - -rate(pg_stat_database_xact_rollback{job="postgres", datname="gqueue"}[5m]) -7. Cache hit ratio Painel stat ou gauge - -100 * -sum(rate(pg_stat_database_blks_hit{job="postgres", datname="gqueue"}[5m])) -/ -( - sum(rate(pg_stat_database_blks_hit{job="postgres", datname="gqueue"}[5m])) - + - sum(rate(pg_stat_database_blks_read{job="postgres", datname="gqueue"}[5m])) -) -8. Leituras em disco por segundo Painel timeseries - -rate(pg_stat_database_blks_read{job="postgres", datname="gqueue"}[5m]) - -9. Escritas temporárias em bytes/s Painel timeseries - -rate(pg_stat_database_temp_bytes{job="postgres", datname="gqueue"}[5m]) - -10. Deadlocks Painel stat ou timeseries - -increase(pg_stat_database_deadlocks{job="postgres", datname="gqueue"}[15m]) - -Queries boas para tabelas -11. Top 10 tabelas com mais linhas vivas Painel bar chart - -topk(10, pg_stat_user_tables_n_live_tup{job="postgres", datname="gqueue"}) - -12. Top 10 tabelas com mais dead tuples Painel bar chart - -topk(10, pg_stat_user_tables_n_dead_tup{job="postgres", datname="gqueue"}) - -13. 
Seq scan por tabela Painel bar chart - -topk(10, rate(pg_stat_user_tables_seq_scan{job="postgres", datname="gqueue"}[5m])) - -14. Index scan por tabela Painel bar chart - -topk(10, rate(pg_stat_user_tables_idx_scan{job="postgres", datname="gqueue"}[5m])) - -15. Inserts por segundo Painel timeseries - -sum(rate(pg_stat_database_tup_inserted{job="postgres", datname="gqueue"}[5m])) - -16. Updates por segundo Painel timeseries - -sum(rate(pg_stat_database_tup_updated{job="postgres", datname="gqueue"}[5m])) - -17. Deletes por segundo Painel timeseries - -sum(rate(pg_stat_database_tup_deleted{job="postgres", datname="gqueue"}[5m])) - -Queries de background writer -18. Buffers escritos por checkpoints Painel timeseries - -rate(pg_stat_bgwriter_buffers_checkpoint{job="postgres"}[5m]) - -19. Buffers escritos pelo backend Painel timeseries - -rate(pg_stat_bgwriter_buffers_backend{job="postgres"}[5m]) - -20. Checkpoints solicitados Painel timeseries - -rate(pg_stat_bgwriter_checkpoints_req{job="postgres"}[5m]) - - - -Se você usar cAdvisor -As queries mais comuns no Grafana ficam assim. - -CPU do container Postgres -sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_compose_service="postgres"}[5m])) - -CPU em percentual - -100 * sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_compose_service="postgres"}[5m])) -Memória em uso - -container_memory_working_set_bytes{container_label_com_docker_compose_service="postgres"} / 1024 / 1024 -Memória total / RSS - -container_memory_rss{container_label_com_docker_compose_service="postgres"} / 1024 / 1024 -Limite de memória - -container_spec_memory_limit_bytes{container_label_com_docker_compose_service="postgres"} / 1024 / 1024 -Percentual de memória usada - -100 * -container_memory_working_set_bytes{container_label_com_docker_compose_service="postgres"} -/ -container_spec_memory_limit_bytes{container_label_com_docker_compose_service="postgres"}