Skip to content

Commit 770d34f

Browse files
committed
feat(metric): upgrade metrics
1 parent b94992a commit 770d34f

15 files changed

Lines changed: 4471 additions & 210 deletions

File tree

docs/grafana_dashboard.json

Lines changed: 3457 additions & 0 deletions
Large diffs are not rendered by default.

docs/metric.md

Lines changed: 782 additions & 0 deletions
Large diffs are not rendered by default.

metric.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ var (
1818
boxMetricGauge = metric.Default.NewGaugeVec(
1919
"box_info",
2020
"Information about the box config and environment.",
21-
[]string{"name", "version", "tags", "ip", "localhost", "start"})
21+
[]string{"tags", "ip", "localhost", "start"})
2222
)
2323

2424
func (boxMetric) Name() string {
@@ -27,8 +27,6 @@ func (boxMetric) Name() string {
2727

2828
func (boxMetric) Serve(ctx context.Context) error {
2929
boxMetricGauge.WithLabelValues(
30-
config.ServiceName(),
31-
config.ServiceVersion(),
3230
strings.Join(config.ServiceTag(), ","),
3331
system.IP(),
3432
system.Hostname(),

pkg/client/gormx/metric.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,19 @@ const (
2424
)
2525

2626
var (
27-
metricConnIdle = metric.NewGaugeVec("db_connections_idle", `The number of idle connections.`, []string{labelDriver, labelDatabase})
28-
metricConnInUse = metric.NewGaugeVec("db_connections_in_use", `The number of connections currently in use.`, []string{labelDriver, labelDatabase})
29-
metricConnOpen = metric.NewGaugeVec("db_connections_open", `The number of established connections both in use and idle.`, []string{labelDriver, labelDatabase})
30-
metricConnMaxOpen = metric.NewGaugeVec("db_connections_max_open", `Maximum number of open connections to the database.`, []string{labelDriver, labelDatabase})
31-
metricWaitCount = metric.NewGaugeVec("db_wait_count", `The total number of connections waited for.`, []string{labelDriver, labelDatabase})
32-
metricWaitDuration = metric.NewGaugeVec("db_wait_duration_seconds", `The total time blocked waiting for a new connection.`, []string{labelDriver, labelDatabase})
33-
metricSQLSeconds = metric.NewSummaryVec("db_sql_seconds", `All queries requested seconds`, []string{labelDriver, labelDatabase, labelType, labelError}, map[float64]float64{
34-
0.5: 0.05,
35-
0.75: 0.05,
36-
0.9: 0.01,
37-
0.99: 0.001,
38-
1: 0.001,
39-
})
27+
metricConnIdle = metric.NewGaugeVec("db_client_connections_idle", `The number of idle connections.`, []string{labelDriver, labelDatabase})
28+
metricConnInUse = metric.NewGaugeVec("db_client_connections_in_use", `The number of connections currently in use.`, []string{labelDriver, labelDatabase})
29+
metricConnOpen = metric.NewGaugeVec("db_client_connections_open", `The number of established connections both in use and idle.`, []string{labelDriver, labelDatabase})
30+
metricConnMaxOpen = metric.NewGaugeVec("db_client_connections_max_open", `Maximum number of open connections to the database.`, []string{labelDriver, labelDatabase})
31+
metricWaitCount = metric.NewGaugeVec("db_client_connections_wait_total", `The total number of connections waited for.`, []string{labelDriver, labelDatabase})
32+
metricWaitDuration = metric.NewGaugeVec("db_client_connections_wait_seconds", `The total time blocked waiting for a new connection.`, []string{labelDriver, labelDatabase})
33+
metricSQLDuration = metric.NewHistogramVec(
34+
"db_client_request_duration_seconds",
35+
"The SQL execution latencies in seconds.",
36+
[]string{labelDriver, labelDatabase, labelType, "result"},
37+
// 250us, 500us, 1ms, 2.5ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2.5s
38+
[]float64{0.00025, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5},
39+
)
4040
)
4141

4242
func newMetric(driver, database string, statsInterval time.Duration) *Metric {
@@ -115,11 +115,11 @@ func (m *Metric) beforeCallback(db *DB) {
115115

116116
func (m *Metric) afterCallback(cmdType string) func(*DB) {
117117
return func(db *DB) {
118-
err := ""
118+
result := "success"
119119
second := 0.0
120120

121121
if db.Statement.Error != nil {
122-
err = db.Statement.Error.Error()
122+
result = "error"
123123
}
124124

125125
if ts, ok := db.InstanceGet("startTime"); ok {
@@ -128,7 +128,7 @@ func (m *Metric) afterCallback(cmdType string) func(*DB) {
128128
}
129129
}
130130

131-
metricSQLSeconds.WithLabelValues(m.driver, m.database, cmdType, err).Observe(second)
131+
metricSQLDuration.WithLabelValues(m.driver, m.database, cmdType, result).Observe(second)
132132
}
133133
}
134134

pkg/client/mongodb/metric.go

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,25 +19,20 @@ type (
1919

2020
var (
2121
cmdTotal = metric.NewCounterVec(
22-
"mongo_client_command_total",
23-
"mongodb client command counter",
24-
[]string{"command", "error"},
22+
"mongo_client_requests_total",
23+
"The total number of MongoDB commands executed.",
24+
[]string{"command", "result"},
2525
)
26-
cmdDuration = metric.NewSummaryVec(
27-
"mongo_client_command_duration_seconds",
28-
"mongodb client command duration seconds",
29-
[]string{"command", "error"},
30-
map[float64]float64{
31-
0.5: 0.05,
32-
0.75: 0.05,
33-
0.9: 0.01,
34-
0.99: 0.001,
35-
1: 0.001,
36-
},
26+
cmdDuration = metric.NewHistogramVec(
27+
"mongo_client_request_duration_seconds",
28+
"The MongoDB command latencies in seconds.",
29+
[]string{"command", "result"},
30+
// 250us, 500us, 1ms, 2.5ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2.5s
31+
[]float64{0.00025, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5},
3732
)
3833
workingSession = metric.NewGaugeVec(
39-
"mongo_client_session_in_progress",
40-
"mongo client session in progress gauge",
34+
"mongo_client_sessions_inflight",
35+
"The number of MongoDB sessions currently in progress.",
4136
[]string{},
4237
)
4338
)
@@ -80,15 +75,15 @@ func (mon *metricMonitor) Started(ctx context.Context, ev *event.CommandStartedE
8075
}
8176

8277
func (mon *metricMonitor) Succeeded(ctx context.Context, ev *event.CommandSucceededEvent) {
83-
labels := []string{ev.CommandName, ""}
78+
labels := []string{ev.CommandName, "success"}
8479
cmdTotal.WithLabelValues(labels...).Inc()
8580
cmdDuration.WithLabelValues(labels...).Observe(time.Duration(ev.DurationNanos).Seconds())
8681

8782
logger.Trace(ctx).Debugf("mongo_command_success cmd: %s, reqId: %d, connId: %s, duration: %s", ev.CommandName, ev.RequestID, ev.ConnectionID, time.Duration(ev.DurationNanos))
8883
}
8984

9085
func (mon *metricMonitor) Failed(ctx context.Context, ev *event.CommandFailedEvent) {
91-
labels := []string{ev.CommandName, ev.Failure}
86+
labels := []string{ev.CommandName, "error"}
9287
cmdTotal.WithLabelValues(labels...).Inc()
9388
cmdDuration.WithLabelValues(labels...).Observe(time.Duration(ev.DurationNanos).Seconds())
9489

pkg/client/redis/logger.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,18 @@ import (
1010

1111
type (
1212
Logger struct {
13-
cfg *Config
13+
cfg *Config
14+
addr string
1415
}
1516
)
1617

18+
func newLogger(cfg *Config) *Logger {
19+
return &Logger{
20+
cfg: cfg,
21+
addr: strings.Join(cfg.Address, ","),
22+
}
23+
}
24+
1725
// DialHook returns next unchanged, so Logger adds no behavior around dialing.
func (inst *Logger) DialHook(next redis.DialHook) redis.DialHook {
1826
return next
1927
}
@@ -58,7 +66,7 @@ func (inst *Logger) log(ctx context.Context, pipe bool, cmds ...redis.Cmder) {
5866

5967
if len(errArr) > 0 {
6068
logger.Trace(ctx).Errorw("Redis.Error",
61-
"address", strings.Join(inst.cfg.Address, ","),
69+
"address", inst.addr,
6270
"db", inst.cfg.DB,
6371
"err", strings.Join(errArr, ";"),
6472
"cmd", strings.Join(cmdArr, ";"),

pkg/client/redis/metric.go

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,42 @@ package redis
22

33
import (
44
"context"
5-
"fmt"
5+
"strconv"
66
"strings"
77
"time"
88

99
"github.com/boxgo/box/pkg/metric"
10-
"github.com/boxgo/box/pkg/trace"
1110
"github.com/redis/go-redis/v9"
1211
)
1312

1413
type (
1514
Metric struct {
16-
cfg *Config
15+
cfg *Config
16+
addr string
1717
}
1818

1919
startKey struct{}
2020
)
2121

22+
func newMetric(cfg *Config) *Metric {
23+
return &Metric{
24+
cfg: cfg,
25+
addr: strings.Join(cfg.Address, ","),
26+
}
27+
}
28+
2229
var (
2330
cmdTotal = metric.NewCounterVec(
24-
"redis_client_command_total",
25-
"redis command counter",
26-
[]string{"bid", "address", "db", "masterName", "pipe", "cmd", "error"},
31+
"redis_client_requests_total",
32+
"The total number of Redis commands executed.",
33+
[]string{"address", "db", "masterName", "pipe", "cmd", "result"},
2734
)
28-
cmdDuration = metric.NewSummaryVec(
29-
"redis_client_command_duration_seconds",
30-
"redis command duration seconds",
31-
[]string{"bid", "address", "db", "masterName", "pipe", "cmd", "error"},
32-
map[float64]float64{
33-
0.5: 0.05,
34-
0.75: 0.05,
35-
0.9: 0.01,
36-
0.99: 0.001,
37-
1: 0.001,
38-
},
35+
cmdDuration = metric.NewHistogramVec(
36+
"redis_client_request_duration_seconds",
37+
"The Redis command latencies in seconds.",
38+
[]string{"address", "db", "masterName", "pipe", "cmd", "result"},
39+
// 100us, 250us, 500us, 1ms, 2.5ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms
40+
[]float64{0.0001, 0.00025, 0.0005, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5},
3941
)
4042
)
4143

@@ -68,38 +70,33 @@ func (m *Metric) ProcessPipelineHook(next redis.ProcessPipelineHook) redis.Proce
6870
}
6971

7072
func (m *Metric) report(ctx context.Context, pipe bool, elapsed time.Duration, cmds ...redis.Cmder) {
71-
addressStr := strings.Join(m.cfg.Address, ",")
72-
dbStr := fmt.Sprintf("%d", m.cfg.DB)
73-
masterNameStr := m.cfg.MasterName
74-
errStr := ""
7573
cmdStr := ""
76-
pipeStr := fmt.Sprintf("%t", pipe)
74+
result := "success"
75+
masterNameStr := m.cfg.MasterName
76+
addressStr := m.addr
77+
dbStr := strconv.Itoa(m.cfg.DB)
78+
pipeStr := strconv.FormatBool(pipe)
79+
80+
if pipe {
81+
cmdStr = "pipeline"
82+
} else if len(cmds) > 0 {
83+
cmdStr = cmds[0].Name()
84+
}
7785

7886
for _, cmd := range cmds {
79-
cmdStr += cmd.Name() + ";"
80-
8187
if err := cmd.Err(); err != nil && err != redis.Nil {
82-
errStr += err.Error() + ";"
88+
result = "error"
89+
break
8390
}
8491
}
85-
cmdStr = strings.TrimSuffix(cmdStr, ";")
86-
87-
var (
88-
bizID string
89-
)
90-
91-
if bizIDStr, ok := ctx.Value(trace.BizID()).(string); ok {
92-
bizID = bizIDStr
93-
}
9492

9593
values := []string{
96-
bizID,
9794
addressStr,
9895
dbStr,
9996
masterNameStr,
10097
pipeStr,
10198
cmdStr,
102-
errStr,
99+
result,
103100
}
104101

105102
cmdDuration.WithLabelValues(values...).Observe(elapsed.Seconds())

pkg/client/redis/redis.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ func newRedis(cfg *Config) *Redis {
2626
MinIdleConns: cfg.MinIdleConnCnt,
2727
})
2828

29-
client.AddHook(&Metric{cfg: cfg})
30-
client.AddHook(&Logger{cfg: cfg})
29+
client.AddHook(newMetric(cfg))
30+
client.AddHook(newLogger(cfg))
3131

3232
if err := redisotel.InstrumentTracing(client); err != nil {
3333
logger.Panicf("Redis.InstrumentTracing.Error: %s", err)

pkg/client/wukong/metric.go

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
package wukong
22

33
import (
4+
"context"
45
"strconv"
6+
"strings"
57
"time"
68

79
"github.com/boxgo/box/pkg/metric"
8-
"golang.org/x/net/context"
910
)
1011

1112
type (
@@ -18,36 +19,39 @@ const (
1819

1920
var (
2021
requestInflight = metric.NewGaugeVec(
21-
"http_client_request_in_process",
22-
"http client requesting",
22+
"http_client_requests_inflight",
23+
"The number of HTTP client requests currently in flight.",
2324
[]string{"method", "baseUrl", "url"},
2425
)
2526
requestCounter = metric.NewCounterVec(
26-
"http_client_request_total",
27-
"http client request counter",
28-
[]string{"method", "baseUrl", "url", "statusCode", "error"},
27+
"http_client_requests_total",
28+
"The total number of HTTP client requests sent.",
29+
[]string{"method", "baseUrl", "url", "status", "error"},
2930
)
30-
requestDuration = metric.NewSummaryVec(
31-
"http_client_request_seconds",
32-
"http client request duration",
33-
[]string{"method", "baseUrl", "url", "statusCode", "error"},
34-
map[float64]float64{
35-
0.5: 0.05,
36-
0.75: 0.05,
37-
0.9: 0.01,
38-
0.99: 0.001,
39-
1: 0.001,
40-
},
31+
requestDuration = metric.NewHistogramVec(
32+
"http_client_request_duration_seconds",
33+
"The HTTP client request latencies in seconds.",
34+
[]string{"method", "baseUrl", "url", "status", "error"},
35+
// 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2.5s, 5s, 10s
36+
[]float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
4137
)
4238
)
4339

40+
// stripQuery returns url truncated at the first '?' or '#', dropping the
// query string and the fragment. A url that contains neither character is
// returned unchanged.
//
// It is applied to request URLs before they are used as metric label values,
// so that requests differing only in query parameters share one label value.
func stripQuery(url string) string {
	if idx := strings.IndexAny(url, "?#"); idx >= 0 {
		return url[:idx]
	}

	return url
}
47+
4448
func metricStart(request *Request) error {
4549
if val, ok := request.Context.Value(metricSwitchKey).(bool); ok && !val {
4650
return nil
4751
}
4852

49-
requestInflight.WithLabelValues(request.Method, request.BaseUrl, request.Url).Inc()
50-
53+
url := stripQuery(request.Url)
54+
requestInflight.WithLabelValues(request.Method, request.BaseUrl, url).Inc()
5155
request.Context = context.WithValue(request.Context, metricDurationKey{}, time.Now())
5256

5357
return nil
@@ -59,22 +63,23 @@ func metricEnd(request *Request, resp *Response) error {
5963
}
6064

6165
var (
62-
errMsg = ""
63-
duration = time.Duration(0)
64-
statusCode = strconv.Itoa(resp.StatusCode())
66+
errMsg = ""
67+
duration = time.Duration(0)
68+
status = strconv.Itoa(resp.StatusCode())
6569
)
6670

6771
if resp.Error() != nil {
68-
errMsg = resp.Error().Error()
72+
errMsg = "error"
6973
}
7074

7175
if start, ok := request.Context.Value(metricDurationKey{}).(time.Time); ok {
7276
duration = time.Since(start)
7377
}
7478

75-
requestInflight.WithLabelValues(request.Method, request.BaseUrl, request.Url).Dec()
76-
requestCounter.WithLabelValues(request.Method, request.BaseUrl, request.Url, statusCode, errMsg).Inc()
77-
requestDuration.WithLabelValues(request.Method, request.BaseUrl, request.Url, statusCode, errMsg).Observe(duration.Seconds())
79+
url := stripQuery(request.Url)
80+
requestInflight.WithLabelValues(request.Method, request.BaseUrl, url).Dec()
81+
requestCounter.WithLabelValues(request.Method, request.BaseUrl, url, status, errMsg).Inc()
82+
requestDuration.WithLabelValues(request.Method, request.BaseUrl, url, status, errMsg).Observe(duration.Seconds())
7883

7984
return nil
8085
}

pkg/metric/metric.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@ func (m *Metric) Serve(context.Context) error {
5252
defer ticker.Stop()
5353

5454
pusher := push.
55-
New(m.cfg.PushTargetURL, config.ServiceName()+"-"+config.ServiceVersion()).
55+
New(m.cfg.PushTargetURL, config.ServiceName()).
5656
Gatherer(prometheus.DefaultRegisterer.(prometheus.Gatherer)).
57+
Grouping("namespace", config.ServiceNamespace()).
5758
Grouping("instance", system.Hostname())
5859

5960
for {

0 commit comments

Comments
 (0)