From 72cf229704aa07355610551764d60052f80faf4c Mon Sep 17 00:00:00 2001 From: Jan Grodowski Date: Tue, 26 May 2026 18:48:05 +0200 Subject: [PATCH 1/7] Fix resume data loss: route heartbeat coords through applyEventsQueue (#1684) * Fix resume data loss: route heartbeat coords through applyEventsQueue onChangelogHeartbeatEvent was mutating applier.CurrentCoordinates directly from the streamer goroutine, before any DML that preceded the heartbeat was applied to the ghost table. The checkpoint loop reads CurrentCoordinates as "applied through this GTID" and could persist a checkpoint whose LastTrxCoords was ahead of what was actually applied. If gh-ost crashed before applyEventsQueue drained, --resume read that checkpoint and called StartSyncGTID with the persisted set; MySQL treated the un-applied GTIDs as already-seen and never re-streamed them. The ghost table silently lost those DMLs and cut-over produced a stale table. Fix: enqueue a tableWriteFunc onto applyEventsQueue that performs the coords bump. The apply goroutine executes it in order, after the DMLs the streamer enqueued before the heartbeat, restoring the invariant. Adds TestMigratorHeartbeatDoesNotAdvancePastUnappliedDML, which fails at the previous HEAD and passes after the fix; also asserts queue ordering to guard against future changes that wrap the heartbeat enqueue in a goroutine. 
Co-authored-by: Bastian Bartmann * Replace direct channel write with SendWithContext Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Bastian Bartmann Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- go/logic/migrator.go | 19 ++++++++-- go/logic/migrator_test.go | 75 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 90fa8c509..226cf13a7 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -336,13 +336,26 @@ func (mgtr *Migrator) onChangelogHeartbeatEvent(dmlEntry *binlog.BinlogEntry) (e heartbeatTime, err := time.Parse(time.RFC3339Nano, changelogHeartbeatString) if err != nil { return mgtr.migrationContext.Log.Errore(err) - } else { - mgtr.migrationContext.SetLastHeartbeatOnChangelogTime(heartbeatTime) + } + mgtr.migrationContext.SetLastHeartbeatOnChangelogTime(heartbeatTime) + + // Route the coords bump through applyEventsQueue so it is ordered after + // any DMLs the streamer enqueued before this heartbeat. + coords := dmlEntry.Coordinates + var writeFunc tableWriteFunc = func() error { mgtr.applier.CurrentCoordinatesMutex.Lock() - mgtr.applier.CurrentCoordinates = dmlEntry.Coordinates + mgtr.applier.CurrentCoordinates = coords mgtr.applier.CurrentCoordinatesMutex.Unlock() return nil } + if err := base.SendWithContext( + mgtr.migrationContext.GetContext(), + mgtr.applyEventsQueue, + newApplyEventStructByFunc(&writeFunc), + ); err != nil { + return mgtr.migrationContext.Log.Errore(err) + } + return nil } // abort stores the error, cancels the context, and logs the abort. 
diff --git a/go/logic/migrator_test.go b/go/logic/migrator_test.go index 8fc48e326..95278fc3d 100644 --- a/go/logic/migrator_test.go +++ b/go/logic/migrator_test.go @@ -194,6 +194,81 @@ func TestMigratorOnChangelogEvent(t *testing.T) { }) } +// Regression: heartbeats must not advance applier.CurrentCoordinates past +// DMLs still sitting in applyEventsQueue. If they do, checkpointLoop will +// persist a GTID set that includes un-applied transactions, and resume via +// StartSyncGTID will skip them (the server treats them as already-seen). +func TestMigratorHeartbeatDoesNotAdvancePastUnappliedDML(t *testing.T) { + migrationContext := base.NewMigrationContext() + migrationContext.UseGTIDs = true + migrator := NewMigrator(migrationContext, "test") + migrator.applier = NewApplier(migrationContext) + + const srcUUID = "00000000-0000-0000-0000-000000000001" + + // A DML on the original table at GTID :100 is observed and enqueued, but + // not yet applied. + dmlCoords, err := mysql.NewGTIDBinlogCoordinates(srcUUID + ":1-100") + require.NoError(t, err) + migrator.applyEventsQueue <- newApplyEventStructByDML(&binlog.BinlogEntry{ + DmlEvent: &binlog.BinlogDMLEvent{ + DatabaseName: "test", + TableName: migrationContext.OriginalTableName, + DML: binlog.UpdateDML, + }, + Coordinates: dmlCoords, + }) + require.Equal(t, 1, len(migrator.applyEventsQueue), + "DML must be sitting un-applied in the queue") + + // A heartbeat row is then written; its GTID set includes the un-applied + // DML plus a few additional transactions. 
+ heartbeatCoords, err := mysql.NewGTIDBinlogCoordinates(srcUUID + ":1-105") + require.NoError(t, err) + heartbeatColumnValues := sql.ToColumnValues([]interface{}{ + 123, + time.Now().Unix(), + "heartbeat", + time.Now().Format(time.RFC3339Nano), + }) + require.NoError(t, migrator.onChangelogHeartbeatEvent(&binlog.BinlogEntry{ + DmlEvent: &binlog.BinlogDMLEvent{ + DatabaseName: "test", + DML: binlog.InsertDML, + NewColumnValues: heartbeatColumnValues, + }, + Coordinates: heartbeatCoords, + })) + + // The DML is still un-applied; the heartbeat's coords-bump sentinel has + // been enqueued behind it. + require.Equal(t, 2, len(migrator.applyEventsQueue), + "queue must hold the un-applied DML and the heartbeat sentinel; "+ + "this test does not drain the queue") + + // Invariant: CurrentCoordinates must NOT have advanced past the queued DML. + currentCoords := migrator.applier.CurrentCoordinates + require.False(t, currentCoords != nil && dmlCoords.SmallerThanOrEquals(currentCoords), + "CurrentCoordinates must not cover the un-applied DML at %s (got %v)", + dmlCoords.DisplayString(), currentCoords) + + // Consequence: the checkpoint gate in Migrator.Checkpoint must NOT fire + // for streamer coords that include the un-applied DML. + require.False(t, currentCoords != nil && heartbeatCoords.SmallerThanOrEquals(currentCoords), + "checkpoint gate must not fire while DML at %s is un-applied", + dmlCoords.DisplayString()) + + // Ordering: the DML must come first, then the heartbeat sentinel. If a + // future change ever wraps the heartbeat enqueue in `go func()`, this + // invariant breaks and the bug returns. 
+ firstQueued := <-migrator.applyEventsQueue + secondQueued := <-migrator.applyEventsQueue + require.NotNil(t, firstQueued.dmlEvent, "first queued event must be the DML") + require.Nil(t, firstQueued.writeFunc, "first queued event must not be a sentinel") + require.Nil(t, secondQueued.dmlEvent, "second queued event must not be a DML") + require.NotNil(t, secondQueued.writeFunc, "second queued event must be the heartbeat sentinel") +} + func TestMigratorValidateStatement(t *testing.T) { t.Run("add-column", func(t *testing.T) { migrationContext := base.NewMigrationContext() From e59af3a44fd20e78bb68a6c25c1ac9d49baea12e Mon Sep 17 00:00:00 2001 From: Patrick Begley <210335+forge33@users.noreply.github.com> Date: Wed, 27 May 2026 12:32:15 -0400 Subject: [PATCH 2/7] Add Datadog/statsd with simple client emitting startup (#1689) --- go.mod | 1 + go.sum | 22 + go/base/context.go | 3 + go/cmd/gh-ost/main.go | 26 + go/metrics/client.go | 69 ++ go/metrics/client_test.go | 57 ++ .../DataDog/datadog-go/v5/LICENSE.txt | 19 + .../DataDog/datadog-go/v5/statsd/README.md | 4 + .../datadog-go/v5/statsd/aggregator.go | 349 +++++++ .../DataDog/datadog-go/v5/statsd/buffer.go | 208 ++++ .../datadog-go/v5/statsd/buffer_pool.go | 40 + .../v5/statsd/buffered_metric_context.go | 104 ++ .../DataDog/datadog-go/v5/statsd/container.go | 19 + .../datadog-go/v5/statsd/container_linux.go | 219 ++++ .../datadog-go/v5/statsd/container_stub.go | 17 + .../datadog-go/v5/statsd/error_handler.go | 22 + .../DataDog/datadog-go/v5/statsd/event.go | 75 ++ .../datadog-go/v5/statsd/external_env.go | 46 + .../DataDog/datadog-go/v5/statsd/fnv1a.go | 39 + .../DataDog/datadog-go/v5/statsd/format.go | 306 ++++++ .../DataDog/datadog-go/v5/statsd/metrics.go | 283 ++++++ .../DataDog/datadog-go/v5/statsd/noop.go | 118 +++ .../DataDog/datadog-go/v5/statsd/options.go | 443 ++++++++ .../DataDog/datadog-go/v5/statsd/pipe.go | 13 + .../datadog-go/v5/statsd/pipe_windows.go | 81 ++ .../DataDog/datadog-go/v5/statsd/sender.go | 
145 +++ .../datadog-go/v5/statsd/service_check.go | 57 ++ .../DataDog/datadog-go/v5/statsd/statsd.go | 318 ++++++ .../datadog-go/v5/statsd/statsd_direct.go | 69 ++ .../DataDog/datadog-go/v5/statsd/statsdex.go | 953 ++++++++++++++++++ .../datadog-go/v5/statsd/tag_cardinality.go | 78 ++ .../DataDog/datadog-go/v5/statsd/telemetry.go | 307 ++++++ .../DataDog/datadog-go/v5/statsd/udp.go | 39 + .../DataDog/datadog-go/v5/statsd/uds.go | 190 ++++ .../datadog-go/v5/statsd/uds_windows.go | 15 + .../DataDog/datadog-go/v5/statsd/utils.go | 32 + .../DataDog/datadog-go/v5/statsd/worker.go | 158 +++ vendor/modules.txt | 3 + 38 files changed, 4947 insertions(+) create mode 100644 go/metrics/client.go create mode 100644 go/metrics/client_test.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/LICENSE.txt create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/README.md create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/aggregator.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/buffer.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/buffer_pool.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/buffered_metric_context.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/container.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/container_linux.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/container_stub.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/error_handler.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/event.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/external_env.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/fnv1a.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/format.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/metrics.go create mode 100644 
vendor/github.com/DataDog/datadog-go/v5/statsd/noop.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/options.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/pipe.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/pipe_windows.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/sender.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/service_check.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/statsd.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/statsd_direct.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/statsdex.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/tag_cardinality.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/telemetry.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/udp.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/uds.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/uds_windows.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/utils.go create mode 100644 vendor/github.com/DataDog/datadog-go/v5/statsd/worker.go diff --git a/go.mod b/go.mod index ad236e09e..91b81bc54 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/github/gh-ost go 1.25.9 require ( + github.com/DataDog/datadog-go/v5 v5.8.3 github.com/go-ini/ini v1.67.0 github.com/go-mysql-org/go-mysql v1.11.0 github.com/go-sql-driver/mysql v1.8.1 diff --git a/go.sum b/go.sum index 39fd9cd48..e39b587e8 100644 --- a/go.sum +++ b/go.sum @@ -7,8 +7,11 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod 
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/DataDog/datadog-go/v5 v5.8.3 h1:s58CUJ9s8lezjhTNJO/SxkPBv2qZjS3ktpRSqGF5n0s= +github.com/DataDog/datadog-go/v5 v5.8.3/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= +github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= @@ -54,6 +57,7 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= @@ -120,14 +124,21 @@ github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXY github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07 h1:oI+RNwuC9jF2g2lP0u0cVEEZrc/AYBCuFdvwrLWM/6Q= github.com/siddontang/go-log v0.0.0-20180807004314-8d05993dda07/go.mod h1:yFdBgwXP24JziuRl2NMUahT7nGLNOKi1SIiFxMttVD4= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= 
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/testcontainers/testcontainers-go v0.37.0 h1:L2Qc0vkTw2EHWQ08djon0D2uw7Z/PtHS/QzZZ5Ra/hg= @@ -140,6 +151,7 @@ github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+F github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yusufpapurcu/wmi v1.2.4 
h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= @@ -183,29 +195,38 @@ golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= golang.org/x/sync 
v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -221,6 +242,7 @@ golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/go/base/context.go b/go/base/context.go index 617e5bb13..26d13fe07 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -19,6 +19,7 @@ import ( uuid "github.com/google/uuid" + "github.com/github/gh-ost/go/metrics" "github.com/github/gh-ost/go/mysql" "github.com/github/gh-ost/go/sql" "github.com/openark/golib/log" @@ -237,6 +238,8 @@ type MigrationContext struct { AbortError error abortMutex *sync.Mutex + Metrics *metrics.Client + OriginalTableColumnsOnApplier *sql.ColumnList OriginalTableColumns *sql.ColumnList OriginalTableVirtualColumns *sql.ColumnList diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index 567137fd5..f30c439a4 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -16,6 +16,7 @@ import ( "github.com/github/gh-ost/go/base" "github.com/github/gh-ost/go/logic" + "github.com/github/gh-ost/go/metrics" "github.com/github/gh-ost/go/sql" _ "github.com/go-sql-driver/mysql" "github.com/openark/golib/log" @@ -25,6 +26,20 @@ import ( var AppVersion, GitCommit string +type statsdTagList []string + +func (s *statsdTagList) String() string { + if 
s == nil || len(*s) == 0 { + return "" + } + return fmt.Sprint([]string(*s)) +} + +func (s *statsdTagList) Set(value string) error { + *s = append(*s, value) + return nil +} + // acceptSignals registers for OS signals func acceptSignals(migrationContext *base.MigrationContext) { c := make(chan os.Signal, 1) @@ -156,6 +171,9 @@ func main() { criticalLoad := flag.String("critical-load", "", "Comma delimited status-name=threshold, same format as --max-load. When status exceeds threshold, app panics and quits") flag.Int64Var(&migrationContext.CriticalLoadIntervalMilliseconds, "critical-load-interval-millis", 0, "When 0, migration immediately bails out upon meeting critical-load. When non-zero, a second check is done after given interval, and migration only bails out if 2nd check still meets critical load") flag.Int64Var(&migrationContext.CriticalLoadHibernateSeconds, "critical-load-hibernate-seconds", 0, "When non-zero, critical-load does not panic and bail out; instead, gh-ost goes into hibernation for the specified duration. It will not read/write anything from/to any server") + statsdAddr := flag.String("statsd-addr", "", "StatsD endpoint (host:port or unix socket); empty disables StatsD") + var statsdTags statsdTagList + flag.Var(&statsdTags, "statsd-tags", "global StatsD tags applied to every metric (repeatable), format key:value. 
Example: --statsd-tags 'env:prod,service:my-service'") quiet := flag.Bool("quiet", false, "quiet") verbose := flag.Bool("verbose", false, "verbose") debug := flag.Bool("debug", false, "debug mode (very verbose)") @@ -375,6 +393,14 @@ func main() { log.Infof("starting gh-ost %+v (git commit: %s)", AppVersion, GitCommit) acceptSignals(migrationContext) + metricsClient, metricsErr := metrics.NewClient(*statsdAddr, []string(statsdTags), "gh_ost.") + if metricsErr != nil { + log.Fatalf("metrics: %v", metricsErr) + } + defer func() { _ = metricsClient.Close() }() + migrationContext.Metrics = metricsClient + metricsClient.Count("startup", 1) + migrator := logic.NewMigrator(migrationContext, AppVersion) var err error if migrationContext.Revert { diff --git a/go/metrics/client.go b/go/metrics/client.go new file mode 100644 index 000000000..ed6acc096 --- /dev/null +++ b/go/metrics/client.go @@ -0,0 +1,69 @@ +/* + Copyright 2022 GitHub Inc. + See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package metrics + +import ( + "time" + + "github.com/DataDog/datadog-go/v5/statsd" + "github.com/openark/golib/log" +) + +// Noop is a StatsD client that discards all metrics. NewClient("", ...) returns +// this exact pointer so callers can use `client == metrics.Noop`. +var Noop = &Client{} + +// Client wraps a StatsD client with namespace and global tags (from --statsd-tags). +type Client struct { + sd *statsd.Client +} + +// NewClient connects to addr for StatsD. If addr is empty, returns Noop and nil error. +// namespace is typically "gh_ost." (metrics are named namespace + short name, e.g. gh_ost.startup). +// tags are global tags applied to every metric (repeatable --statsd-tags). 
+func NewClient(addr string, tags []string, namespace string) (*Client, error) { + if addr == "" { + return Noop, nil + } + sd, err := statsd.New(addr, + statsd.WithNamespace(namespace), + statsd.WithTags(tags), + statsd.WithoutTelemetry(), + statsd.WithoutOriginDetection(), + statsd.WithClientSideAggregation(), + statsd.WithExtendedClientSideAggregation(), + statsd.WithMaxSamplesPerContext(1_000), + statsd.WithMaxBytesPerPayload(8_172), + statsd.WithAggregationInterval(5*time.Second), + ) + if err != nil { + return nil, err + } + log.Infof("metrics: DogStatsD client connected to %s (namespace: %s)", addr, namespace) + return &Client{sd: sd}, nil +} + +func (c *Client) Gauge(name string, value float64, tags ...string) { + if c == nil || c.sd == nil { // nil receiver (Metrics never set on the context) or Noop: discard the sample + return + } + _ = c.sd.Gauge(name, value, tags, 1.0) // best effort: a metrics send error is deliberately dropped +} + +func (c *Client) Count(name string, value int64, tags ...string) { + if c == nil || c.sd == nil { // nil receiver (Metrics never set on the context) or Noop: discard the sample + return + } + _ = c.sd.Count(name, value, tags, 1.0) // best effort: a metrics send error is deliberately dropped +} + +// Close closes the underlying StatsD client, if any; safe for Noop and for a nil *Client. +func (c *Client) Close() error { + if c == nil || c.sd == nil { // nothing was ever connected, so there is nothing to close + return nil + } + return c.sd.Close() +} diff --git a/go/metrics/client_test.go b/go/metrics/client_test.go new file mode 100644 index 000000000..a2fb81261 --- /dev/null +++ b/go/metrics/client_test.go @@ -0,0 +1,57 @@ +/* + Copyright 2022 GitHub Inc. 
+ See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package metrics + +import ( + "slices" + "testing" +) + +func TestNewClient_NoAddr_ReturnsNoopSingleton(t *testing.T) { + c, err := NewClient("", []string{"env:test"}, "gh_ost.") + if err != nil { + t.Fatal(err) + } + if c != Noop || c.sd != nil { + t.Fatalf("expected Noop singleton without statsd connection, got %p sd=%v", c, c.sd) + } + if err := c.Close(); err != nil { + t.Fatal(err) + } +} + +func TestMergeTagSlices(t *testing.T) { + tests := []struct { + name string + global []string + perCall []string + want []string + }{ + {"nil_global", nil, []string{"k:v"}, []string{"k:v"}}, + {"empty_extra", []string{"env:prod"}, nil, []string{"env:prod"}}, + {"combined", []string{"env:prod"}, []string{"shard:1"}, []string{"env:prod", "shard:1"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := mergeTagSlices(tt.global, tt.perCall) + if !slices.Equal(got, tt.want) { + t.Fatalf("got %#v want %#v", got, tt.want) + } + }) + } +} + +func mergeTagSlices(global, perCall []string) []string { + if len(global) == 0 { + return perCall + } + if len(perCall) == 0 { + return global + } + out := make([]string, 0, len(global)+len(perCall)) + return append(append(out, global...), perCall...) 
+} diff --git a/vendor/github.com/DataDog/datadog-go/v5/LICENSE.txt b/vendor/github.com/DataDog/datadog-go/v5/LICENSE.txt new file mode 100644 index 000000000..97cd06d7f --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2015 Datadog, Inc + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/README.md b/vendor/github.com/DataDog/datadog-go/v5/statsd/README.md new file mode 100644 index 000000000..2fc899687 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/README.md @@ -0,0 +1,4 @@ +## Overview + +Package `statsd` provides a Go [dogstatsd](http://docs.datadoghq.com/guides/dogstatsd/) client. Dogstatsd extends Statsd, adding tags +and histograms. 
diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/aggregator.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/aggregator.go new file mode 100644 index 000000000..ed18f8f5c --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/aggregator.go @@ -0,0 +1,349 @@ +package statsd + +import ( + "strings" + "sync" + "sync/atomic" + "time" +) + +type ( + countsMap map[string]*countMetric + gaugesMap map[string]*gaugeMetric + setsMap map[string]*setMetric + bufferedMetricMap map[string]*bufferedMetric +) + +type countShard struct { + sync.RWMutex + counts countsMap +} + +type gaugeShard struct { + sync.RWMutex + gauges gaugesMap +} + +type setShard struct { + sync.RWMutex + sets setsMap +} + +type aggregator struct { + nbContextGauge uint64 + nbContextCount uint64 + nbContextSet uint64 + + shardsCount int + countShards []*countShard + gaugeShards []*gaugeShard + setShards []*setShard + + histograms bufferedMetricContexts + distributions bufferedMetricContexts + timings bufferedMetricContexts + + closed chan struct{} + + client *ClientEx + + // aggregator implements channelMode mechanism to receive histograms, + // distributions and timings. Since they need sampling they need to + // lock for random. When using both channelMode and ExtendedAggregation + // we don't want goroutine to fight over the lock. 
+ inputMetrics chan metric + stopChannelMode chan struct{} + wg sync.WaitGroup +} + +func newAggregator(c *ClientEx, maxSamplesPerContext int64, shardsCount int) *aggregator { + agg := &aggregator{ + client: c, + shardsCount: shardsCount, + countShards: make([]*countShard, shardsCount), + gaugeShards: make([]*gaugeShard, shardsCount), + setShards: make([]*setShard, shardsCount), + histograms: newBufferedContexts(newHistogramMetric, maxSamplesPerContext), + distributions: newBufferedContexts(newDistributionMetric, maxSamplesPerContext), + timings: newBufferedContexts(newTimingMetric, maxSamplesPerContext), + closed: make(chan struct{}), + stopChannelMode: make(chan struct{}), + } + for i := 0; i < shardsCount; i++ { + agg.countShards[i] = &countShard{counts: countsMap{}} + agg.gaugeShards[i] = &gaugeShard{gauges: gaugesMap{}} + agg.setShards[i] = &setShard{sets: setsMap{}} + } + return agg +} + +func (a *aggregator) start(flushInterval time.Duration) { + ticker := time.NewTicker(flushInterval) + + go func() { + for { + select { + case <-ticker.C: + a.flush() + case <-a.closed: + ticker.Stop() + return + } + } + }() +} + +func (a *aggregator) startReceivingMetric(bufferSize int, nbWorkers int) { + a.inputMetrics = make(chan metric, bufferSize) + for i := 0; i < nbWorkers; i++ { + a.wg.Add(1) + go a.pullMetric() + } +} + +func (a *aggregator) stopReceivingMetric() { + close(a.stopChannelMode) + a.wg.Wait() +} + +func (a *aggregator) stop() { + a.closed <- struct{}{} +} + +func (a *aggregator) pullMetric() { + for { + select { + case m := <-a.inputMetrics: + switch m.metricType { + case histogram: + a.histogram(m.name, m.fvalue, m.tags, m.rate, m.cardinality) + case distribution: + a.distribution(m.name, m.fvalue, m.tags, m.rate, m.cardinality) + case timing: + a.timing(m.name, m.fvalue, m.tags, m.rate, m.cardinality) + } + case <-a.stopChannelMode: + a.wg.Done() + return + } + } +} + +func (a *aggregator) flush() { + for _, m := range a.flushMetrics() { + 
a.client.sendBlocking(m) + } +} + +func (a *aggregator) flushTelemetryMetrics(t *Telemetry) { + if a == nil { + // aggregation is disabled + return + } + + t.AggregationNbContextGauge = atomic.LoadUint64(&a.nbContextGauge) + t.AggregationNbContextCount = atomic.LoadUint64(&a.nbContextCount) + t.AggregationNbContextSet = atomic.LoadUint64(&a.nbContextSet) + t.AggregationNbContextHistogram = a.histograms.getNbContext() + t.AggregationNbContextDistribution = a.distributions.getNbContext() + t.AggregationNbContextTiming = a.timings.getNbContext() +} + +func (a *aggregator) flushMetrics() []metric { + metrics := []metric{} + + // We reset the values to avoid sending 'zero' values for metrics not + // sampled during this flush interval + + for _, shard := range a.setShards { + shard.Lock() + sets := shard.sets + shard.sets = setsMap{} + shard.Unlock() + for _, s := range sets { + metrics = append(metrics, s.flushUnsafe()...) + } + atomic.AddUint64(&a.nbContextSet, uint64(len(sets))) + } + + for _, shard := range a.gaugeShards { + shard.Lock() + gauges := shard.gauges + shard.gauges = gaugesMap{} + shard.Unlock() + for _, g := range gauges { + metrics = append(metrics, g.flushUnsafe()) + } + atomic.AddUint64(&a.nbContextGauge, uint64(len(gauges))) + } + + for _, shard := range a.countShards { + shard.Lock() + counts := shard.counts + shard.counts = countsMap{} + shard.Unlock() + for _, c := range counts { + metrics = append(metrics, c.flushUnsafe()) + } + atomic.AddUint64(&a.nbContextCount, uint64(len(counts))) + } + + metrics = a.histograms.flush(metrics) + metrics = a.distributions.flush(metrics) + metrics = a.timings.flush(metrics) + + return metrics +} + +// getContext returns the context for a metric name, tags, and cardinality. +// +// The context is the metric name, tags, and cardinality separated by separator symbols. 
+// It is not intended to be used as a metric name but as a unique key to aggregate +func getContext(name string, tags []string, cardinality Cardinality) string { + c, _ := getContextAndTags(name, tags, cardinality) + return c +} + +// getContextAndTags returns the context and tags for a metric name, tags, and cardinality. +// +// See getContext for usage for context +// The tags are the tags separated by a separator symbol and can be re-used to pass down to the writer +func getContextAndTags(name string, tags []string, cardinality Cardinality) (string, string) { + cardString := cardinality.String() + if len(tags) == 0 { + if cardString == "" { + return name, "" + } + return name + nameSeparatorSymbol + cardString, "" + } + + n := len(name) + len(nameSeparatorSymbol) + len(tagSeparatorSymbol)*(len(tags)-1) + for _, s := range tags { + n += len(s) + } + var cardStringLen = 0 + if cardString != "" { + n += len(cardString) + len(cardSeparatorSymbol) + cardStringLen = len(cardString) + len(cardSeparatorSymbol) + } + + var sb strings.Builder + sb.Grow(n) + sb.WriteString(name) + sb.WriteString(nameSeparatorSymbol) + if cardString != "" { + sb.WriteString(cardString) + sb.WriteString(cardSeparatorSymbol) + } + sb.WriteString(tags[0]) + for _, s := range tags[1:] { + sb.WriteString(tagSeparatorSymbol) + sb.WriteString(s) + } + + s := sb.String() + + return s, s[len(name)+len(nameSeparatorSymbol)+cardStringLen:] +} + +func getShardIndex(shardsCount int, context string) int { + if shardsCount <= 1 { + return 0 + } + return int(hashString32(context) % uint32(shardsCount)) +} + +func (a *aggregator) count(name string, value int64, tags []string, cardinality Cardinality) error { + context := getContext(name, tags, cardinality) + shard := a.countShards[getShardIndex(a.shardsCount, context)] + shard.RLock() + if count, found := shard.counts[context]; found { + count.sample(value) + shard.RUnlock() + return nil + } + shard.RUnlock() + + metric := newCountMetric(name, value, tags, 
cardinality) + + shard.Lock() + // Check if another goroutines hasn't created the value between the RUnlock and 'Lock' + if count, found := shard.counts[context]; found { + count.sample(value) + shard.Unlock() + return nil + } + + shard.counts[context] = metric + shard.Unlock() + return nil +} + +func (a *aggregator) gauge(name string, value float64, tags []string, cardinality Cardinality) error { + context := getContext(name, tags, cardinality) + shard := a.gaugeShards[getShardIndex(a.shardsCount, context)] + shard.RLock() + if gauge, found := shard.gauges[context]; found { + gauge.sample(value) + shard.RUnlock() + return nil + } + shard.RUnlock() + + gauge := newGaugeMetric(name, value, tags, cardinality) + + shard.Lock() + // Check if another goroutines hasn't created the value between the 'RUnlock' and 'Lock' + if gauge, found := shard.gauges[context]; found { + gauge.sample(value) + shard.Unlock() + return nil + } + shard.gauges[context] = gauge + shard.Unlock() + return nil +} + +func (a *aggregator) set(name string, value string, tags []string, cardinality Cardinality) error { + context := getContext(name, tags, cardinality) + shard := a.setShards[getShardIndex(a.shardsCount, context)] + shard.RLock() + if set, found := shard.sets[context]; found { + set.sample(value) + shard.RUnlock() + return nil + } + shard.RUnlock() + + metric := newSetMetric(name, value, tags, cardinality) + + shard.Lock() + // Check if another goroutines hasn't created the value between the 'RUnlock' and 'Lock' + if set, found := shard.sets[context]; found { + set.sample(value) + shard.Unlock() + return nil + } + shard.sets[context] = metric + shard.Unlock() + return nil +} + +// Only histograms, distributions and timings are sampled with a rate since we +// only pack them in on message instead of aggregating them. Discarding the +// sample rate will have impacts on the CPU and memory usage of the Agent. 
+ +// type alias for Client.sendToAggregator +type bufferedMetricSampleFunc func(name string, value float64, tags []string, rate float64, cardinality Cardinality) error + +func (a *aggregator) histogram(name string, value float64, tags []string, rate float64, cardinality Cardinality) error { + return a.histograms.sample(name, value, tags, rate, cardinality) +} + +func (a *aggregator) distribution(name string, value float64, tags []string, rate float64, cardinality Cardinality) error { + return a.distributions.sample(name, value, tags, rate, cardinality) +} + +func (a *aggregator) timing(name string, value float64, tags []string, rate float64, cardinality Cardinality) error { + return a.timings.sample(name, value, tags, rate, cardinality) +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/buffer.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/buffer.go new file mode 100644 index 000000000..2b604090c --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/buffer.go @@ -0,0 +1,208 @@ +package statsd + +import ( + "strconv" +) + +// MessageTooLongError is an error returned when a sample, event or service check is too large once serialized. See +// WithMaxBytesPerPayload option for more details. +type MessageTooLongError struct{} + +func (e MessageTooLongError) Error() string { + return "message too long. See 'WithMaxBytesPerPayload' documentation." 
+} + +var errBufferFull = MessageTooLongError{} + +type partialWriteError string + +func (e partialWriteError) Error() string { return string(e) } + +const errPartialWrite = partialWriteError("value partially written") + +const metricOverhead = 512 + +// statsdBuffer is a buffer containing statsd messages +// this struct methods are NOT safe for concurrent use +type statsdBuffer struct { + buffer []byte + maxSize int + maxElements int + elementCount int +} + +func newStatsdBuffer(maxSize, maxElements int) *statsdBuffer { + return &statsdBuffer{ + buffer: make([]byte, 0, maxSize+metricOverhead), // pre-allocate the needed size + metricOverhead to avoid having Go re-allocate on it's own if an element does not fit + maxSize: maxSize, + maxElements: maxElements, + } +} + +func (b *statsdBuffer) writeGauge(namespace string, globalTags []string, name string, value float64, tags []string, rate float64, timestamp int64, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendGauge(b.buffer, namespace, globalTags, name, value, tags, rate, originDetection) + b.buffer = appendTimestamp(b.buffer, timestamp) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +func (b *statsdBuffer) writeCount(namespace string, globalTags []string, name string, value int64, tags []string, rate float64, timestamp int64, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendCount(b.buffer, namespace, globalTags, name, value, tags, rate, originDetection) + b.buffer = appendTimestamp(b.buffer, timestamp) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +func (b *statsdBuffer) writeHistogram(namespace string, globalTags 
[]string, name string, value float64, tags []string, rate float64, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendHistogram(b.buffer, namespace, globalTags, name, value, tags, rate, originDetection) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +// writeAggregated serialized as many values as possible in the current buffer and return the position in values where it stopped. +func (b *statsdBuffer) writeAggregated(metricSymbol []byte, namespace string, globalTags []string, name string, values []float64, tags string, tagSize int, precision int, rate float64, originDetection bool, cardinality Cardinality) (int, error) { + if b.elementCount >= b.maxElements { + return 0, errBufferFull + } + + originalBuffer := b.buffer + b.buffer = appendHeader(b.buffer, namespace, name) + + // buffer already full + if len(b.buffer)+tagSize > b.maxSize { + b.buffer = originalBuffer + return 0, errBufferFull + } + + // We add as many value as possible + var position int + for idx, v := range values { + previousBuffer := b.buffer + if idx != 0 { + b.buffer = append(b.buffer, ':') + } + + b.buffer = strconv.AppendFloat(b.buffer, v, 'f', precision, 64) + + // Should we stop serializing and switch to another buffer + if len(b.buffer)+tagSize > b.maxSize { + b.buffer = previousBuffer + break + } + position = idx + 1 + } + + // we could not add a single value + if position == 0 { + b.buffer = originalBuffer + return 0, errBufferFull + } + + b.buffer = append(b.buffer, '|') + b.buffer = append(b.buffer, metricSymbol...) 
+ b.buffer = appendRate(b.buffer, rate) + b.buffer = appendTagsAggregated(b.buffer, globalTags, tags) + b.buffer = appendContainerID(b.buffer) + b.buffer = appendExternalEnv(b.buffer, originDetection) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + b.elementCount++ + + if position != len(values) { + return position, errPartialWrite + } + return position, nil + +} + +func (b *statsdBuffer) writeDistribution(namespace string, globalTags []string, name string, value float64, tags []string, rate float64, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendDistribution(b.buffer, namespace, globalTags, name, value, tags, rate, originDetection) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +func (b *statsdBuffer) writeSet(namespace string, globalTags []string, name string, value string, tags []string, rate float64, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendSet(b.buffer, namespace, globalTags, name, value, tags, rate, originDetection) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +func (b *statsdBuffer) writeTiming(namespace string, globalTags []string, name string, value float64, tags []string, rate float64, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendTiming(b.buffer, namespace, globalTags, name, value, tags, rate, originDetection) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +func (b *statsdBuffer) writeEvent(event *Event, 
globalTags []string, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendEvent(b.buffer, event, globalTags, originDetection) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +func (b *statsdBuffer) writeServiceCheck(serviceCheck *ServiceCheck, globalTags []string, originDetection bool, cardinality Cardinality) error { + if b.elementCount >= b.maxElements { + return errBufferFull + } + originalBuffer := b.buffer + b.buffer = appendServiceCheck(b.buffer, serviceCheck, globalTags, originDetection) + b.buffer = appendTagCardinality(b.buffer, cardinality) + b.writeSeparator() + return b.validateNewElement(originalBuffer) +} + +func (b *statsdBuffer) validateNewElement(originalBuffer []byte) error { + if len(b.buffer) > b.maxSize { + b.buffer = originalBuffer + return errBufferFull + } + b.elementCount++ + return nil +} + +func (b *statsdBuffer) writeSeparator() { + b.buffer = append(b.buffer, '\n') +} + +func (b *statsdBuffer) reset() { + b.buffer = b.buffer[:0] + b.elementCount = 0 +} + +func (b *statsdBuffer) bytes() []byte { + return b.buffer +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/buffer_pool.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/buffer_pool.go new file mode 100644 index 000000000..7a3e3c9d2 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/buffer_pool.go @@ -0,0 +1,40 @@ +package statsd + +type bufferPool struct { + pool chan *statsdBuffer + bufferMaxSize int + bufferMaxElements int +} + +func newBufferPool(poolSize, bufferMaxSize, bufferMaxElements int) *bufferPool { + p := &bufferPool{ + pool: make(chan *statsdBuffer, poolSize), + bufferMaxSize: bufferMaxSize, + bufferMaxElements: bufferMaxElements, + } + for i := 0; i < poolSize; i++ { + p.addNewBuffer() + } + return p +} + +func (p *bufferPool) addNewBuffer() { + 
p.pool <- newStatsdBuffer(p.bufferMaxSize, p.bufferMaxElements) +} + +func (p *bufferPool) borrowBuffer() *statsdBuffer { + select { + case b := <-p.pool: + return b + default: + return newStatsdBuffer(p.bufferMaxSize, p.bufferMaxElements) + } +} + +func (p *bufferPool) returnBuffer(buffer *statsdBuffer) { + buffer.reset() + select { + case p.pool <- buffer: + default: + } +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/buffered_metric_context.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/buffered_metric_context.go new file mode 100644 index 000000000..85cab2a17 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/buffered_metric_context.go @@ -0,0 +1,104 @@ +package statsd + +import ( + "math/rand" + "sync" + "sync/atomic" + "time" +) + +// bufferedMetricContexts represent the contexts for Histograms, Distributions +// and Timing. Since those 3 metric types behave the same way and are sampled +// with the same type they're represented by the same class. +type bufferedMetricContexts struct { + nbContext uint64 + mutex sync.RWMutex + values bufferedMetricMap + newMetric func(string, float64, string, float64, Cardinality) *bufferedMetric + + // Each bufferedMetricContexts uses its own random source and random + // lock to prevent goroutines from contending for the lock on the + // "math/rand" package-global random source (e.g. calls like + // "rand.Float64()" must acquire a shared lock to get the next + // pseudorandom number). 
+ random *rand.Rand + randomLock sync.Mutex +} + +func newBufferedContexts(newMetric func(string, float64, string, int64, float64, Cardinality) *bufferedMetric, maxSamples int64) bufferedMetricContexts { + return bufferedMetricContexts{ + values: bufferedMetricMap{}, + newMetric: func(name string, value float64, stringTags string, rate float64, cardinality Cardinality) *bufferedMetric { + return newMetric(name, value, stringTags, maxSamples, rate, cardinality) + }, + // Note that calling "time.Now().UnixNano()" repeatedly quickly may return + // very similar values. That's fine for seeding the worker-specific random + // source because we just need an evenly distributed stream of float values. + // Do not use this random source for cryptographic randomness. + random: rand.New(rand.NewSource(time.Now().UnixNano())), + } +} + +func (bc *bufferedMetricContexts) flush(metrics []metric) []metric { + bc.mutex.Lock() + values := bc.values + bc.values = bufferedMetricMap{} + bc.mutex.Unlock() + + for _, d := range values { + d.Lock() + metrics = append(metrics, d.flushUnsafe()) + d.Unlock() + } + atomic.AddUint64(&bc.nbContext, uint64(len(values))) + return metrics +} + +func (bc *bufferedMetricContexts) sample(name string, value float64, tags []string, rate float64, cardinality Cardinality) error { + keepingSample := shouldSample(rate, bc.random, &bc.randomLock) + + // If we don't keep the sample, return early. If we do keep the sample + // we end up storing the *first* observed sampling rate in the metric. + // This is the *wrong* behavior but it's the one we had before and the alternative would increase lock contention too + // much with the current code. + // TODO: change this behavior in the future, probably by introducing thread-local storage and lockless stuctures. 
+ // If this code is removed, also remove the observed sampling rate in the metric and fix `bufferedMetric.flushUnsafe()` + if !keepingSample { + return nil + } + + context, stringTags := getContextAndTags(name, tags, cardinality) + var v *bufferedMetric + + bc.mutex.RLock() + v, _ = bc.values[context] + bc.mutex.RUnlock() + + // Create it if it wasn't found + if v == nil { + bc.mutex.Lock() + // It might have been created by another goroutine since last call + v, _ = bc.values[context] + if v == nil { + // If we might keep a sample that we should have skipped, but that should not drastically affect performances. + bc.values[context] = bc.newMetric(name, value, stringTags, rate, cardinality) + // We added a new value, we need to unlock the mutex and quit + bc.mutex.Unlock() + return nil + } + bc.mutex.Unlock() + } + + // Now we can keep the sample or skip it + if keepingSample { + v.maybeKeepSample(value, bc.random, &bc.randomLock) + } else { + v.skipSample() + } + + return nil +} + +func (bc *bufferedMetricContexts) getNbContext() uint64 { + return atomic.LoadUint64(&bc.nbContext) +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/container.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/container.go new file mode 100644 index 000000000..20d69ef63 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/container.go @@ -0,0 +1,19 @@ +package statsd + +import ( + "sync" +) + +var ( + // containerID holds the container ID. + containerID = "" + + initOnce sync.Once +) + +// getContainerID returns the container ID configured at the client creation +// It can either be auto-discovered with origin detection or provided by the user. +// User-defined container ID is prioritized. 
+func getContainerID() string { + return containerID +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/container_linux.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/container_linux.go new file mode 100644 index 000000000..125132349 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/container_linux.go @@ -0,0 +1,219 @@ +//go:build linux +// +build linux + +package statsd + +import ( + "bufio" + "fmt" + "io" + "os" + "path" + "regexp" + "strings" + "syscall" +) + +const ( + // cgroupPath is the path to the cgroup file where we can find the container id if one exists. + cgroupPath = "/proc/self/cgroup" + + // selfMountinfo is the path to the mountinfo path where we can find the container id in case cgroup namespace is preventing the use of /proc/self/cgroup + selfMountInfoPath = "/proc/self/mountinfo" + + // defaultCgroupMountPath is the default path to the cgroup mount point. + defaultCgroupMountPath = "/sys/fs/cgroup" + + // cgroupV1BaseController is the controller used to identify the container-id for cgroup v1 + cgroupV1BaseController = "memory" + + uuidSource = "[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}" + containerSource = "[0-9a-f]{64}" + taskSource = "[0-9a-f]{32}-\\d+" + + containerdSandboxPrefix = "sandboxes" + + // ContainerRegexpStr defines the regexp used to match container IDs + // ([0-9a-f]{64}) is standard container id used pretty much everywhere + // ([0-9a-f]{32}-\d+) is container id used by AWS ECS + // ([0-9a-f]{8}(-[0-9a-f]{4}){4}$) is container id used by Garden + containerRegexpStr = "([0-9a-f]{64})|([0-9a-f]{32}-\\d+)|([0-9a-f]{8}(-[0-9a-f]{4}){4}$)" + // cIDRegexpStr defines the regexp used to match container IDs in /proc/self/mountinfo + cIDRegexpStr = `.*/([^\s/]+)/(` + containerRegexpStr + `)/[\S]*hostname` + + // From https://github.com/torvalds/linux/blob/5859a2b1991101d6b978f3feb5325dad39421f29/include/linux/proc_ns.h#L41-L49 + // Currently, host namespace inode number are 
hardcoded, which can be used to detect + // if we're running in host namespace or not (does not work when running in DinD) + hostCgroupNamespaceInode = 0xEFFFFFFB +) + +var ( + // expLine matches a line in the /proc/self/cgroup file. It has a submatch for the last element (path), which contains the container ID. + expLine = regexp.MustCompile(`^\d+:[^:]*:(.+)$`) + + // expContainerID matches contained IDs and sources. Source: https://github.com/Qard/container-info/blob/master/index.js + expContainerID = regexp.MustCompile(fmt.Sprintf(`(%s|%s|%s)(?:.scope)?$`, uuidSource, containerSource, taskSource)) + + cIDMountInfoRegexp = regexp.MustCompile(cIDRegexpStr) + + // initContainerID initializes the container ID. + initContainerID = internalInitContainerID +) + +// parseContainerID finds the first container ID reading from r and returns it. +func parseContainerID(r io.Reader) string { + scn := bufio.NewScanner(r) + for scn.Scan() { + path := expLine.FindStringSubmatch(scn.Text()) + if len(path) != 2 { + // invalid entry, continue + continue + } + if parts := expContainerID.FindStringSubmatch(path[1]); len(parts) == 2 { + return parts[1] + } + } + return "" +} + +// readContainerID attempts to return the container ID from the provided file path or empty on failure. 
+func readContainerID(fpath string) string { + f, err := os.Open(fpath) + if err != nil { + return "" + } + defer f.Close() + return parseContainerID(f) +} + +// Parsing /proc/self/mountinfo is not always reliable in Kubernetes+containerd (at least) +// We're still trying to use it as it may help in some cgroupv2 configurations (Docker, ECS, raw containerd) +func parseMountinfo(r io.Reader) string { + scn := bufio.NewScanner(r) + for scn.Scan() { + line := scn.Text() + allMatches := cIDMountInfoRegexp.FindAllStringSubmatch(line, -1) + if len(allMatches) == 0 { + continue + } + + // We're interest in rightmost match + matches := allMatches[len(allMatches)-1] + if len(matches) > 0 && matches[1] != containerdSandboxPrefix { + return matches[2] + } + } + + return "" +} + +func readMountinfo(path string) string { + f, err := os.Open(path) + if err != nil { + return "" + } + defer f.Close() + return parseMountinfo(f) +} + +func isHostCgroupNamespace() bool { + fi, err := os.Stat("/proc/self/ns/cgroup") + if err != nil { + return false + } + + inode := fi.Sys().(*syscall.Stat_t).Ino + + return inode == hostCgroupNamespaceInode +} + +// parseCgroupNodePath parses /proc/self/cgroup and returns a map of controller to its associated cgroup node path. +func parseCgroupNodePath(r io.Reader) map[string]string { + res := make(map[string]string) + scn := bufio.NewScanner(r) + for scn.Scan() { + line := scn.Text() + tokens := strings.Split(line, ":") + if len(tokens) != 3 { + continue + } + if tokens[1] == cgroupV1BaseController || tokens[1] == "" { + res[tokens[1]] = tokens[2] + } + } + return res +} + +// getCgroupInode returns the cgroup controller inode if it exists otherwise an empty string. +// The inode is prefixed by "in-" and is used by the agent to retrieve the container ID. +// For cgroup v1, we use the memory controller. 
+func getCgroupInode(cgroupMountPath, procSelfCgroupPath string) string { + // Parse /proc/self/cgroup to retrieve the paths to the memory controller (cgroupv1) and the cgroup node (cgroupv2) + f, err := os.Open(procSelfCgroupPath) + if err != nil { + return "" + } + defer f.Close() + cgroupControllersPaths := parseCgroupNodePath(f) + // Retrieve the cgroup inode from /sys/fs/cgroup+controller+cgroupNodePath + for _, controller := range []string{cgroupV1BaseController, ""} { + cgroupNodePath, ok := cgroupControllersPaths[controller] + if !ok { + continue + } + inode := inodeForPath(path.Join(cgroupMountPath, controller, cgroupNodePath)) + if inode != "" { + return inode + } + } + return "" +} + +// inodeForPath returns the inode for the provided path or empty on failure. +func inodeForPath(path string) string { + fi, err := os.Stat(path) + if err != nil { + return "" + } + stats, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return "" + } + return fmt.Sprintf("in-%d", stats.Ino) +} + +// internalInitContainerID initializes the container ID. +// It can either be provided by the user or read from cgroups. +func internalInitContainerID(userProvidedID string, cgroupFallback, isHostCgroupNs bool) { + initOnce.Do(func() { + readCIDOrInode(userProvidedID, cgroupPath, selfMountInfoPath, defaultCgroupMountPath, cgroupFallback, isHostCgroupNs) + }) +} + +// readCIDOrInode reads the container ID from the user provided ID, cgroups or mountinfo. 
+func readCIDOrInode(userProvidedID, cgroupPath, selfMountInfoPath, defaultCgroupMountPath string, cgroupFallback, isHostCgroupNs bool) { + if userProvidedID != "" { + containerID = userProvidedID + return + } + + if cgroupFallback { + containerID = readContainerID(cgroupPath) + if containerID != "" { + return + } + + containerID = readMountinfo(selfMountInfoPath) + if containerID != "" { + return + } + + // If we're in the host cgroup namespace, the cid should be retrievable in /proc/self/cgroup + // In private cgroup namespace, we can retrieve the cgroup controller inode. + if containerID == "" && isHostCgroupNs { + return + } + + containerID = getCgroupInode(defaultCgroupMountPath, cgroupPath) + } +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/container_stub.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/container_stub.go new file mode 100644 index 000000000..29ab7f2c9 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/container_stub.go @@ -0,0 +1,17 @@ +//go:build !linux +// +build !linux + +package statsd + +func isHostCgroupNamespace() bool { + return false +} + +var initContainerID = func(userProvidedID string, _, _ bool) { + initOnce.Do(func() { + if userProvidedID != "" { + containerID = userProvidedID + return + } + }) +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/error_handler.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/error_handler.go new file mode 100644 index 000000000..007626273 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/error_handler.go @@ -0,0 +1,22 @@ +package statsd + +import ( + "log" +) + +func LoggingErrorHandler(err error) { + if e, ok := err.(*ErrorInputChannelFull); ok { + log.Printf( + "Input Queue is full (%d elements): %s %s dropped - %s - increase channel buffer size with `WithChannelModeBufferSize()`", + e.ChannelSize, e.Metric.name, e.Metric.tags, e.Msg, + ) + return + } else if e, ok := err.(*ErrorSenderChannelFull); ok { + log.Printf( + "Sender 
Queue is full (%d elements): %d metrics dropped - %s - increase sender queue size with `WithSenderQueueSize()`", + e.ChannelSize, e.LostElements, e.Msg, + ) + } else { + log.Printf("Error: %v", err) + } +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/event.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/event.go new file mode 100644 index 000000000..a2ca4faf7 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/event.go @@ -0,0 +1,75 @@ +package statsd + +import ( + "fmt" + "time" +) + +// Events support +// EventAlertType and EventAlertPriority became exported types after this issue was submitted: https://github.com/DataDog/datadog-go/issues/41 +// The reason why they got exported is so that client code can directly use the types. + +// EventAlertType is the alert type for events +type EventAlertType string + +const ( + // Info is the "info" AlertType for events + Info EventAlertType = "info" + // Error is the "error" AlertType for events + Error EventAlertType = "error" + // Warning is the "warning" AlertType for events + Warning EventAlertType = "warning" + // Success is the "success" AlertType for events + Success EventAlertType = "success" +) + +// EventPriority is the event priority for events +type EventPriority string + +const ( + // Normal is the "normal" Priority for events + Normal EventPriority = "normal" + // Low is the "low" Priority for events + Low EventPriority = "low" +) + +// An Event is an object that can be posted to your DataDog event stream. +type Event struct { + // Title of the event. Required. + Title string + // Text is the description of the event. + Text string + // Timestamp is a timestamp for the event. If not provided, the dogstatsd + // server will set this to the current time. + Timestamp time.Time + // Hostname for the event. + Hostname string + // AggregationKey groups this event with others of the same key. + AggregationKey string + // Priority of the event. Can be statsd.Low or statsd.Normal. 
+ Priority EventPriority + // SourceTypeName is a source type for the event. + SourceTypeName string + // AlertType can be statsd.Info, statsd.Error, statsd.Warning, or statsd.Success. + // If absent, the default value applied by the dogstatsd server is Info. + AlertType EventAlertType + // Tags for the event. + Tags []string +} + +// NewEvent creates a new event with the given title and text. Error checking +// against these values is done at send-time, or upon running e.Check. +func NewEvent(title, text string) *Event { + return &Event{ + Title: title, + Text: text, + } +} + +// Check verifies that an event is valid. +func (e *Event) Check() error { + if len(e.Title) == 0 { + return fmt.Errorf("statsd.Event title is required") + } + return nil +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/external_env.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/external_env.go new file mode 100644 index 000000000..2c9b13a4c --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/external_env.go @@ -0,0 +1,46 @@ +package statsd + +import ( + "os" + "sync" + "unicode" +) + +// ddExternalEnvVarName specifies the env var to inject the environment name. +const ddExternalEnvVarName = "DD_EXTERNAL_ENV" + +var ( + externalEnv = "" + externalEnvMu sync.RWMutex // Protects concurrent access to externalEnv +) + +// initExternalEnv initializes the external environment name. +func initExternalEnv() { + var value = os.Getenv(ddExternalEnvVarName) + if value != "" { + externalEnvMu.Lock() + externalEnv = sanitizeExternalEnv(value) + externalEnvMu.Unlock() + } +} + +// sanitizeExternalEnv removes non-printable characters and pipe characters from the external environment name. 
+func sanitizeExternalEnv(externalEnv string) string { + if externalEnv == "" { + return "" + } + var output string + for _, r := range externalEnv { + if unicode.IsPrint(r) && r != '|' { + output += string(r) + } + } + + return output +} + +func getExternalEnv() string { + externalEnvMu.RLock() + defer externalEnvMu.RUnlock() + return externalEnv +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/fnv1a.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/fnv1a.go new file mode 100644 index 000000000..03dc8a07c --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/fnv1a.go @@ -0,0 +1,39 @@ +package statsd + +const ( + // FNV-1a + offset32 = uint32(2166136261) + prime32 = uint32(16777619) + + // init32 is what 32 bits hash values should be initialized with. + init32 = offset32 +) + +// HashString32 returns the hash of s. +func hashString32(s string) uint32 { + return addString32(init32, s) +} + +// AddString32 adds the hash of s to the precomputed hash value h. +func addString32(h uint32, s string) uint32 { + i := 0 + n := (len(s) / 8) * 8 + + for i != n { + h = (h ^ uint32(s[i])) * prime32 + h = (h ^ uint32(s[i+1])) * prime32 + h = (h ^ uint32(s[i+2])) * prime32 + h = (h ^ uint32(s[i+3])) * prime32 + h = (h ^ uint32(s[i+4])) * prime32 + h = (h ^ uint32(s[i+5])) * prime32 + h = (h ^ uint32(s[i+6])) * prime32 + h = (h ^ uint32(s[i+7])) * prime32 + i += 8 + } + + for _, c := range s[i:] { + h = (h ^ uint32(c)) * prime32 + } + + return h +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/format.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/format.go new file mode 100644 index 000000000..52f906355 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/format.go @@ -0,0 +1,306 @@ +package statsd + +import ( + "strconv" + "strings" +) + +var ( + gaugeSymbol = []byte("g") + countSymbol = []byte("c") + histogramSymbol = []byte("h") + distributionSymbol = []byte("d") + setSymbol = []byte("s") + timingSymbol = []byte("ms") 
+) + +const ( + tagSeparatorSymbol = "," + nameSeparatorSymbol = ":" + cardSeparatorSymbol = "|" +) + +func appendHeader(buffer []byte, namespace string, name string) []byte { + if namespace != "" { + buffer = append(buffer, namespace...) + } + buffer = append(buffer, name...) + buffer = append(buffer, ':') + return buffer +} + +func appendRate(buffer []byte, rate float64) []byte { + if rate < 1 { + buffer = append(buffer, "|@"...) + buffer = strconv.AppendFloat(buffer, rate, 'f', -1, 64) + } + return buffer +} + +func appendWithoutNewlines(buffer []byte, s string) []byte { + // fastpath for strings without newlines + if strings.IndexByte(s, '\n') == -1 { + return append(buffer, s...) + } + + for _, b := range []byte(s) { + if b != '\n' { + buffer = append(buffer, b) + } + } + return buffer +} + +func appendTags(buffer []byte, globalTags []string, tags []string) []byte { + if len(globalTags) == 0 && len(tags) == 0 { + return buffer + } + buffer = append(buffer, "|#"...) + firstTag := true + + for _, tag := range globalTags { + if !firstTag { + buffer = append(buffer, tagSeparatorSymbol...) + } + buffer = appendWithoutNewlines(buffer, tag) + firstTag = false + } + for _, tag := range tags { + if !firstTag { + buffer = append(buffer, tagSeparatorSymbol...) + } + buffer = appendWithoutNewlines(buffer, tag) + firstTag = false + } + return buffer +} + +func appendTagsAggregated(buffer []byte, globalTags []string, tags string) []byte { + if len(globalTags) == 0 && tags == "" { + return buffer + } + + buffer = append(buffer, "|#"...) + firstTag := true + + for _, tag := range globalTags { + if !firstTag { + buffer = append(buffer, tagSeparatorSymbol...) + } + buffer = appendWithoutNewlines(buffer, tag) + firstTag = false + } + if tags != "" { + if !firstTag { + buffer = append(buffer, tagSeparatorSymbol...) 
+ } + buffer = appendWithoutNewlines(buffer, tags) + } + return buffer +} + +func appendFloatMetric(buffer []byte, typeSymbol []byte, namespace string, globalTags []string, name string, value float64, tags []string, rate float64, precision int, originDetection bool) []byte { + buffer = appendHeader(buffer, namespace, name) + buffer = strconv.AppendFloat(buffer, value, 'f', precision, 64) + buffer = append(buffer, '|') + buffer = append(buffer, typeSymbol...) + buffer = appendRate(buffer, rate) + buffer = appendTags(buffer, globalTags, tags) + buffer = appendContainerID(buffer) + buffer = appendExternalEnv(buffer, originDetection) + return buffer +} + +func appendIntegerMetric(buffer []byte, typeSymbol []byte, namespace string, globalTags []string, name string, value int64, tags []string, rate float64, originDetection bool) []byte { + buffer = appendHeader(buffer, namespace, name) + buffer = strconv.AppendInt(buffer, value, 10) + buffer = append(buffer, '|') + buffer = append(buffer, typeSymbol...) + buffer = appendRate(buffer, rate) + buffer = appendTags(buffer, globalTags, tags) + buffer = appendContainerID(buffer) + buffer = appendExternalEnv(buffer, originDetection) + return buffer +} + +func appendStringMetric(buffer []byte, typeSymbol []byte, namespace string, globalTags []string, name string, value string, tags []string, rate float64, originDetection bool) []byte { + buffer = appendHeader(buffer, namespace, name) + buffer = append(buffer, value...) + buffer = append(buffer, '|') + buffer = append(buffer, typeSymbol...) 
+ buffer = appendRate(buffer, rate) + buffer = appendTags(buffer, globalTags, tags) + buffer = appendContainerID(buffer) + buffer = appendExternalEnv(buffer, originDetection) + return buffer +} + +func appendGauge(buffer []byte, namespace string, globalTags []string, name string, value float64, tags []string, rate float64, originDetection bool) []byte { + return appendFloatMetric(buffer, gaugeSymbol, namespace, globalTags, name, value, tags, rate, -1, originDetection) +} + +func appendCount(buffer []byte, namespace string, globalTags []string, name string, value int64, tags []string, rate float64, originDetection bool) []byte { + return appendIntegerMetric(buffer, countSymbol, namespace, globalTags, name, value, tags, rate, originDetection) +} + +func appendHistogram(buffer []byte, namespace string, globalTags []string, name string, value float64, tags []string, rate float64, originDetection bool) []byte { + return appendFloatMetric(buffer, histogramSymbol, namespace, globalTags, name, value, tags, rate, -1, originDetection) +} + +func appendDistribution(buffer []byte, namespace string, globalTags []string, name string, value float64, tags []string, rate float64, originDetection bool) []byte { + return appendFloatMetric(buffer, distributionSymbol, namespace, globalTags, name, value, tags, rate, -1, originDetection) +} + +func appendSet(buffer []byte, namespace string, globalTags []string, name string, value string, tags []string, rate float64, originDetection bool) []byte { + return appendStringMetric(buffer, setSymbol, namespace, globalTags, name, value, tags, rate, originDetection) +} + +func appendTiming(buffer []byte, namespace string, globalTags []string, name string, value float64, tags []string, rate float64, originDetection bool) []byte { + return appendFloatMetric(buffer, timingSymbol, namespace, globalTags, name, value, tags, rate, 6, originDetection) +} + +func escapedEventTextLen(text string) int { + return len(text) + strings.Count(text, "\n") +} + 
+func appendEscapedEventText(buffer []byte, text string) []byte { + for _, b := range []byte(text) { + if b != '\n' { + buffer = append(buffer, b) + } else { + buffer = append(buffer, "\\n"...) + } + } + return buffer +} + +func appendEvent(buffer []byte, event *Event, globalTags []string, originDetection bool) []byte { + escapedTextLen := escapedEventTextLen(event.Text) + + buffer = append(buffer, "_e{"...) + buffer = strconv.AppendInt(buffer, int64(len(event.Title)), 10) + buffer = append(buffer, tagSeparatorSymbol...) + buffer = strconv.AppendInt(buffer, int64(escapedTextLen), 10) + buffer = append(buffer, "}:"...) + buffer = append(buffer, event.Title...) + buffer = append(buffer, '|') + if escapedTextLen != len(event.Text) { + buffer = appendEscapedEventText(buffer, event.Text) + } else { + buffer = append(buffer, event.Text...) + } + + if !event.Timestamp.IsZero() { + buffer = append(buffer, "|d:"...) + buffer = strconv.AppendInt(buffer, int64(event.Timestamp.Unix()), 10) + } + + if len(event.Hostname) != 0 { + buffer = append(buffer, "|h:"...) + buffer = append(buffer, event.Hostname...) + } + + if len(event.AggregationKey) != 0 { + buffer = append(buffer, "|k:"...) + buffer = append(buffer, event.AggregationKey...) + } + + if len(event.Priority) != 0 { + buffer = append(buffer, "|p:"...) + buffer = append(buffer, event.Priority...) + } + + if len(event.SourceTypeName) != 0 { + buffer = append(buffer, "|s:"...) + buffer = append(buffer, event.SourceTypeName...) + } + + if len(event.AlertType) != 0 { + buffer = append(buffer, "|t:"...) + buffer = append(buffer, string(event.AlertType)...) + } + + buffer = appendTags(buffer, globalTags, event.Tags) + buffer = appendContainerID(buffer) + buffer = appendExternalEnv(buffer, originDetection) + return buffer +} + +func appendEscapedServiceCheckText(buffer []byte, text string) []byte { + for i := 0; i < len(text); i++ { + if text[i] == '\n' { + buffer = append(buffer, "\\n"...) 
+ } else if text[i] == 'm' && i+1 < len(text) && text[i+1] == ':' { + buffer = append(buffer, "m\\:"...) + i++ + } else { + buffer = append(buffer, text[i]) + } + } + return buffer +} + +func appendServiceCheck(buffer []byte, serviceCheck *ServiceCheck, globalTags []string, originDetection bool) []byte { + buffer = append(buffer, "_sc|"...) + buffer = append(buffer, serviceCheck.Name...) + buffer = append(buffer, '|') + buffer = strconv.AppendInt(buffer, int64(serviceCheck.Status), 10) + + if !serviceCheck.Timestamp.IsZero() { + buffer = append(buffer, "|d:"...) + buffer = strconv.AppendInt(buffer, int64(serviceCheck.Timestamp.Unix()), 10) + } + + if len(serviceCheck.Hostname) != 0 { + buffer = append(buffer, "|h:"...) + buffer = append(buffer, serviceCheck.Hostname...) + } + + buffer = appendTags(buffer, globalTags, serviceCheck.Tags) + + if len(serviceCheck.Message) != 0 { + buffer = append(buffer, "|m:"...) + buffer = appendEscapedServiceCheckText(buffer, serviceCheck.Message) + } + + buffer = appendContainerID(buffer) + buffer = appendExternalEnv(buffer, originDetection) + return buffer +} + +func appendSeparator(buffer []byte) []byte { + return append(buffer, '\n') +} + +func appendContainerID(buffer []byte) []byte { + if containerID := getContainerID(); len(containerID) > 0 { + buffer = append(buffer, "|c:"...) + buffer = append(buffer, containerID...) + } + return buffer +} + +func appendTimestamp(buffer []byte, timestamp int64) []byte { + if timestamp > noTimestamp { + buffer = append(buffer, "|T"...) + buffer = strconv.AppendInt(buffer, timestamp, 10) + } + return buffer +} + +func appendExternalEnv(buffer []byte, originDetection bool) []byte { + if externalEnv := getExternalEnv(); externalEnv != "" && originDetection { + buffer = append(buffer, "|e:"...) + buffer = append(buffer, externalEnv...) 
+ } + return buffer +} + +func appendTagCardinality(buffer []byte, cardinality Cardinality) []byte { + cardString := cardinality.String() + if cardString != "" { + buffer = append(buffer, "|card:"...) + buffer = append(buffer, cardString...) + } + return buffer +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/metrics.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/metrics.go new file mode 100644 index 000000000..ea78730ea --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/metrics.go @@ -0,0 +1,283 @@ +package statsd + +import ( + "math" + "math/rand" + "sync" + "sync/atomic" +) + +/* +Those are metrics type that can be aggregated on the client side: + - Gauge + - Count + - Set +*/ + +type countMetric struct { + value int64 + name string + tags []string + cardinality Cardinality +} + +func newCountMetric(name string, value int64, tags []string, cardinality Cardinality) *countMetric { + return &countMetric{ + value: value, + name: name, + tags: copySlice(tags), + cardinality: cardinality, + } +} + +func (c *countMetric) sample(v int64) { + atomic.AddInt64(&c.value, v) +} + +func (c *countMetric) flushUnsafe() metric { + return metric{ + metricType: count, + name: c.name, + tags: c.tags, + rate: 1, + ivalue: c.value, + cardinality: c.cardinality, + } +} + +// Gauge + +type gaugeMetric struct { + value uint64 + name string + tags []string + cardinality Cardinality +} + +func newGaugeMetric(name string, value float64, tags []string, cardinality Cardinality) *gaugeMetric { + return &gaugeMetric{ + value: math.Float64bits(value), + name: name, + tags: copySlice(tags), + cardinality: cardinality, + } +} + +func (g *gaugeMetric) sample(v float64) { + atomic.StoreUint64(&g.value, math.Float64bits(v)) +} + +func (g *gaugeMetric) flushUnsafe() metric { + return metric{ + metricType: gauge, + name: g.name, + tags: g.tags, + rate: 1, + fvalue: math.Float64frombits(g.value), + cardinality: g.cardinality, + } +} + +// Set + +type setMetric struct 
{ + data map[string]struct{} + name string + tags []string + cardinality Cardinality + sync.Mutex +} + +func newSetMetric(name string, value string, tags []string, cardinality Cardinality) *setMetric { + set := &setMetric{ + data: map[string]struct{}{}, + name: name, + tags: copySlice(tags), + cardinality: cardinality, + } + set.data[value] = struct{}{} + return set +} + +func (s *setMetric) sample(v string) { + s.Lock() + defer s.Unlock() + s.data[v] = struct{}{} +} + +// Sets are aggregated on the agent side too. We flush the keys so a set from +// multiple application can be correctly aggregated on the agent side. +func (s *setMetric) flushUnsafe() []metric { + if len(s.data) == 0 { + return nil + } + + metrics := make([]metric, len(s.data)) + i := 0 + for value := range s.data { + metrics[i] = metric{ + metricType: set, + name: s.name, + tags: s.tags, + rate: 1, + svalue: value, + cardinality: s.cardinality, + } + i++ + } + return metrics +} + +// Histograms, Distributions and Timings + +type bufferedMetric struct { + sync.Mutex + + // Kept samples (after sampling) + data []float64 + // Total stored samples (after sampling) + storedSamples int64 + // Total number of observed samples (before sampling). This is used to keep + // the sampling rate correct. + totalSamples int64 + + name string + // Histograms and Distributions store tags as one string since we need + // to compute its size multiple time when serializing. + tags string + mtype metricType + + // maxSamples is the maximum number of samples we keep in memory + maxSamples int64 + + // The first observed user-specified sample rate. When specified + // it is used because we don't know better. 
+ specifiedRate float64 + + cardinality Cardinality +} + +func (s *bufferedMetric) sample(v float64) { + s.Lock() + defer s.Unlock() + s.sampleUnsafe(v) +} + +func (s *bufferedMetric) sampleUnsafe(v float64) { + s.data = append(s.data, v) + s.storedSamples++ + // Total samples needs to be incremented though an atomic because it can be accessed without the lock. + atomic.AddInt64(&s.totalSamples, 1) +} + +func (s *bufferedMetric) maybeKeepSample(v float64, rand *rand.Rand, randLock *sync.Mutex) { + s.Lock() + defer s.Unlock() + if s.maxSamples > 0 { + if s.storedSamples >= s.maxSamples { + // We reached the maximum number of samples we can keep in memory, so we randomly + // replace a sample. + randLock.Lock() + i := rand.Int63n(atomic.LoadInt64(&s.totalSamples)) + randLock.Unlock() + if i < s.maxSamples { + s.data[i] = v + } + } else { + s.data[s.storedSamples] = v + s.storedSamples++ + } + s.totalSamples++ + } else { + // This code path appends to the slice since we did not pre-allocate memory in this case. + s.sampleUnsafe(v) + } +} + +func (s *bufferedMetric) skipSample() { + atomic.AddInt64(&s.totalSamples, 1) +} + +func (s *bufferedMetric) flushUnsafe() metric { + totalSamples := atomic.LoadInt64(&s.totalSamples) + var rate float64 + + // If the user had a specified rate send it because we don't know better. 
+ // This code should be removed once we can also remove the early return at the top of + // `bufferedMetricContexts.sample` + if s.specifiedRate != 1.0 { + rate = s.specifiedRate + } else { + rate = float64(s.storedSamples) / float64(totalSamples) + } + + return metric{ + metricType: s.mtype, + name: s.name, + stags: s.tags, + rate: rate, + fvalues: s.data[:s.storedSamples], + cardinality: s.cardinality, + } +} + +type histogramMetric = bufferedMetric + +func newHistogramMetric(name string, value float64, stringTags string, maxSamples int64, rate float64, cardinality Cardinality) *histogramMetric { + return &histogramMetric{ + data: newData(value, maxSamples), + totalSamples: 1, + storedSamples: 1, + name: name, + tags: stringTags, + mtype: histogramAggregated, + maxSamples: maxSamples, + specifiedRate: rate, + cardinality: cardinality, + } +} + +type distributionMetric = bufferedMetric + +func newDistributionMetric(name string, value float64, stringTags string, maxSamples int64, rate float64, cardinality Cardinality) *distributionMetric { + return &distributionMetric{ + data: newData(value, maxSamples), + totalSamples: 1, + storedSamples: 1, + name: name, + tags: stringTags, + mtype: distributionAggregated, + maxSamples: maxSamples, + specifiedRate: rate, + cardinality: cardinality, + } +} + +type timingMetric = bufferedMetric + +func newTimingMetric(name string, value float64, stringTags string, maxSamples int64, rate float64, cardinality Cardinality) *timingMetric { + return &timingMetric{ + data: newData(value, maxSamples), + totalSamples: 1, + storedSamples: 1, + name: name, + tags: stringTags, + mtype: timingAggregated, + maxSamples: maxSamples, + specifiedRate: rate, + cardinality: cardinality, + } +} + +// newData creates a new slice of float64 with the given capacity. If maxSample +// is less than or equal to 0, it returns a slice with the given value as the +// only element. 
+func newData(value float64, maxSample int64) []float64 { + if maxSample <= 0 { + return []float64{value} + } else { + data := make([]float64, maxSample) + data[0] = value + return data + } +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/noop.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/noop.go new file mode 100644 index 000000000..6500cde9a --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/noop.go @@ -0,0 +1,118 @@ +package statsd + +import "time" + +// NoOpClient is a statsd client that does nothing. Can be useful in testing +// situations for library users. +type NoOpClient struct{} + +// Gauge does nothing and returns nil +func (n *NoOpClient) Gauge(name string, value float64, tags []string, rate float64) error { + return nil +} + +// GaugeWithTimestamp does nothing and returns nil +func (n *NoOpClient) GaugeWithTimestamp(name string, value float64, tags []string, rate float64, timestamp time.Time) error { + return nil +} + +// Count does nothing and returns nil +func (n *NoOpClient) Count(name string, value int64, tags []string, rate float64) error { + return nil +} + +// CountWithTimestamp does nothing and returns nil +func (n *NoOpClient) CountWithTimestamp(name string, value int64, tags []string, rate float64, timestamp time.Time) error { + return nil +} + +// Histogram does nothing and returns nil +func (n *NoOpClient) Histogram(name string, value float64, tags []string, rate float64) error { + return nil +} + +// Distribution does nothing and returns nil +func (n *NoOpClient) Distribution(name string, value float64, tags []string, rate float64) error { + return nil +} + +// Decr does nothing and returns nil +func (n *NoOpClient) Decr(name string, tags []string, rate float64) error { + return nil +} + +// Incr does nothing and returns nil +func (n *NoOpClient) Incr(name string, tags []string, rate float64) error { + return nil +} + +// Set does nothing and returns nil +func (n *NoOpClient) Set(name string, value 
string, tags []string, rate float64) error { + return nil +} + +// Timing does nothing and returns nil +func (n *NoOpClient) Timing(name string, value time.Duration, tags []string, rate float64) error { + return nil +} + +// TimeInMilliseconds does nothing and returns nil +func (n *NoOpClient) TimeInMilliseconds(name string, value float64, tags []string, rate float64) error { + return nil +} + +// Event does nothing and returns nil +func (n *NoOpClient) Event(e *Event) error { + return nil +} + +// SimpleEvent does nothing and returns nil +func (n *NoOpClient) SimpleEvent(title, text string) error { + return nil +} + +// ServiceCheck does nothing and returns nil +func (n *NoOpClient) ServiceCheck(sc *ServiceCheck) error { + return nil +} + +// SimpleServiceCheck does nothing and returns nil +func (n *NoOpClient) SimpleServiceCheck(name string, status ServiceCheckStatus) error { + return nil +} + +// Close does nothing and returns nil +func (n *NoOpClient) Close() error { + return nil +} + +// Flush does nothing and returns nil +func (n *NoOpClient) Flush() error { + return nil +} + +// IsClosed does nothing and return false +func (n *NoOpClient) IsClosed() bool { + return false +} + +// GetTelemetry does nothing and returns an empty Telemetry +func (n *NoOpClient) GetTelemetry() Telemetry { + return Telemetry{} +} + +// Verify that NoOpClient implements the ClientInterface. +// https://golang.org/doc/faq#guarantee_satisfies_interface +var _ ClientInterface = &NoOpClient{} + +// NoOpClientDirect implements ClientDirectInterface and does nothing. 
+type NoOpClientDirect struct { + NoOpClient +} + +// DistributionSamples does nothing and returns nil +func (n *NoOpClientDirect) DistributionSamples(name string, values []float64, tags []string, rate float64) error { + return nil +} + +var _ ClientDirectInterface = &NoOpClientDirect{} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/options.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/options.go new file mode 100644 index 000000000..225a5aea5 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/options.go @@ -0,0 +1,443 @@ +package statsd + +import ( + "fmt" + "math" + "strings" + "time" +) + +var ( + defaultNamespace = "" + defaultTags = []string{} + defaultMaxBytesPerPayload = 0 + defaultMaxMessagesPerPayload = math.MaxInt32 + defaultBufferPoolSize = 0 + defaultBufferFlushInterval = 100 * time.Millisecond + defaultWorkerCount = 32 + defaultSenderQueueSize = 0 + defaultWriteTimeout = 100 * time.Millisecond + defaultConnectTimeout = 1000 * time.Millisecond + defaultTelemetry = true + defaultReceivingMode = mutexMode + defaultChannelModeBufferSize = 4096 + defaultAggregationFlushInterval = 2 * time.Second + defaultAggregation = true + defaultExtendedAggregation = false + defaultMaxBufferedSamplesPerContext = -1 + defaultOriginDetection = true + defaultChannelModeErrorsWhenFull = false + defaultErrorHandler = func(error) {} + defaultAggregatorShardCount = 1 +) + +// Options contains the configuration options for a client. 
+type Options struct { + namespace string + tags []string + maxBytesPerPayload int + maxMessagesPerPayload int + bufferPoolSize int + bufferFlushInterval time.Duration + workersCount int + senderQueueSize int + writeTimeout time.Duration + connectTimeout time.Duration + telemetry bool + receiveMode receivingMode + channelModeBufferSize int + aggregationFlushInterval time.Duration + aggregation bool + extendedAggregation bool + maxBufferedSamplesPerContext int + aggregatorShardCount int + telemetryAddr string + originDetection bool + containerID string + channelModeErrorsWhenFull bool + errorHandler ErrorHandler + tagCardinality *Cardinality +} + +func resolveOptions(options []Option) (*Options, error) { + o := &Options{ + namespace: defaultNamespace, + tags: defaultTags, + maxBytesPerPayload: defaultMaxBytesPerPayload, + maxMessagesPerPayload: defaultMaxMessagesPerPayload, + bufferPoolSize: defaultBufferPoolSize, + bufferFlushInterval: defaultBufferFlushInterval, + workersCount: defaultWorkerCount, + senderQueueSize: defaultSenderQueueSize, + writeTimeout: defaultWriteTimeout, + connectTimeout: defaultConnectTimeout, + telemetry: defaultTelemetry, + receiveMode: defaultReceivingMode, + channelModeBufferSize: defaultChannelModeBufferSize, + aggregationFlushInterval: defaultAggregationFlushInterval, + aggregation: defaultAggregation, + extendedAggregation: defaultExtendedAggregation, + maxBufferedSamplesPerContext: defaultMaxBufferedSamplesPerContext, + originDetection: defaultOriginDetection, + channelModeErrorsWhenFull: defaultChannelModeErrorsWhenFull, + errorHandler: defaultErrorHandler, + aggregatorShardCount: defaultAggregatorShardCount, + } + + for _, option := range options { + err := option(o) + if err != nil { + return nil, err + } + } + + return o, nil +} + +// Option is a client option. Can return an error if validation fails. +type Option func(*Options) error + +// WithNamespace sets a string to be prepend to all metrics, events and service checks name. 
+// +// A '.' will automatically be added after the namespace if needed. For example a metrics 'test' with a namespace 'prod' +// will produce a final metric named 'prod.test'. +func WithNamespace(namespace string) Option { + return func(o *Options) error { + if strings.HasSuffix(namespace, ".") { + o.namespace = namespace + } else { + o.namespace = namespace + "." + } + return nil + } +} + +// WithTags sets global tags to be applied to every metrics, events and service checks. +func WithTags(tags []string) Option { + return func(o *Options) error { + o.tags = tags + return nil + } +} + +// WithMaxMessagesPerPayload sets the maximum number of metrics, events and/or service checks that a single payload can +// contain. +// +// The default is 'math.MaxInt32' which will most likely let the WithMaxBytesPerPayload option take precedence. This +// option can be set to `1` to create an unbuffered client (each metrics/event/service check will be send in its own +// payload to the agent). +func WithMaxMessagesPerPayload(maxMessagesPerPayload int) Option { + return func(o *Options) error { + o.maxMessagesPerPayload = maxMessagesPerPayload + return nil + } +} + +// WithMaxBytesPerPayload sets the maximum number of bytes a single payload can contain. Each sample, even and service +// check must be lower than this value once serialized or an `MessageTooLongError` is returned. +// +// The default value 0 which will set the option to the optimal size for the transport protocol used: 1432 for UDP and +// named pipe and 8192 for UDS. Those values offer the best performances. +// Be careful when changing this option, see +// https://docs.datadoghq.com/developers/dogstatsd/high_throughput/#ensure-proper-packet-sizes. 
+func WithMaxBytesPerPayload(MaxBytesPerPayload int) Option { + return func(o *Options) error { + o.maxBytesPerPayload = MaxBytesPerPayload + return nil + } +} + +// WithBufferPoolSize sets the size of the pool of buffers used to serialized metrics, events and service_checks. +// +// The default, 0, will set the option to the optimal size for the transport protocol used: 2048 for UDP and named pipe +// and 512 for UDS. +func WithBufferPoolSize(bufferPoolSize int) Option { + return func(o *Options) error { + o.bufferPoolSize = bufferPoolSize + return nil + } +} + +// WithBufferFlushInterval sets the interval after which the current buffer is flushed. +// +// A buffers are used to serialized data, they're flushed either when full (see WithMaxBytesPerPayload) or when it's +// been open for longer than this interval. +// +// With apps sending a high number of metrics/events/service_checks the interval rarely timeout. But with slow sending +// apps increasing this value will reduce the number of payload sent on the wire as more data is serialized in the same +// payload. +// +// Default is 100ms +func WithBufferFlushInterval(bufferFlushInterval time.Duration) Option { + return func(o *Options) error { + o.bufferFlushInterval = bufferFlushInterval + return nil + } +} + +// WithWorkersCount sets the number of workers that will be used to serialized data. +// +// Those workers allow the use of multiple buffers at the same time (see WithBufferPoolSize) to reduce lock contention. +// +// Default is 32. +func WithWorkersCount(workersCount int) Option { + return func(o *Options) error { + if workersCount < 1 { + return fmt.Errorf("workersCount must be a positive integer") + } + o.workersCount = workersCount + return nil + } +} + +// WithSenderQueueSize sets the size of the sender queue in number of buffers. +// +// After data has been serialized in a buffer they're pushed to a queue that the sender will consume and then each one +// ot the agent. 
+// +// The default value 0 will set the option to the optimal size for the transport protocol used: 2048 for UDP and named +// pipe and 512 for UDS. +func WithSenderQueueSize(senderQueueSize int) Option { + return func(o *Options) error { + o.senderQueueSize = senderQueueSize + return nil + } +} + +// WithWriteTimeout sets the timeout for network communication with the Agent, after this interval a payload is +// dropped. This is only used for UDS and named pipes connection. +func WithWriteTimeout(writeTimeout time.Duration) Option { + return func(o *Options) error { + o.writeTimeout = writeTimeout + return nil + } +} + +// WithConnectTimeout sets the timeout for network connection with the Agent, after this interval the connection +// attempt is aborted. This is only used for UDS connection. This will also reset the connection if nothing can be +// written to it for this duration. +func WithConnectTimeout(connectTimeout time.Duration) Option { + return func(o *Options) error { + o.connectTimeout = connectTimeout + return nil + } +} + +// WithChannelMode make the client use channels to receive metrics +// +// This determines how the client receive metrics from the app (for example when calling the `Gauge()` method). +// The client will either drop the metrics if its buffers are full (WithChannelMode option) or block the caller until the +// metric can be handled (WithMutexMode option). By default, the client use mutexes. +// +// WithChannelMode uses a channel (see WithChannelModeBufferSize to configure its size) to receive metrics and drops metrics if +// the channel is full. Sending metrics in this mode is much slower that WithMutexMode (because of the channel), but will not +// block the application. This mode is made for application using statsd directly into the application code instead of +// a separated periodic reporter. The goal is to not slow down the application at the cost of dropping metrics and having a lower max +// throughput. 
+func WithChannelMode() Option { + return func(o *Options) error { + o.receiveMode = channelMode + return nil + } +} + +// WithMutexMode will use mutex to receive metrics from the app through the API. +// +// This determines how the client receive metrics from the app (for example when calling the `Gauge()` method). +// The client will either drop the metrics if its buffers are full (WithChannelMode option) or block the caller until the +// metric can be handled (WithMutexMode option). By default the client use mutexes. +// +// WithMutexMode uses mutexes to receive metrics which is much faster than channels but can cause some lock contention +// when used with a high number of goroutines sending the same metrics. Mutexes are sharded based on the metrics name +// which limit mutex contention when multiple goroutines send different metrics (see WithWorkersCount). This is the +// default behavior which will produce the best throughput. +func WithMutexMode() Option { + return func(o *Options) error { + o.receiveMode = mutexMode + return nil + } +} + +// WithChannelModeBufferSize sets the size of the channel holding incoming metrics when WithChannelMode is used. +func WithChannelModeBufferSize(bufferSize int) Option { + return func(o *Options) error { + o.channelModeBufferSize = bufferSize + return nil + } +} + +// WithChannelModeErrorsWhenFull makes the client return an error when the channel is full. +// This should be enabled if you want to be notified when the client is dropping metrics. You +// will also need to set `WithErrorHandler` to be notified of sender error. This might have +// a small performance impact. +func WithChannelModeErrorsWhenFull() Option { + return func(o *Options) error { + o.channelModeErrorsWhenFull = true + return nil + } +} + +// WithoutChannelModeErrorsWhenFull makes the client not return an error when the channel is full. 
+func WithoutChannelModeErrorsWhenFull() Option { + return func(o *Options) error { + o.channelModeErrorsWhenFull = false + return nil + } +} + +// WithErrorHandler sets a function that will be called when an error occurs. +func WithErrorHandler(errorHandler ErrorHandler) Option { + return func(o *Options) error { + o.errorHandler = errorHandler + return nil + } +} + +// WithAggregationInterval sets the interval at which aggregated metrics are flushed. See WithClientSideAggregation and +// WithExtendedClientSideAggregation for more. +// +// The default interval is 2s. The interval must divide the Agent reporting period (default=10s) evenly to reduce "aliasing" +// that can cause values to appear irregular/spiky. +// +// For example a 3s aggregation interval will create spikes in the final graph: an application sending a count metric +// that increments at a constant 1000 times per second will appear noisy with an interval of 3s. This is because +// client-side aggregation would report every 3 seconds, while the agent is reporting every 10 seconds. This means in +// each agent bucket, the values are: 9000, 9000, 12000. +func WithAggregationInterval(interval time.Duration) Option { + return func(o *Options) error { + o.aggregationFlushInterval = interval + return nil + } +} + +// WithClientSideAggregation enables client side aggregation for Gauges, Counts and Sets. +func WithClientSideAggregation() Option { + return func(o *Options) error { + o.aggregation = true + return nil + } +} + +// WithoutClientSideAggregation disables client side aggregation. +func WithoutClientSideAggregation() Option { + return func(o *Options) error { + o.aggregation = false + o.extendedAggregation = false + return nil + } +} + +// WithExtendedClientSideAggregation enables client side aggregation for all types. This feature is only compatible with +// Agent's version >=6.25.0 && <7.0.0 or Agent's versions >=7.25.0. 
+// When enabled, the use of `rate` with distribution is discouraged and `WithMaxSamplesPerContext()` should be used. +// If `rate` is used with different values of `rate` the resulting rate is not guaranteed to be correct. +func WithExtendedClientSideAggregation() Option { + return func(o *Options) error { + o.aggregation = true + o.extendedAggregation = true + return nil + } +} + +// WithMaxSamplesPerContext limits the number of samples for metric types that require multiple samples to be sent +// over statsd to the agent, such as distributions or timings. This limits the number of samples per +// context for a distribution to a given number. Gauges and counts will not be affected as a single sample per context +// is sent with client side aggregation. +// - This will enable client side aggregation for all metrics. +// - This feature should be used with `WithExtendedClientSideAggregation` for optimal results. +func WithMaxSamplesPerContext(maxSamplesPerDistribution int) Option { + return func(o *Options) error { + o.aggregation = true + o.maxBufferedSamplesPerContext = maxSamplesPerDistribution + return nil + } +} + +// WithoutTelemetry disables the client telemetry. +// +// More on this here: https://docs.datadoghq.com/developers/dogstatsd/high_throughput/#client-side-telemetry +func WithoutTelemetry() Option { + return func(o *Options) error { + o.telemetry = false + return nil + } +} + +// WithTelemetryAddr sets a different address for telemetry metrics. By default the same address as the client is used +// for telemetry. +// +// More on this here: https://docs.datadoghq.com/developers/dogstatsd/high_throughput/#client-side-telemetry +func WithTelemetryAddr(addr string) Option { + return func(o *Options) error { + o.telemetryAddr = addr + return nil + } +} + +// WithoutOriginDetection disables the client origin detection. +// When enabled, the client tries to discover its container ID and sends it to the Agent +// to enrich the metrics with container tags. 
+// If the container id is not found and the client is running in a private cgroup namespace, the client +// sends the base cgroup controller inode. +// Origin detection can also be disabled by configuring the environment variable DD_ORIGIN_DETECTION_ENABLED=false +// The client tries to read the container ID by parsing the file /proc/self/cgroup, this is not supported on Windows. +// +// More on this here: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#origin-detection-over-udp +func WithoutOriginDetection() Option { + return func(o *Options) error { + o.originDetection = false + return nil + } +} + +// WithOriginDetection enables the client origin detection. +// This feature requires Datadog Agent version >=6.35.0 && <7.0.0 or Agent versions >=7.35.0. +// When enabled, the client tries to discover its container ID and sends it to the Agent +// to enrich the metrics with container tags. +// If the container id is not found and the client is running in a private cgroup namespace, the client +// sends the base cgroup controller inode. +// Origin detection can be disabled by configuring the environment variable DD_ORIGIN_DETECTION_ENABLED=false +// +// More on this here: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#origin-detection-over-udp +func WithOriginDetection() Option { + return func(o *Options) error { + o.originDetection = true + return nil + } +} + +// WithContainerID allows passing the container ID, this will be used by the Agent to enrich metrics with container tags. +// This feature requires Datadog Agent version >=6.35.0 && <7.0.0 or Agent versions >=7.35.0. +// When configured, the provided container ID is prioritized over the container ID discovered via Origin Detection. +// The client prioritizes the value passed via DD_ENTITY_ID (if set) over the container ID. 
+func WithContainerID(id string) Option { + return func(o *Options) error { + o.containerID = id + return nil + } +} + +// WithCardinality sets the tag cardinality of the metric. +func WithCardinality(card Cardinality) Option { + return func(o *Options) error { + if !card.isValid() { + return fmt.Errorf("invalid cardinality %d", card) + } + o.tagCardinality = &card + return nil + } +} + +// WithAggregatorShardCount sets the number of shards used for the aggregator. +// Higher values reduce lock contention but increase memory usage. +// +// The default is 1 as to mimic current behavior. +func WithAggregatorShardCount(shardCount int) Option { + return func(o *Options) error { + if shardCount < 1 { + return fmt.Errorf("shardCount must be a positive integer") + } + o.aggregatorShardCount = shardCount + return nil + } +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/pipe.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/pipe.go new file mode 100644 index 000000000..1188b00f3 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/pipe.go @@ -0,0 +1,13 @@ +//go:build !windows +// +build !windows + +package statsd + +import ( + "errors" + "time" +) + +func newWindowsPipeWriter(pipepath string, writeTimeout time.Duration) (Transport, error) { + return nil, errors.New("Windows Named Pipes are only supported on Windows") +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/pipe_windows.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/pipe_windows.go new file mode 100644 index 000000000..c27434ccf --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/pipe_windows.go @@ -0,0 +1,81 @@ +//go:build windows +// +build windows + +package statsd + +import ( + "net" + "sync" + "time" + + "github.com/Microsoft/go-winio" +) + +type pipeWriter struct { + mu sync.RWMutex + conn net.Conn + timeout time.Duration + pipepath string +} + +func (p *pipeWriter) Write(data []byte) (n int, err error) { + conn, err := p.ensureConnection() + if 
err != nil { + return 0, err + } + + p.mu.RLock() + conn.SetWriteDeadline(time.Now().Add(p.timeout)) + p.mu.RUnlock() + + n, err = conn.Write(data) + if err != nil { + if e, ok := err.(net.Error); !ok || !e.Temporary() { + // disconnected; retry again on next attempt + p.mu.Lock() + p.conn = nil + p.mu.Unlock() + } + } + return n, err +} + +func (p *pipeWriter) ensureConnection() (net.Conn, error) { + p.mu.RLock() + conn := p.conn + p.mu.RUnlock() + if conn != nil { + return conn, nil + } + + // looks like we might need to connect - try again with write locking. + p.mu.Lock() + defer p.mu.Unlock() + if p.conn != nil { + return p.conn, nil + } + newconn, err := winio.DialPipe(p.pipepath, nil) + if err != nil { + return nil, err + } + p.conn = newconn + return newconn, nil +} + +func (p *pipeWriter) Close() error { + return p.conn.Close() +} + +// GetTransportName returns the name of the transport +func (p *pipeWriter) GetTransportName() string { + return writerWindowsPipe +} + +func newWindowsPipeWriter(pipepath string, writeTimeout time.Duration) (*pipeWriter, error) { + // Defer connection establishment to first write + return &pipeWriter{ + conn: nil, + timeout: writeTimeout, + pipepath: pipepath, + }, nil +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/sender.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/sender.go new file mode 100644 index 000000000..fc80395c3 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/sender.go @@ -0,0 +1,145 @@ +package statsd + +import ( + "io" + "sync/atomic" +) + +// senderTelemetry contains telemetry about the health of the sender +type senderTelemetry struct { + totalPayloadsSent uint64 + totalPayloadsDroppedQueueFull uint64 + totalPayloadsDroppedWriter uint64 + totalBytesSent uint64 + totalBytesDroppedQueueFull uint64 + totalBytesDroppedWriter uint64 +} + +type Transport interface { + io.WriteCloser + + // GetTransportName returns the name of the transport + GetTransportName() string +} + 
+type sender struct { + transport Transport + pool *bufferPool + queue chan *statsdBuffer + telemetry *senderTelemetry + stop chan struct{} + flushSignal chan struct{} + errorHandler ErrorHandler +} + +type ErrorSenderChannelFull struct { + LostElements int + ChannelSize int + Msg string +} + +func (e *ErrorSenderChannelFull) Error() string { + return e.Msg +} + +func newSender(transport Transport, queueSize int, pool *bufferPool, errorHandler ErrorHandler) *sender { + sender := &sender{ + transport: transport, + pool: pool, + queue: make(chan *statsdBuffer, queueSize), + telemetry: &senderTelemetry{}, + stop: make(chan struct{}), + flushSignal: make(chan struct{}), + errorHandler: errorHandler, + } + + go sender.sendLoop() + return sender +} + +func (s *sender) send(buffer *statsdBuffer) { + select { + case s.queue <- buffer: + default: + if s.errorHandler != nil { + err := &ErrorSenderChannelFull{ + LostElements: buffer.elementCount, + ChannelSize: len(s.queue), + Msg: "Sender queue is full", + } + s.errorHandler(err) + } + atomic.AddUint64(&s.telemetry.totalPayloadsDroppedQueueFull, 1) + atomic.AddUint64(&s.telemetry.totalBytesDroppedQueueFull, uint64(len(buffer.bytes()))) + s.pool.returnBuffer(buffer) + } +} + +func (s *sender) write(buffer *statsdBuffer) { + _, err := s.transport.Write(buffer.bytes()) + if err != nil { + atomic.AddUint64(&s.telemetry.totalPayloadsDroppedWriter, 1) + atomic.AddUint64(&s.telemetry.totalBytesDroppedWriter, uint64(len(buffer.bytes()))) + if s.errorHandler != nil { + s.errorHandler(err) + } + } else { + atomic.AddUint64(&s.telemetry.totalPayloadsSent, 1) + atomic.AddUint64(&s.telemetry.totalBytesSent, uint64(len(buffer.bytes()))) + } + s.pool.returnBuffer(buffer) +} + +func (s *sender) flushTelemetryMetrics(t *Telemetry) { + t.TotalPayloadsSent = atomic.LoadUint64(&s.telemetry.totalPayloadsSent) + t.TotalPayloadsDroppedQueueFull = atomic.LoadUint64(&s.telemetry.totalPayloadsDroppedQueueFull) + t.TotalPayloadsDroppedWriter = 
atomic.LoadUint64(&s.telemetry.totalPayloadsDroppedWriter) + + t.TotalBytesSent = atomic.LoadUint64(&s.telemetry.totalBytesSent) + t.TotalBytesDroppedQueueFull = atomic.LoadUint64(&s.telemetry.totalBytesDroppedQueueFull) + t.TotalBytesDroppedWriter = atomic.LoadUint64(&s.telemetry.totalBytesDroppedWriter) +} + +func (s *sender) sendLoop() { + defer close(s.stop) + for { + select { + case buffer := <-s.queue: + s.write(buffer) + case <-s.stop: + return + case <-s.flushSignal: + // At that point we know that the workers are paused (the statsd client + // will pause them before calling sender.flush()). + // So we can fully flush the input queue + s.flushInputQueue() + s.flushSignal <- struct{}{} + } + } +} + +func (s *sender) flushInputQueue() { + for { + select { + case buffer := <-s.queue: + s.write(buffer) + default: + return + } + } +} +func (s *sender) flush() { + s.flushSignal <- struct{}{} + <-s.flushSignal +} + +func (s *sender) close() error { + s.stop <- struct{}{} + <-s.stop + s.flushInputQueue() + return s.transport.Close() +} + +func (s *sender) getTransportName() string { + return s.transport.GetTransportName() +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/service_check.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/service_check.go new file mode 100644 index 000000000..e2850465c --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/service_check.go @@ -0,0 +1,57 @@ +package statsd + +import ( + "fmt" + "time" +) + +// ServiceCheckStatus support +type ServiceCheckStatus byte + +const ( + // Ok is the "ok" ServiceCheck status + Ok ServiceCheckStatus = 0 + // Warn is the "warning" ServiceCheck status + Warn ServiceCheckStatus = 1 + // Critical is the "critical" ServiceCheck status + Critical ServiceCheckStatus = 2 + // Unknown is the "unknown" ServiceCheck status + Unknown ServiceCheckStatus = 3 +) + +// A ServiceCheck is an object that contains status of DataDog service check. 
+type ServiceCheck struct { + // Name of the service check. Required. + Name string + // Status of service check. Required. + Status ServiceCheckStatus + // Timestamp is a timestamp for the serviceCheck. If not provided, the dogstatsd + // server will set this to the current time. + Timestamp time.Time + // Hostname for the serviceCheck. + Hostname string + // A message describing the current state of the serviceCheck. + Message string + // Tags for the serviceCheck. + Tags []string +} + +// NewServiceCheck creates a new serviceCheck with the given name and status. Error checking +// against these values is done at send-time, or upon running sc.Check. +func NewServiceCheck(name string, status ServiceCheckStatus) *ServiceCheck { + return &ServiceCheck{ + Name: name, + Status: status, + } +} + +// Check verifies that a service check is valid. +func (sc *ServiceCheck) Check() error { + if len(sc.Name) == 0 { + return fmt.Errorf("statsd.ServiceCheck name is required") + } + if byte(sc.Status) < 0 || byte(sc.Status) > 3 { + return fmt.Errorf("statsd.ServiceCheck status has invalid value") + } + return nil +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/statsd.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/statsd.go new file mode 100644 index 000000000..1f09ec79a --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/statsd.go @@ -0,0 +1,318 @@ +// Copyright 2013 Ooyala, Inc. + +/* +Package statsd provides a Go dogstatsd client. Dogstatsd extends the popular statsd, +adding tags and histograms and pushing upstream to Datadog. + +Refer to http://docs.datadoghq.com/guides/dogstatsd/ for information about DogStatsD. + +statsd is based on go-statsd-client. +*/ +package statsd + +//go:generate mockgen -source=statsd.go -destination=mocks/statsd.go + +import ( + "io" + "time" +) + +// ClientInterface is an interface that exposes the common client functions for the +// purpose of being able to provide a no-op client or even mocking. 
This can aid +// downstream users' with their testing. +type ClientInterface interface { + // Gauge measures the value of a metric at a particular time. + Gauge(name string, value float64, tags []string, rate float64) error + + // GaugeWithTimestamp measures the value of a metric at a given time. + // BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ + // The value will bypass any aggregation on the client side and agent side, this is + // useful when sending points in the past. + // + // Minimum Datadog Agent version: 7.40.0 + GaugeWithTimestamp(name string, value float64, tags []string, rate float64, timestamp time.Time) error + + // Count tracks how many times something happened per second. + Count(name string, value int64, tags []string, rate float64) error + + // CountWithTimestamp tracks how many times something happened at the given second. + // BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ + // The value will bypass any aggregation on the client side and agent side, this is + // useful when sending points in the past. + // + // Minimum Datadog Agent version: 7.40.0 + CountWithTimestamp(name string, value int64, tags []string, rate float64, timestamp time.Time) error + + // Histogram tracks the statistical distribution of a set of values on each host. + Histogram(name string, value float64, tags []string, rate float64) error + + // Distribution tracks the statistical distribution of a set of values across your infrastructure. + // + // It is recommended to use `WithMaxBufferedMetricsPerContext` to avoid dropping metrics at high throughput, `rate` can + // also be used to limit the load. Both options can *not* be used together. 
+ Distribution(name string, value float64, tags []string, rate float64) error + + // Decr is just Count of -1 + Decr(name string, tags []string, rate float64) error + + // Incr is just Count of 1 + Incr(name string, tags []string, rate float64) error + + // Set counts the number of unique elements in a group. + Set(name string, value string, tags []string, rate float64) error + + // Timing sends timing information, it is an alias for TimeInMilliseconds + Timing(name string, value time.Duration, tags []string, rate float64) error + + // TimeInMilliseconds sends timing information in milliseconds. + // It is flushed by statsd with percentiles, mean and other info (https://github.com/etsy/statsd/blob/master/docs/metric_types.md#timing) + TimeInMilliseconds(name string, value float64, tags []string, rate float64) error + + // Event sends the provided Event. + Event(e *Event) error + + // SimpleEvent sends an event with the provided title and text. + SimpleEvent(title, text string) error + + // ServiceCheck sends the provided ServiceCheck. + ServiceCheck(sc *ServiceCheck) error + + // SimpleServiceCheck sends an serviceCheck with the provided name and status. + SimpleServiceCheck(name string, status ServiceCheckStatus) error + + // Close the client connection. + Close() error + + // Flush forces a flush of all the queued dogstatsd payloads. + Flush() error + + // IsClosed returns if the client has been closed. + IsClosed() bool + + // GetTelemetry return the telemetry metrics for the client since it started. + GetTelemetry() Telemetry +} + +// A Client is a handle for sending messages to dogstatsd. It is safe to +// use one Client from multiple goroutines simultaneously. +type Client struct { + clientEx *ClientEx +} + +// Verify that Client implements the ClientInterface. 
+// https://golang.org/doc/faq#guarantee_satisfies_interface +var _ ClientInterface = &Client{} + +// New returns a pointer to a new Client given an addr in the format "hostname:port" for UDP, +// "unix:///path/to/socket" for UDS or "\\.\pipe\path\to\pipe" for Windows Named Pipes. +func New(addr string, options ...Option) (*Client, error) { + clientEx, err := NewEx(addr, options...) + if err != nil { + return nil, err + } + + return &Client{ + clientEx: clientEx, + }, nil +} + +// NewWithWriter creates a new Client with given writer. Writer is a +// io.WriteCloser +func NewWithWriter(w io.WriteCloser, options ...Option) (*Client, error) { + clientEx, err := NewWithWriterEx(w, options...) + if err != nil { + return nil, err + } + + return &Client{ + clientEx: clientEx, + }, nil +} + +// CloneWithExtraOptions create a new Client with extra options +func CloneWithExtraOptions(c *Client, options ...Option) (*Client, error) { + if c == nil { + return nil, ErrNoClient + } + + clientEx, err := CloneWithExtraOptionsEx(c.clientEx, options...) + if err != nil { + return nil, err + } + + return &Client{ + clientEx: clientEx, + }, nil +} + +// Flush forces a flush of all the queued dogstatsd payloads This method is +// blocking and will not return until everything is sent through the network. +// In mutexMode, this will also block sampling new data to the client while the +// workers and sender are flushed. +func (c *Client) Flush() error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Flush() +} + +// IsClosed returns if the client has been closed. +func (c *Client) IsClosed() bool { + return c.clientEx.IsClosed() +} + +// GetTelemetry return the telemetry metrics for the client since it started. +func (c *Client) GetTelemetry() Telemetry { + return c.clientEx.GetTelemetry() +} + +// GetTransport return the name of the transport used. 
+func (c *Client) GetTransport() string { + return c.clientEx.GetTransport() +} + +// Gauge measures the value of a metric at a particular time. +func (c *Client) Gauge(name string, value float64, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Gauge(name, value, tags, rate) +} + +// GaugeWithTimestamp measures the value of a metric at a given time. +// BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ +// The value will bypass any aggregation on the client side and agent side, this is +// useful when sending points in the past. +// +// Minimum Datadog Agent version: 7.40.0 +func (c *Client) GaugeWithTimestamp(name string, value float64, tags []string, rate float64, timestamp time.Time) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.GaugeWithTimestamp(name, value, tags, rate, timestamp) +} + +// Count tracks how many times something happened per second. +func (c *Client) Count(name string, value int64, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Count(name, value, tags, rate) +} + +// CountWithTimestamp tracks how many times something happened at the given second. +// BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ +// The value will bypass any aggregation on the client side and agent side, this is +// useful when sending points in the past. +// +// Minimum Datadog Agent version: 7.40.0 +func (c *Client) CountWithTimestamp(name string, value int64, tags []string, rate float64, timestamp time.Time) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.CountWithTimestamp(name, value, tags, rate, timestamp) +} + +// Histogram tracks the statistical distribution of a set of values on each host. 
+func (c *Client) Histogram(name string, value float64, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Histogram(name, value, tags, rate) +} + +// Distribution tracks the statistical distribution of a set of values across your infrastructure. +func (c *Client) Distribution(name string, value float64, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Distribution(name, value, tags, rate) +} + +// Decr is just Count of -1 +func (c *Client) Decr(name string, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Decr(name, tags, rate) +} + +// Incr is just Count of 1 +func (c *Client) Incr(name string, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Incr(name, tags, rate) +} + +// Set counts the number of unique elements in a group. +func (c *Client) Set(name string, value string, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Set(name, value, tags, rate) + +} + +// Timing sends timing information, it is an alias for TimeInMilliseconds +func (c *Client) Timing(name string, value time.Duration, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Timing(name, value, tags, rate) +} + +// TimeInMilliseconds sends timing information in milliseconds. +// It is flushed by statsd with percentiles, mean and other info (https://github.com/etsy/statsd/blob/master/docs/metric_types.md#timing) +func (c *Client) TimeInMilliseconds(name string, value float64, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.TimeInMilliseconds(name, value, tags, rate) +} + +// Event sends the provided Event. 
+func (c *Client) Event(e *Event) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Event(e) +} + +// SimpleEvent sends an event with the provided title and text. +func (c *Client) SimpleEvent(title, text string) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.SimpleEvent(title, text) +} + +// ServiceCheck sends the provided ServiceCheck. +func (c *Client) ServiceCheck(sc *ServiceCheck) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.ServiceCheck(sc) +} + +// SimpleServiceCheck sends an serviceCheck with the provided name and status. +func (c *Client) SimpleServiceCheck(name string, status ServiceCheckStatus) error { + if c == nil { + return ErrNoClient + } + return c.clientEx.SimpleServiceCheck(name, status) + +} + +// Close the client connection. +func (c *Client) Close() error { + if c == nil { + return ErrNoClient + } + return c.clientEx.Close() +} + +// sendBlocking is used by the aggregator to inject aggregated metrics. +func (c *Client) sendBlocking(m metric) error { + return c.clientEx.sendBlocking(m) +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/statsd_direct.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/statsd_direct.go new file mode 100644 index 000000000..150ee2c81 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/statsd_direct.go @@ -0,0 +1,69 @@ +package statsd + +import ( + "io" + "strings" + "sync/atomic" +) + +type ClientDirectInterface interface { + DistributionSamples(name string, values []float64, tags []string, rate float64) error +} + +// ClientDirect is an *experimental* statsd client that gives direct access to some dogstatsd features. +// +// It is not recommended to use this client in production. This client might allow you to take advantage of +// new features in the agent before they are released, but it might also break your application. 
+type ClientDirect struct { + *Client +} + +// NewDirect returns a pointer to a new ClientDirect given an addr in the format "hostname:port" for UDP, +// "unix:///path/to/socket" for UDS or "\\.\pipe\path\to\pipe" for Windows Named Pipes. +func NewDirect(addr string, options ...Option) (*ClientDirect, error) { + client, err := New(addr, options...) + if err != nil { + return nil, err + } + return &ClientDirect{ + client, + }, nil +} + +func NewDirectWithWriter(writer io.WriteCloser, options ...Option) (*ClientDirect, error) { + client, err := NewWithWriter(writer, options...) + if err != nil { + return nil, err + } + return &ClientDirect{ + client, + }, nil +} + +// DistributionSamples is similar to Distribution, but it lets the client deal with the sampling. +// +// The provided `rate` is the sampling rate applied by the client and will *not* be used to apply further +// sampling. This is recommended in high performance cases where the overhead of the statsd library might be +// significant and the sampling is already done by the client. +// +// `WithMaxBufferedMetricsPerContext` is ignored when using this method. +func (c *ClientDirect) DistributionSamples(name string, values []float64, tags []string, rate float64) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.clientEx.telemetry.totalMetricsDistribution, uint64(len(values))) + return c.clientEx.send(metric{ + metricType: distributionAggregated, + name: name, + fvalues: values, + tags: tags, + stags: strings.Join(tags, tagSeparatorSymbol), + rate: rate, + globalTags: c.clientEx.tags, + namespace: c.clientEx.namespace, + }) +} + +// Validate that ClientDirect implements ClientDirectInterface and ClientInterface. 
+var _ ClientDirectInterface = (*ClientDirect)(nil) +var _ ClientInterface = (*ClientDirect)(nil) diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/statsdex.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/statsdex.go new file mode 100644 index 000000000..faa3a1947 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/statsdex.go @@ -0,0 +1,953 @@ +// Copyright 2013 Ooyala, Inc. + +/* +Package statsd provides a Go dogstatsd client. Dogstatsd extends the popular statsd, +adding tags and histograms and pushing upstream to Datadog. + +Refer to http://docs.datadoghq.com/guides/dogstatsd/ for information about DogStatsD. + +statsd is based on go-statsd-client. +*/ +package statsd + +//go:generate mockgen -source=statsd.go -destination=mocks/statsd.go + +import ( + "errors" + "fmt" + "io" + "net/url" + "os" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" +) + +/* +OptimalUDPPayloadSize defines the optimal payload size for a UDP datagram, 1432 bytes +is optimal for regular networks with an MTU of 1500 so datagrams don't get +fragmented. It's generally recommended not to fragment UDP datagrams as losing +a single fragment will cause the entire datagram to be lost. +*/ +const OptimalUDPPayloadSize = 1432 + +/* +MaxUDPPayloadSize defines the maximum payload size for a UDP datagram. +Its value comes from the calculation: 65535 bytes Max UDP datagram size - +8byte UDP header - 60byte max IP headers +any number greater than that will see frames being cut out. +*/ +const MaxUDPPayloadSize = 65467 + +// DefaultUDPBufferPoolSize is the default size of the buffer pool for UDP clients. +const DefaultUDPBufferPoolSize = 2048 + +// DefaultUDSBufferPoolSize is the default size of the buffer pool for UDS clients. +const DefaultUDSBufferPoolSize = 512 + +/* +DefaultMaxAgentPayloadSize is the default maximum payload size the agent +can receive. This can be adjusted by changing dogstatsd_buffer_size in the +agent configuration file datadog.yaml. 
This is also used as the optimal payload size +for UDS datagrams. +*/ +const DefaultMaxAgentPayloadSize = 8192 + +/* +UnixAddressPrefix holds the prefix to use to enable Unix Domain Socket +traffic instead of UDP. The type of the socket will be guessed. +*/ +const UnixAddressPrefix = "unix://" + +/* +UnixAddressDatagramPrefix holds the prefix to use to enable Unix Domain Socket +datagram traffic instead of UDP. +*/ +const UnixAddressDatagramPrefix = "unixgram://" + +/* +UnixAddressStreamPrefix holds the prefix to use to enable Unix Domain Socket +stream traffic instead of UDP. +*/ +const UnixAddressStreamPrefix = "unixstream://" + +/* +WindowsPipeAddressPrefix holds the prefix to use to enable Windows Named Pipes +traffic instead of UDP. +*/ +const WindowsPipeAddressPrefix = `\\.\pipe\` + +var ( + AddressPrefixes = []string{UnixAddressPrefix, UnixAddressDatagramPrefix, UnixAddressStreamPrefix, WindowsPipeAddressPrefix} +) + +const ( + agentHostEnvVarName = "DD_AGENT_HOST" + agentPortEnvVarName = "DD_DOGSTATSD_PORT" + agentURLEnvVarName = "DD_DOGSTATSD_URL" + defaultUDPPort = "8125" +) + +const ( + // ddEntityID specifies client-side user-specified entity ID injection. + // This env var can be set to the Pod UID on Kubernetes via the downward API. + // Docs: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#origin-detection-over-udp + ddEntityID = "DD_ENTITY_ID" + + // ddEntityIDTag specifies the tag name for the client-side entity ID injection + // The Agent expects this tag to contain a non-prefixed Kubernetes Pod UID. + ddEntityIDTag = "dd.internal.entity_id" + + // originDetectionEnabled specifies the env var to enable/disable sending the container ID field. + originDetectionEnabled = "DD_ORIGIN_DETECTION_ENABLED" +) + +/* +ddEnvTagsMapping is a mapping of each "DD_" prefixed environment variable +to a specific tag name. We use a slice to keep the order and simplify tests. 
+*/ +var ddEnvTagsMapping = []struct{ envName, tagName string }{ + {ddEntityID, ddEntityIDTag}, // Client-side entity ID injection for container tagging. + {"DD_ENV", "env"}, // The name of the env in which the service runs. + {"DD_SERVICE", "service"}, // The name of the running service. + {"DD_VERSION", "version"}, // The current version of the running service. +} + +type metricType int + +const ( + gauge metricType = iota + count + histogram + histogramAggregated + distribution + distributionAggregated + set + timing + timingAggregated + event + serviceCheck +) + +type receivingMode int + +const ( + mutexMode receivingMode = iota + channelMode +) + +const ( + writerNameUDP string = "udp" + writerNameUDS string = "uds" + writerNameUDSStream string = "uds-stream" + writerWindowsPipe string = "pipe" + writerNameCustom string = "custom" +) + +// noTimestamp is used as a value for metric without a given timestamp. +const noTimestamp = int64(0) + +type metric struct { + metricType metricType + namespace string + globalTags []string + name string + fvalue float64 + fvalues []float64 + ivalue int64 + svalue string + evalue *Event + scvalue *ServiceCheck + tags []string + stags string + rate float64 + timestamp int64 + originDetection bool + cardinality Cardinality +} + +type noClientErr string + +// ErrNoClient is returned if statsd reporting methods are invoked on +// a nil client. +const ErrNoClient = noClientErr("statsd client is nil") + +func (e noClientErr) Error() string { + return string(e) +} + +type invalidTimestampErr string + +// InvalidTimestamp is returned if a provided timestamp is invalid. +const InvalidTimestamp = invalidTimestampErr("invalid timestamp") + +func (e invalidTimestampErr) Error() string { + return string(e) +} + +// ClientInterfaceEx is an temporary interface that is similar to ClientInterface +// but with the addition of a `...Parameter` for the telemetry functions. This is currently +// just used to specify the tag cardinality. 
We want to avoid changing ClientInterface +// at present as that would require a new major release. +// Users should avoid implementing this interface as it will be deprecated in the next version. +type ClientInterfaceEx interface { + // Gauge measures the value of a metric at a particular time. + Gauge(name string, value float64, tags []string, rate float64, parameters ...Parameter) error + + // GaugeWithTimestamp measures the value of a metric at a given time. + // BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ + // The value will bypass any aggregation on the client side and agent side, this is + // useful when sending points in the past. + // + // Minimum Datadog Agent version: 7.40.0 + GaugeWithTimestamp(name string, value float64, tags []string, rate float64, timestamp time.Time, parameters ...Parameter) error + + // Count tracks how many times something happened per second. + Count(name string, value int64, tags []string, rate float64, parameters ...Parameter) error + + // CountWithTimestamp tracks how many times something happened at the given second. + // BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ + // The value will bypass any aggregation on the client side and agent side, this is + // useful when sending points in the past. + // + // Minimum Datadog Agent version: 7.40.0 + CountWithTimestamp(name string, value int64, tags []string, rate float64, timestamp time.Time, parameters ...Parameter) error + + // Histogram tracks the statistical distribution of a set of values on each host. + Histogram(name string, value float64, tags []string, rate float64, parameters ...Parameter) error + + // Distribution tracks the statistical distribution of a set of values across your infrastructure. 
+ // + // It is recommended to use `WithMaxBufferedMetricsPerContext` to avoid dropping metrics at high throughput, `rate` can + // also be used to limit the load. Both options can *not* be used together. + Distribution(name string, value float64, tags []string, rate float64, parameters ...Parameter) error + + // Decr is just Count of -1 + Decr(name string, tags []string, rate float64, parameters ...Parameter) error + + // Incr is just Count of 1 + Incr(name string, tags []string, rate float64, parameters ...Parameter) error + + // Set counts the number of unique elements in a group. + Set(name string, value string, tags []string, rate float64, parameters ...Parameter) error + + // Timing sends timing information, it is an alias for TimeInMilliseconds + Timing(name string, value time.Duration, tags []string, rate float64, parameters ...Parameter) error + + // TimeInMilliseconds sends timing information in milliseconds. + // It is flushed by statsd with percentiles, mean and other info (https://github.com/etsy/statsd/blob/master/docs/metric_types.md#timing) + TimeInMilliseconds(name string, value float64, tags []string, rate float64, parameters ...Parameter) error + + // Event sends the provided Event. + Event(e *Event, parameters ...Parameter) error + + // SimpleEvent sends an event with the provided title and text. + SimpleEvent(title, text string, parameters ...Parameter) error + + // ServiceCheck sends the provided ServiceCheck. + ServiceCheck(sc *ServiceCheck, parameters ...Parameter) error + + // SimpleServiceCheck sends an serviceCheck with the provided name and status. + SimpleServiceCheck(name string, status ServiceCheckStatus, parameters ...Parameter) error + + // Close the client connection. + Close() error + + // Flush forces a flush of all the queued dogstatsd payloads. + Flush() error + + // IsClosed returns if the client has been closed. + IsClosed() bool + + // GetTelemetry return the telemetry metrics for the client since it started. 
+ GetTelemetry() Telemetry + + // Ensure this interface can't be implemented outside of this package. + // ClientInterfaceEx is a temporary measure to allow us to release a version of the library with the + // extra `...Parameter` parameter (currently used to specify the tag cardinality) in the metric functions + // without having to release a new major version. + // This interface will be deprecated with the next release. + private() +} + +type ErrorHandler func(error) + +// A Client is a handle for sending messages to dogstatsd. It is safe to +// use one Client from multiple goroutines simultaneously. +type ClientEx struct { + // Sender handles the underlying networking protocol + sender *sender + // namespace to prepend to all statsd calls + namespace string + // tags are global tags to be added to every statsd call + tags []string + flushTime time.Duration + telemetry *statsdTelemetry + telemetryClient *telemetryClient + stop chan struct{} + wg sync.WaitGroup + workers []*worker + closerLock sync.Mutex + workersMode receivingMode + aggregatorMode receivingMode + agg *aggregator + aggExtended *aggregator + options []Option + addrOption string + isClosed bool + errorOnBlockedChannel bool + errorHandler ErrorHandler + originDetection bool + defaultCardinality Cardinality +} + +// statsdTelemetry contains telemetry metrics about the client +type statsdTelemetry struct { + totalMetricsGauge uint64 + totalMetricsCount uint64 + totalMetricsHistogram uint64 + totalMetricsDistribution uint64 + totalMetricsSet uint64 + totalMetricsTiming uint64 + totalEvents uint64 + totalServiceChecks uint64 + totalDroppedOnReceive uint64 +} + +// Verify that ClientEx implements the ClientInterfaceEx interface. 
+// https://golang.org/doc/faq#guarantee_satisfies_interface +var _ ClientInterfaceEx = &ClientEx{} + +func resolveAddr(addr string) string { + envPort := "" + + if addr == "" { + addr = os.Getenv(agentHostEnvVarName) + envPort = os.Getenv(agentPortEnvVarName) + agentURL, _ := os.LookupEnv(agentURLEnvVarName) + agentURL = parseAgentURL(agentURL) + + // agentURLEnvVarName has priority over agentHostEnvVarName + if agentURL != "" { + return agentURL + } + } + + if addr == "" { + return "" + } + + for _, prefix := range AddressPrefixes { + if strings.HasPrefix(addr, prefix) { + return addr + } + } + // TODO: How does this work for IPv6? + if strings.Contains(addr, ":") { + return addr + } + if envPort != "" { + addr = fmt.Sprintf("%s:%s", addr, envPort) + } else { + addr = fmt.Sprintf("%s:%s", addr, defaultUDPPort) + } + return addr +} + +func parseAgentURL(agentURL string) string { + if agentURL != "" { + if strings.HasPrefix(agentURL, WindowsPipeAddressPrefix) { + return agentURL + } + + parsedURL, err := url.Parse(agentURL) + if err != nil { + return "" + } + + if parsedURL.Scheme == "udp" { + if strings.Contains(parsedURL.Host, ":") { + return parsedURL.Host + } + return fmt.Sprintf("%s:%s", parsedURL.Host, defaultUDPPort) + } + + if parsedURL.Scheme == "unix" { + return agentURL + } + } + return "" +} + +func createWriter(addr string, writeTimeout time.Duration, connectTimeout time.Duration) (Transport, string, error) { + if addr == "" { + return nil, "", errors.New("No address passed and autodetection from environment failed") + } + + switch { + case strings.HasPrefix(addr, WindowsPipeAddressPrefix): + w, err := newWindowsPipeWriter(addr, writeTimeout) + return w, writerWindowsPipe, err + case strings.HasPrefix(addr, UnixAddressPrefix): + w, err := newUDSWriter(addr[len(UnixAddressPrefix):], writeTimeout, connectTimeout, "") + return w, writerNameUDS, err + case strings.HasPrefix(addr, UnixAddressDatagramPrefix): + w, err := 
newUDSWriter(addr[len(UnixAddressDatagramPrefix):], writeTimeout, connectTimeout, "unixgram") + return w, writerNameUDS, err + case strings.HasPrefix(addr, UnixAddressStreamPrefix): + w, err := newUDSWriter(addr[len(UnixAddressStreamPrefix):], writeTimeout, connectTimeout, "unix") + return w, writerNameUDS, err + default: + w, err := newUDPWriter(addr, writeTimeout) + return w, writerNameUDP, err + } +} + +// New returns a pointer to a new Client given an addr in the format "hostname:port" for UDP, +// "unix:///path/to/socket" for UDS or "\\.\pipe\path\to\pipe" for Windows Named Pipes. +func NewEx(addr string, options ...Option) (*ClientEx, error) { + o, err := resolveOptions(options) + if err != nil { + return nil, err + } + + addr = resolveAddr(addr) + w, writerType, err := createWriter(addr, o.writeTimeout, o.connectTimeout) + if err != nil { + return nil, err + } + + client, err := newWithWriter(w, o, writerType) + if err == nil { + client.options = append(client.options, options...) + client.addrOption = addr + } + return client, err +} + +type customWriter struct { + io.WriteCloser +} + +func (w *customWriter) GetTransportName() string { + return writerNameCustom +} + +// NewWithWriter creates a new ClientEx with given writer. Writer is a +// io.WriteCloser +func NewWithWriterEx(w io.WriteCloser, options ...Option) (*ClientEx, error) { + o, err := resolveOptions(options) + if err != nil { + return nil, err + } + return newWithWriter(&customWriter{w}, o, writerNameCustom) +} + +// CloneWithExtraOptions create a new ClientEx with extra options +func CloneWithExtraOptionsEx(c *ClientEx, options ...Option) (*ClientEx, error) { + if c == nil { + return nil, ErrNoClient + } + + if c.addrOption == "" { + return nil, fmt.Errorf("can't clone client with no addrOption") + } + opt := append(c.options, options...) + return NewEx(c.addrOption, opt...) 
+} + +func newWithWriter(w Transport, o *Options, writerName string) (*ClientEx, error) { + c := ClientEx{ + namespace: o.namespace, + tags: o.tags, + telemetry: &statsdTelemetry{}, + errorOnBlockedChannel: o.channelModeErrorsWhenFull, + errorHandler: o.errorHandler, + originDetection: isOriginDetectionEnabled(o), + } + + // Inject values of DD_* environment variables as global tags. + for _, mapping := range ddEnvTagsMapping { + if value := os.Getenv(mapping.envName); value != "" { + c.tags = append(c.tags, fmt.Sprintf("%s:%s", mapping.tagName, value)) + } + } + // Whether origin detection is enabled or not for this client, we need to initialize the global + // external environment variable in case another client has enabled it and needs to access it. + initExternalEnv() + + if o.tagCardinality != nil { + c.defaultCardinality = *o.tagCardinality + } else if card, ok := envTagCardinality(); ok { + c.defaultCardinality = card + } else { + c.defaultCardinality = CardinalityNotSet + } + + initContainerID(o.containerID, fillInContainerID(o), isHostCgroupNamespace()) + isUDS := writerName == writerNameUDS + + if o.maxBytesPerPayload == 0 { + if isUDS { + o.maxBytesPerPayload = DefaultMaxAgentPayloadSize + } else { + o.maxBytesPerPayload = OptimalUDPPayloadSize + } + } + if o.bufferPoolSize == 0 { + if isUDS { + o.bufferPoolSize = DefaultUDSBufferPoolSize + } else { + o.bufferPoolSize = DefaultUDPBufferPoolSize + } + } + if o.senderQueueSize == 0 { + if isUDS { + o.senderQueueSize = DefaultUDSBufferPoolSize + } else { + o.senderQueueSize = DefaultUDPBufferPoolSize + } + } + + bufferPool := newBufferPool(o.bufferPoolSize, o.maxBytesPerPayload, o.maxMessagesPerPayload) + c.sender = newSender(w, o.senderQueueSize, bufferPool, o.errorHandler) + c.aggregatorMode = o.receiveMode + + c.workersMode = o.receiveMode + // channelMode mode at the worker level is not enabled when + // ExtendedAggregation is since the user app will not directly + // use the worker (the aggregator sit 
between the app and the + // workers). + if o.extendedAggregation { + c.workersMode = mutexMode + } + + if o.aggregation || o.extendedAggregation || o.maxBufferedSamplesPerContext > 0 { + c.agg = newAggregator(&c, int64(o.maxBufferedSamplesPerContext), o.aggregatorShardCount) + c.agg.start(o.aggregationFlushInterval) + + if o.extendedAggregation { + c.aggExtended = c.agg + + if c.aggregatorMode == channelMode { + c.agg.startReceivingMetric(o.channelModeBufferSize, o.workersCount) + } + } + } + + for i := 0; i < o.workersCount; i++ { + w := newWorker(bufferPool, c.sender) + c.workers = append(c.workers, w) + + if c.workersMode == channelMode { + w.startReceivingMetric(o.channelModeBufferSize) + } + } + + c.flushTime = o.bufferFlushInterval + c.stop = make(chan struct{}, 1) + + c.wg.Add(1) + go func() { + defer c.wg.Done() + c.watch() + }() + + if o.telemetry { + if o.telemetryAddr == "" { + c.telemetryClient = newTelemetryClient(&c, c.agg != nil) + } else { + var err error + c.telemetryClient, err = newTelemetryClientWithCustomAddr(&c, o.telemetryAddr, c.agg != nil, bufferPool, o.writeTimeout, o.connectTimeout) + if err != nil { + return nil, err + } + } + c.telemetryClient.run(&c.wg, c.stop) + } + + return &c, nil +} + +func (c *ClientEx) watch() { + ticker := time.NewTicker(c.flushTime) + + for { + select { + case <-ticker.C: + for _, w := range c.workers { + w.flush() + } + case <-c.stop: + ticker.Stop() + return + } + } +} + +// Flush forces a flush of all the queued dogstatsd payloads This method is +// blocking and will not return until everything is sent through the network. +// In mutexMode, this will also block sampling new data to the client while the +// workers and sender are flushed. 
+func (c *ClientEx) Flush() error { + if c == nil { + return ErrNoClient + } + if c.agg != nil { + c.agg.flush() + } + for _, w := range c.workers { + w.pause() + defer w.unpause() + w.flushUnsafe() + } + // Now that the worker are pause the sender can flush the queue between + // worker and senders + c.sender.flush() + return nil +} + +// IsClosed returns if the client has been closed. +func (c *ClientEx) IsClosed() bool { + c.closerLock.Lock() + defer c.closerLock.Unlock() + return c.isClosed +} + +func (c *ClientEx) flushTelemetryMetrics(t *Telemetry) { + t.TotalMetricsGauge = atomic.LoadUint64(&c.telemetry.totalMetricsGauge) + t.TotalMetricsCount = atomic.LoadUint64(&c.telemetry.totalMetricsCount) + t.TotalMetricsSet = atomic.LoadUint64(&c.telemetry.totalMetricsSet) + t.TotalMetricsHistogram = atomic.LoadUint64(&c.telemetry.totalMetricsHistogram) + t.TotalMetricsDistribution = atomic.LoadUint64(&c.telemetry.totalMetricsDistribution) + t.TotalMetricsTiming = atomic.LoadUint64(&c.telemetry.totalMetricsTiming) + t.TotalEvents = atomic.LoadUint64(&c.telemetry.totalEvents) + t.TotalServiceChecks = atomic.LoadUint64(&c.telemetry.totalServiceChecks) + t.TotalDroppedOnReceive = atomic.LoadUint64(&c.telemetry.totalDroppedOnReceive) +} + +// GetTelemetry return the telemetry metrics for the client since it started. +func (c *ClientEx) GetTelemetry() Telemetry { + return c.telemetryClient.getTelemetry() +} + +// GetTransport return the name of the transport used. 
+func (c *ClientEx) GetTransport() string { + if c.sender == nil { + return "" + } + return c.sender.getTransportName() +} + +type ErrorInputChannelFull struct { + Metric metric + ChannelSize int + Msg string +} + +func (e ErrorInputChannelFull) Error() string { + return e.Msg +} + +func (c *ClientEx) send(m metric) error { + h := hashString32(m.name) + worker := c.workers[h%uint32(len(c.workers))] + + if c.workersMode == channelMode { + select { + case worker.inputMetrics <- m: + default: + atomic.AddUint64(&c.telemetry.totalDroppedOnReceive, 1) + err := &ErrorInputChannelFull{m, len(worker.inputMetrics), "Worker input channel full"} + if c.errorHandler != nil { + c.errorHandler(err) + } + if c.errorOnBlockedChannel { + return err + } + } + return nil + } + return worker.processMetric(m) +} + +// sendBlocking is used by the aggregator to inject aggregated metrics. +func (c *ClientEx) sendBlocking(m metric) error { + m.globalTags = c.tags + m.namespace = c.namespace + + h := hashString32(m.name) + worker := c.workers[h%uint32(len(c.workers))] + return worker.processMetric(m) +} + +func (c *ClientEx) sendToAggregator(mType metricType, name string, value float64, tags []string, rate float64, f bufferedMetricSampleFunc, cardinality Cardinality) error { + if c.aggregatorMode == channelMode { + m := metric{metricType: mType, name: name, fvalue: value, tags: tags, rate: rate, cardinality: cardinality} + select { + case c.aggExtended.inputMetrics <- m: + default: + atomic.AddUint64(&c.telemetry.totalDroppedOnReceive, 1) + err := &ErrorInputChannelFull{m, len(c.aggExtended.inputMetrics), "Aggregator input channel full"} + if c.errorHandler != nil { + c.errorHandler(err) + } + if c.errorOnBlockedChannel { + return err + } + } + return nil + } + return f(name, value, tags, rate, cardinality) +} + +// Gauge measures the value of a metric at a particular time. 
+func (c *ClientEx) Gauge(name string, value float64, tags []string, rate float64, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalMetricsGauge, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + if c.agg != nil { + return c.agg.gauge(name, value, tags, cardinality) + } + return c.send(metric{metricType: gauge, name: name, fvalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// GaugeWithTimestamp measures the value of a metric at a given time. +// BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ +// The value will bypass any aggregation on the client side and agent side, this is +// useful when sending points in the past. +// +// Minimum Datadog Agent version: 7.40.0 +func (c *ClientEx) GaugeWithTimestamp(name string, value float64, tags []string, rate float64, timestamp time.Time, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + + if timestamp.IsZero() || timestamp.Unix() <= noTimestamp { + return InvalidTimestamp + } + + atomic.AddUint64(&c.telemetry.totalMetricsGauge, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + return c.send(metric{metricType: gauge, name: name, fvalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, timestamp: timestamp.Unix(), originDetection: c.originDetection, cardinality: cardinality}) +} + +// Count tracks how many times something happened per second. 
+func (c *ClientEx) Count(name string, value int64, tags []string, rate float64, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalMetricsCount, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + if c.agg != nil { + return c.agg.count(name, value, tags, cardinality) + } + return c.send(metric{metricType: count, name: name, ivalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// CountWithTimestamp tracks how many times something happened at the given second. +// BETA - Please contact our support team for more information to use this feature: https://www.datadoghq.com/support/ +// The value will bypass any aggregation on the client side and agent side, this is +// useful when sending points in the past. +// +// Minimum Datadog Agent version: 7.40.0 +func (c *ClientEx) CountWithTimestamp(name string, value int64, tags []string, rate float64, timestamp time.Time, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + + if timestamp.IsZero() || timestamp.Unix() <= noTimestamp { + return InvalidTimestamp + } + + atomic.AddUint64(&c.telemetry.totalMetricsCount, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + return c.send(metric{metricType: count, name: name, ivalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, timestamp: timestamp.Unix(), originDetection: c.originDetection, cardinality: cardinality}) +} + +// Histogram tracks the statistical distribution of a set of values on each host. 
+func (c *ClientEx) Histogram(name string, value float64, tags []string, rate float64, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalMetricsHistogram, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + if c.aggExtended != nil { + return c.sendToAggregator(histogram, name, value, tags, rate, c.aggExtended.histogram, cardinality) + } + return c.send(metric{metricType: histogram, name: name, fvalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// Distribution tracks the statistical distribution of a set of values across your infrastructure. +func (c *ClientEx) Distribution(name string, value float64, tags []string, rate float64, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalMetricsDistribution, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + if c.aggExtended != nil { + return c.sendToAggregator(distribution, name, value, tags, rate, c.aggExtended.distribution, cardinality) + } + return c.send(metric{metricType: distribution, name: name, fvalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// Decr is just Count of -1 +func (c *ClientEx) Decr(name string, tags []string, rate float64, parameters ...Parameter) error { + return c.Count(name, -1, tags, rate, parameters...) +} + +// Incr is just Count of 1 +func (c *ClientEx) Incr(name string, tags []string, rate float64, parameters ...Parameter) error { + return c.Count(name, 1, tags, rate, parameters...) +} + +// Set counts the number of unique elements in a group. 
+func (c *ClientEx) Set(name string, value string, tags []string, rate float64, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalMetricsSet, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + if c.agg != nil { + return c.agg.set(name, value, tags, cardinality) + } + return c.send(metric{metricType: set, name: name, svalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// Timing sends timing information, it is an alias for TimeInMilliseconds +func (c *ClientEx) Timing(name string, value time.Duration, tags []string, rate float64, parameters ...Parameter) error { + return c.TimeInMilliseconds(name, value.Seconds()*1000, tags, rate, parameters...) +} + +// TimeInMilliseconds sends timing information in milliseconds. +// It is flushed by statsd with percentiles, mean and other info (https://github.com/etsy/statsd/blob/master/docs/metric_types.md#timing) +func (c *ClientEx) TimeInMilliseconds(name string, value float64, tags []string, rate float64, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalMetricsTiming, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + if c.aggExtended != nil { + return c.sendToAggregator(timing, name, value, tags, rate, c.aggExtended.timing, cardinality) + } + return c.send(metric{metricType: timing, name: name, fvalue: value, tags: tags, rate: rate, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// Event sends the provided Event. 
+func (c *ClientEx) Event(e *Event, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalEvents, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + return c.send(metric{metricType: event, evalue: e, rate: 1, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// SimpleEvent sends an event with the provided title and text. +func (c *ClientEx) SimpleEvent(title, text string, parameters ...Parameter) error { + e := NewEvent(title, text) + return c.Event(e, parameters...) +} + +// ServiceCheck sends the provided ServiceCheck. +func (c *ClientEx) ServiceCheck(sc *ServiceCheck, parameters ...Parameter) error { + if c == nil { + return ErrNoClient + } + atomic.AddUint64(&c.telemetry.totalServiceChecks, 1) + cardinality := parameterCardinality(parameters, c.defaultCardinality) + return c.send(metric{metricType: serviceCheck, scvalue: sc, rate: 1, globalTags: c.tags, namespace: c.namespace, originDetection: c.originDetection, cardinality: cardinality}) +} + +// SimpleServiceCheck sends an serviceCheck with the provided name and status. +func (c *ClientEx) SimpleServiceCheck(name string, status ServiceCheckStatus, parameters ...Parameter) error { + sc := NewServiceCheck(name, status) + return c.ServiceCheck(sc, parameters...) +} + +// Close the client connection. 
+func (c *ClientEx) Close() error { + if c == nil { + return ErrNoClient + } + + // Acquire closer lock to ensure only one thread can close the stop channel + c.closerLock.Lock() + defer c.closerLock.Unlock() + + if c.isClosed { + return nil + } + + // Notify all other threads that they should stop + select { + case <-c.stop: + return nil + default: + } + close(c.stop) + + if c.workersMode == channelMode { + for _, w := range c.workers { + w.stopReceivingMetric() + } + } + + // flush the aggregator first + if c.agg != nil { + if c.aggExtended != nil && c.aggregatorMode == channelMode { + c.agg.stopReceivingMetric() + } + c.agg.stop() + } + + // Wait for the threads to stop + c.wg.Wait() + + c.Flush() + + c.isClosed = true + return c.sender.close() +} + +func (*ClientEx) private() { +} + +// isOriginDetectionEnabled returns whether origin detection is enabled. +// +// Disable origin detection only in one of the following cases: +// - DD_ORIGIN_DETECTION_ENABLED is explicitly set to false +// - o.originDetection is explicitly set to false, which is true by default +func isOriginDetectionEnabled(o *Options) bool { + if !o.originDetection { + return false + } + + envVarValue := os.Getenv(originDetectionEnabled) + if envVarValue == "" { + // DD_ORIGIN_DETECTION_ENABLED is not set + // default to true + return true + } + + enabled, err := strconv.ParseBool(envVarValue) + if err != nil { + // Error due to an unsupported DD_ORIGIN_DETECTION_ENABLED value + // default to true + return true + } + + return enabled +} + +// fillInContainerID returns whether the clients should fill the container field. 
+func fillInContainerID(o *Options) bool { + if o.containerID != "" { + return false + } + return isOriginDetectionEnabled(o) +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/tag_cardinality.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/tag_cardinality.go new file mode 100644 index 000000000..23d73d530 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/tag_cardinality.go @@ -0,0 +1,78 @@ +package statsd + +import ( + "os" + "strings" +) + +type Parameter interface{} + +type Cardinality int + +const ( + CardinalityNotSet Cardinality = iota + CardinalityNone + CardinalityLow + CardinalityOrchestrator + CardinalityHigh +) + +func (c Cardinality) isValid() bool { + return c >= CardinalityNotSet && c <= CardinalityHigh +} + +func (c Cardinality) String() string { + switch c { + case CardinalityNone: + return "none" + case CardinalityLow: + return "low" + case CardinalityOrchestrator: + return "orchestrator" + case CardinalityHigh: + return "high" + } + return "" +} + +// validateCardinality converts a string to Cardinality +func validateCardinality(card string) (Cardinality, bool) { + card = strings.ToLower(card) + switch card { + case "none": + return CardinalityNone, true + case "low": + return CardinalityLow, true + case "orchestrator": + return CardinalityOrchestrator, true + case "high": + return CardinalityHigh, true + default: + return CardinalityNotSet, false + } +} + +// envTagCardinality returns the tag cardinality value from the DD_CARDINALITY/DATADOG_CARDINALITY environment variable. +func envTagCardinality() (Cardinality, bool) { + // If the user has not provided a value, read the value from the DD_CARDINALITY environment variable. + if card, ok := validateCardinality(os.Getenv("DD_CARDINALITY")); ok { + return card, true + } + + // If DD_CARDINALITY is not set or valid, read the value from the DATADOG_CARDINALITY environment variable. 
+ if card, ok := validateCardinality(os.Getenv("DATADOG_CARDINALITY")); ok { + return card, true + } + + return CardinalityNotSet, false +} + +func parameterCardinality(parameters []Parameter, defaultCardinality Cardinality) Cardinality { + for _, o := range parameters { + c, ok := o.(Cardinality) + if ok && c.isValid() { + return c + } + } + return defaultCardinality +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/telemetry.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/telemetry.go new file mode 100644 index 000000000..bfec2d72d --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/telemetry.go @@ -0,0 +1,307 @@ +package statsd + +import ( + "fmt" + "sync" + "time" +) + +/* +telemetryInterval is the interval at which telemetry will be sent by the client. +*/ +const telemetryInterval = 10 * time.Second + +/* +clientTelemetryTag is a tag identifying this specific client. +*/ +var clientTelemetryTag = "client:go" + +/* +clientVersionTelemetryTag is a tag identifying this specific client version. +*/ +var clientVersionTelemetryTag = "client_version:5.8.3" + +// Telemetry represents internal metrics about the client behavior since it started. +type Telemetry struct { + // + // Those are produced by the 'Client' + // + + // TotalMetrics is the total number of metrics sent by the client before aggregation and sampling. + TotalMetrics uint64 + // TotalMetricsGauge is the total number of gauges sent by the client before aggregation and sampling. + TotalMetricsGauge uint64 + // TotalMetricsCount is the total number of counts sent by the client before aggregation and sampling. + TotalMetricsCount uint64 + // TotalMetricsHistogram is the total number of histograms sent by the client before aggregation and sampling. + TotalMetricsHistogram uint64 + // TotalMetricsDistribution is the total number of distributions sent by the client before aggregation and + // sampling. 
+ TotalMetricsDistribution uint64 + // TotalMetricsSet is the total number of sets sent by the client before aggregation and sampling. + TotalMetricsSet uint64 + // TotalMetricsTiming is the total number of timings sent by the client before aggregation and sampling. + TotalMetricsTiming uint64 + // TotalEvents is the total number of events sent by the client before aggregation and sampling. + TotalEvents uint64 + // TotalServiceChecks is the total number of service_checks sent by the client before aggregation and sampling. + TotalServiceChecks uint64 + + // TotalDroppedOnReceive is the total number metrics/event/service_checks dropped when using ChannelMode (see + // WithChannelMode option). + TotalDroppedOnReceive uint64 + + // + // Those are produced by the 'sender' + // + + // TotalPayloadsSent is the total number of payload (packet on the network) succesfully sent by the client. When + // using UDP we don't know if packet dropped or not, so all packet are considered as succesfully sent. + TotalPayloadsSent uint64 + // TotalPayloadsDropped is the total number of payload dropped by the client. This includes all cause of dropped + // (TotalPayloadsDroppedQueueFull and TotalPayloadsDroppedWriter). When using UDP This won't includes the + // network dropped. + TotalPayloadsDropped uint64 + // TotalPayloadsDroppedWriter is the total number of payload dropped by the writer (when using UDS or named + // pipe) due to network timeout or error. + TotalPayloadsDroppedWriter uint64 + // TotalPayloadsDroppedQueueFull is the total number of payload dropped internally because the queue of payloads + // waiting to be sent on the wire is full. This means the client is generating more metrics than can be sent on + // the wire. If your app sends metrics in batch look at WithSenderQueueSize option to increase the queue size. + TotalPayloadsDroppedQueueFull uint64 + + // TotalBytesSent is the total number of bytes succesfully sent by the client. 
When using UDP we don't know if + // packet dropped or not, so all packet are considered as succesfully sent. + TotalBytesSent uint64 + // TotalBytesDropped is the total number of bytes dropped by the client. This includes all cause of dropped + // (TotalBytesDroppedQueueFull and TotalBytesDroppedWriter). When using UDP This + // won't includes the network dropped. + TotalBytesDropped uint64 + // TotalBytesDroppedWriter is the total number of bytes dropped by the writer (when using UDS or named pipe) due + // to network timeout or error. + TotalBytesDroppedWriter uint64 + // TotalBytesDroppedQueueFull is the total number of bytes dropped internally because the queue of payloads + // waiting to be sent on the wire is full. This means the client is generating more metrics than can be sent on + // the wire. If your app sends metrics in batch look at WithSenderQueueSize option to increase the queue size. + TotalBytesDroppedQueueFull uint64 + + // + // Those are produced by the 'aggregator' + // + + // AggregationNbContext is the total number of contexts flushed by the aggregator when either + // WithClientSideAggregation or WithExtendedClientSideAggregation options are enabled. + AggregationNbContext uint64 + // AggregationNbContextGauge is the total number of contexts for gauges flushed by the aggregator when either + // WithClientSideAggregation or WithExtendedClientSideAggregation options are enabled. + AggregationNbContextGauge uint64 + // AggregationNbContextCount is the total number of contexts for counts flushed by the aggregator when either + // WithClientSideAggregation or WithExtendedClientSideAggregation options are enabled. + AggregationNbContextCount uint64 + // AggregationNbContextSet is the total number of contexts for sets flushed by the aggregator when either + // WithClientSideAggregation or WithExtendedClientSideAggregation options are enabled. 
+ AggregationNbContextSet uint64 + // AggregationNbContextHistogram is the total number of contexts for histograms flushed by the aggregator when either + // WithClientSideAggregation or WithExtendedClientSideAggregation options are enabled. + AggregationNbContextHistogram uint64 + // AggregationNbContextDistribution is the total number of contexts for distributions flushed by the aggregator when either + // WithClientSideAggregation or WithExtendedClientSideAggregation options are enabled. + AggregationNbContextDistribution uint64 + // AggregationNbContextTiming is the total number of contexts for timings flushed by the aggregator when either + // WithClientSideAggregation or WithExtendedClientSideAggregation options are enabled. + AggregationNbContextTiming uint64 +} + +type telemetryClient struct { + sync.RWMutex // used mostly to change the transport tag. + + c *ClientEx + aggEnabled bool // is aggregation enabled and should we sent aggregation telemetry. + transport string + tags []string + tagsByType map[metricType][]string + transportTagKnown bool + sender *sender + worker *worker + lastSample Telemetry // The previous sample of telemetry sent +} + +func newTelemetryClient(c *ClientEx, aggregationEnabled bool) *telemetryClient { + t := &telemetryClient{ + c: c, + aggEnabled: aggregationEnabled, + tags: []string{}, + tagsByType: map[metricType][]string{}, + } + + t.setTags() + return t +} + +func newTelemetryClientWithCustomAddr(c *ClientEx, telemetryAddr string, aggregationEnabled bool, pool *bufferPool, + writeTimeout time.Duration, connectTimeout time.Duration, +) (*telemetryClient, error) { + telemetryAddr = resolveAddr(telemetryAddr) + telemetryWriter, _, err := createWriter(telemetryAddr, writeTimeout, connectTimeout) + if err != nil { + return nil, fmt.Errorf("Could not resolve telemetry address: %v", err) + } + + t := newTelemetryClient(c, aggregationEnabled) + + // Creating a custom sender/worker with 1 worker in mutex mode for the + // telemetry 
that share the same bufferPool. + // FIXME due to performance pitfall, we're always using UDP defaults + // even for UDS. + t.sender = newSender(telemetryWriter, DefaultUDPBufferPoolSize, pool, c.errorHandler) + t.worker = newWorker(pool, t.sender) + return t, nil +} + +func (t *telemetryClient) run(wg *sync.WaitGroup, stop chan struct{}) { + wg.Add(1) + go func() { + defer wg.Done() + ticker := time.NewTicker(telemetryInterval) + for { + select { + case <-ticker.C: + t.sendTelemetry() + case <-stop: + ticker.Stop() + if t.sender != nil { + t.sender.close() + } + return + } + } + }() +} + +func (t *telemetryClient) sendTelemetry() { + for _, m := range t.flush() { + if t.worker != nil { + t.worker.processMetric(m) + } else { + t.c.send(m) + } + } + + if t.worker != nil { + t.worker.flush() + } +} + +func (t *telemetryClient) getTelemetry() Telemetry { + if t == nil { + // telemetry was disabled through the WithoutTelemetry option + return Telemetry{} + } + + tlm := Telemetry{} + t.c.flushTelemetryMetrics(&tlm) + t.c.sender.flushTelemetryMetrics(&tlm) + t.c.agg.flushTelemetryMetrics(&tlm) + + tlm.TotalMetrics = tlm.TotalMetricsGauge + + tlm.TotalMetricsCount + + tlm.TotalMetricsSet + + tlm.TotalMetricsHistogram + + tlm.TotalMetricsDistribution + + tlm.TotalMetricsTiming + + tlm.TotalPayloadsDropped = tlm.TotalPayloadsDroppedQueueFull + tlm.TotalPayloadsDroppedWriter + tlm.TotalBytesDropped = tlm.TotalBytesDroppedQueueFull + tlm.TotalBytesDroppedWriter + + if t.aggEnabled { + tlm.AggregationNbContext = tlm.AggregationNbContextGauge + + tlm.AggregationNbContextCount + + tlm.AggregationNbContextSet + + tlm.AggregationNbContextHistogram + + tlm.AggregationNbContextDistribution + + tlm.AggregationNbContextTiming + } + return tlm +} + +// setTransportTag if it was never set and is now known. +func (t *telemetryClient) setTags() { + transport := t.c.GetTransport() + t.RLock() + // We need to refresh if we never set the tags or if the transport changed. 
+ // For example when `unix://` is used we might return `uds` until we actually connect and detect that + // this is a UDS Stream socket and then return `uds-stream`. + needsRefresh := len(t.tags) == len(t.c.tags) || t.transport != transport + t.RUnlock() + + if !needsRefresh { + return + } + + t.Lock() + defer t.Unlock() + + t.transport = transport + t.tags = append(t.c.tags, clientTelemetryTag, clientVersionTelemetryTag) + if transport != "" { + t.tags = append(t.tags, "client_transport:"+transport) + } + t.tagsByType[gauge] = append(append([]string{}, t.tags...), "metrics_type:gauge") + t.tagsByType[count] = append(append([]string{}, t.tags...), "metrics_type:count") + t.tagsByType[set] = append(append([]string{}, t.tags...), "metrics_type:set") + t.tagsByType[timing] = append(append([]string{}, t.tags...), "metrics_type:timing") + t.tagsByType[histogram] = append(append([]string{}, t.tags...), "metrics_type:histogram") + t.tagsByType[distribution] = append(append([]string{}, t.tags...), "metrics_type:distribution") +} + +// flushTelemetry returns Telemetry metrics to be flushed. It's its own function to ease testing. +func (t *telemetryClient) flush() []metric { + m := []metric{} + + // same as Count but without global namespace + telemetryCount := func(name string, value int64, tags []string) { + m = append(m, metric{metricType: count, name: name, ivalue: value, tags: tags, rate: 1}) + } + + tlm := t.getTelemetry() + t.setTags() + + // We send the diff between now and the previous telemetry flush. This keep the same telemetry behavior from V4 + // so users dashboard's aren't broken when upgrading to V5. It also allow to graph on the same dashboard a mix + // of V4 and V5 apps. 
+ telemetryCount("datadog.dogstatsd.client.metrics", int64(tlm.TotalMetrics-t.lastSample.TotalMetrics), t.tags) + telemetryCount("datadog.dogstatsd.client.metrics_by_type", int64(tlm.TotalMetricsGauge-t.lastSample.TotalMetricsGauge), t.tagsByType[gauge]) + telemetryCount("datadog.dogstatsd.client.metrics_by_type", int64(tlm.TotalMetricsCount-t.lastSample.TotalMetricsCount), t.tagsByType[count]) + telemetryCount("datadog.dogstatsd.client.metrics_by_type", int64(tlm.TotalMetricsHistogram-t.lastSample.TotalMetricsHistogram), t.tagsByType[histogram]) + telemetryCount("datadog.dogstatsd.client.metrics_by_type", int64(tlm.TotalMetricsDistribution-t.lastSample.TotalMetricsDistribution), t.tagsByType[distribution]) + telemetryCount("datadog.dogstatsd.client.metrics_by_type", int64(tlm.TotalMetricsSet-t.lastSample.TotalMetricsSet), t.tagsByType[set]) + telemetryCount("datadog.dogstatsd.client.metrics_by_type", int64(tlm.TotalMetricsTiming-t.lastSample.TotalMetricsTiming), t.tagsByType[timing]) + telemetryCount("datadog.dogstatsd.client.events", int64(tlm.TotalEvents-t.lastSample.TotalEvents), t.tags) + telemetryCount("datadog.dogstatsd.client.service_checks", int64(tlm.TotalServiceChecks-t.lastSample.TotalServiceChecks), t.tags) + + telemetryCount("datadog.dogstatsd.client.metric_dropped_on_receive", int64(tlm.TotalDroppedOnReceive-t.lastSample.TotalDroppedOnReceive), t.tags) + + telemetryCount("datadog.dogstatsd.client.packets_sent", int64(tlm.TotalPayloadsSent-t.lastSample.TotalPayloadsSent), t.tags) + telemetryCount("datadog.dogstatsd.client.packets_dropped", int64(tlm.TotalPayloadsDropped-t.lastSample.TotalPayloadsDropped), t.tags) + telemetryCount("datadog.dogstatsd.client.packets_dropped_queue", int64(tlm.TotalPayloadsDroppedQueueFull-t.lastSample.TotalPayloadsDroppedQueueFull), t.tags) + telemetryCount("datadog.dogstatsd.client.packets_dropped_writer", int64(tlm.TotalPayloadsDroppedWriter-t.lastSample.TotalPayloadsDroppedWriter), t.tags) + + 
telemetryCount("datadog.dogstatsd.client.bytes_dropped", int64(tlm.TotalBytesDropped-t.lastSample.TotalBytesDropped), t.tags) + telemetryCount("datadog.dogstatsd.client.bytes_sent", int64(tlm.TotalBytesSent-t.lastSample.TotalBytesSent), t.tags) + telemetryCount("datadog.dogstatsd.client.bytes_dropped_queue", int64(tlm.TotalBytesDroppedQueueFull-t.lastSample.TotalBytesDroppedQueueFull), t.tags) + telemetryCount("datadog.dogstatsd.client.bytes_dropped_writer", int64(tlm.TotalBytesDroppedWriter-t.lastSample.TotalBytesDroppedWriter), t.tags) + + if t.aggEnabled { + telemetryCount("datadog.dogstatsd.client.aggregated_context", int64(tlm.AggregationNbContext-t.lastSample.AggregationNbContext), t.tags) + telemetryCount("datadog.dogstatsd.client.aggregated_context_by_type", int64(tlm.AggregationNbContextGauge-t.lastSample.AggregationNbContextGauge), t.tagsByType[gauge]) + telemetryCount("datadog.dogstatsd.client.aggregated_context_by_type", int64(tlm.AggregationNbContextSet-t.lastSample.AggregationNbContextSet), t.tagsByType[set]) + telemetryCount("datadog.dogstatsd.client.aggregated_context_by_type", int64(tlm.AggregationNbContextCount-t.lastSample.AggregationNbContextCount), t.tagsByType[count]) + telemetryCount("datadog.dogstatsd.client.aggregated_context_by_type", int64(tlm.AggregationNbContextHistogram-t.lastSample.AggregationNbContextHistogram), t.tagsByType[histogram]) + telemetryCount("datadog.dogstatsd.client.aggregated_context_by_type", int64(tlm.AggregationNbContextDistribution-t.lastSample.AggregationNbContextDistribution), t.tagsByType[distribution]) + telemetryCount("datadog.dogstatsd.client.aggregated_context_by_type", int64(tlm.AggregationNbContextTiming-t.lastSample.AggregationNbContextTiming), t.tagsByType[timing]) + } + + t.lastSample = tlm + + return m +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/udp.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/udp.go new file mode 100644 index 000000000..b90f75279 --- /dev/null +++ 
b/vendor/github.com/DataDog/datadog-go/v5/statsd/udp.go @@ -0,0 +1,39 @@ +package statsd + +import ( + "net" + "time" +) + +// udpWriter is an internal class wrapping around management of UDP connection +type udpWriter struct { + conn net.Conn +} + +// New returns a pointer to a new udpWriter given an addr in the format "hostname:port". +func newUDPWriter(addr string, _ time.Duration) (*udpWriter, error) { + udpAddr, err := net.ResolveUDPAddr("udp", addr) + if err != nil { + return nil, err + } + conn, err := net.DialUDP("udp", nil, udpAddr) + if err != nil { + return nil, err + } + writer := &udpWriter{conn: conn} + return writer, nil +} + +// Write data to the UDP connection with no error handling +func (w *udpWriter) Write(data []byte) (int, error) { + return w.conn.Write(data) +} + +func (w *udpWriter) Close() error { + return w.conn.Close() +} + +// GetTransportName returns the transport used by the sender +func (w *udpWriter) GetTransportName() string { + return writerNameUDP +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/uds.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/uds.go new file mode 100644 index 000000000..ed26f3ea2 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/uds.go @@ -0,0 +1,190 @@ +//go:build !windows +// +build !windows + +package statsd + +import ( + "encoding/binary" + "net" + "strings" + "sync" + "time" +) + +// udsWriter is an internal class wrapping around management of UDS connection +type udsWriter struct { + // Address to send metrics to, needed to allow reconnection on error + addr string + // Transport used + transport string + // Established connection object, or nil if not connected yet + conn net.Conn + // write timeout + writeTimeout time.Duration + // connect timeout + connectTimeout time.Duration + sync.RWMutex // used to lock conn / writer can replace it +} + +// newUDSWriter returns a pointer to a new udsWriter given a socket file path as addr. 
+func newUDSWriter(addr string, writeTimeout time.Duration, connectTimeout time.Duration, transport string) (*udsWriter, error) { + // Defer connection to first Write + writer := &udsWriter{addr: addr, transport: transport, conn: nil, writeTimeout: writeTimeout, connectTimeout: connectTimeout} + return writer, nil +} + +// GetTransportName returns the transport used by the writer +func (w *udsWriter) GetTransportName() string { + w.RLock() + defer w.RUnlock() + + if w.transport == "unix" { + return writerNameUDSStream + } else { + return writerNameUDS + } +} + +func (w *udsWriter) shouldCloseConnection(err error, partialWrite bool) bool { + if err != nil && partialWrite { + // We can't recover from a partial write + return true + } + if err, isNetworkErr := err.(net.Error); err != nil && (!isNetworkErr || !err.Timeout()) { + // Statsd server disconnected, retry connecting at next packet + return true + } + return false +} + +// Write data to the UDS connection with write timeout and minimal error handling: +// create the connection if nil, and destroy it if the statsd server has disconnected +func (w *udsWriter) Write(data []byte) (int, error) { + var n int + partialWrite := false + conn, err := w.ensureConnection() + if err != nil { + return 0, err + } + stream := conn.LocalAddr().Network() == "unix" + + // When using streams the deadline will only make us drop the packet if we can't write it at all, + // once we've started writing we need to finish. + conn.SetWriteDeadline(time.Now().Add(w.writeTimeout)) + + // When using streams, we append the length of the packet to the data + if stream { + bs := []byte{0, 0, 0, 0} + binary.LittleEndian.PutUint32(bs, uint32(len(data))) + _, err = w.conn.Write(bs) + + partialWrite = true + + // W need to be able to finish to write partially written packets once we have started. + // But we will reset the connection if we can't write anything at all for a long time. 
+ w.conn.SetWriteDeadline(time.Now().Add(w.connectTimeout)) + + // Continue writing only if we've written the length of the packet + if err == nil { + n, err = w.conn.Write(data) + if err == nil { + partialWrite = false + } + } + } else { + n, err = w.conn.Write(data) + } + + if w.shouldCloseConnection(err, partialWrite) { + w.unsetConnection() + } + return n, err +} + +func (w *udsWriter) Close() error { + if w.conn != nil { + return w.conn.Close() + } + return nil +} + +func (w *udsWriter) tryToDial(network string) (net.Conn, error) { + udsAddr, err := net.ResolveUnixAddr(network, w.addr) + if err != nil { + return nil, err + } + + // Try to gracefully reconnect to the socket when we encounter "connection refused", as it's likely that the Agent + // is restarting and the socket is not yet available. + connectAttemptsLeft := 3 + connectDeadline := time.Now().Add(w.connectTimeout) + + // Calculate the backoff time for connection refused errors, but don't exceed one second: this means we won't waste + // longer than 1 seconds worth of time if the socket becomes available immediately after our last connect attempt + connRefusedBackoff := w.connectTimeout / time.Duration(connectAttemptsLeft+1) + if connRefusedBackoff > time.Second { + connRefusedBackoff = time.Second + } + + for { + connectAttemptsLeft-- + + perCallTimeout := time.Until(connectDeadline) + newConn, err := net.DialTimeout(udsAddr.Network(), udsAddr.String(), perCallTimeout) + if err != nil { + if strings.HasSuffix(err.Error(), "connection refused") && connectAttemptsLeft > 0 { + // If we get a connection refused error, we need to wait a bit before trying again. 
+ time.Sleep(connRefusedBackoff) + continue + } + return nil, err + } + return newConn, nil + } +} + +func (w *udsWriter) ensureConnection() (net.Conn, error) { + // Check if we've already got a socket we can use + w.RLock() + currentConn := w.conn + w.RUnlock() + + if currentConn != nil { + return currentConn, nil + } + + // Looks like we might need to connect - try again with write locking. + w.Lock() + defer w.Unlock() + if w.conn != nil { + return w.conn, nil + } + + var newConn net.Conn + var err error + + // Try to guess the transport if not specified. + if w.transport == "" { + newConn, err = w.tryToDial("unixgram") + // try to connect with unixgram failed, try again with unix streams. + if err != nil && strings.Contains(err.Error(), "protocol wrong type for socket") { + newConn, err = w.tryToDial("unix") + } + } else { + newConn, err = w.tryToDial(w.transport) + } + + if err != nil { + return nil, err + } + w.conn = newConn + w.transport = newConn.RemoteAddr().Network() + return newConn, nil +} + +func (w *udsWriter) unsetConnection() { + w.Lock() + defer w.Unlock() + _ = w.conn.Close() + w.conn = nil +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/uds_windows.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/uds_windows.go new file mode 100644 index 000000000..909f5a0a0 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/uds_windows.go @@ -0,0 +1,15 @@ +//go:build windows +// +build windows + +package statsd + +import ( + "fmt" + "time" +) + +// newUDSWriter is disabled on Windows, SOCK_DGRAM are still unavailable but +// SOCK_STREAM should work once implemented in the agent (https://devblogs.microsoft.com/commandline/af_unix-comes-to-windows/) +func newUDSWriter(_ string, _ time.Duration, _ time.Duration, _ string) (Transport, error) { + return nil, fmt.Errorf("Unix socket is not available on Windows") +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/utils.go 
b/vendor/github.com/DataDog/datadog-go/v5/statsd/utils.go new file mode 100644 index 000000000..8c3ac8426 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/utils.go @@ -0,0 +1,32 @@ +package statsd + +import ( + "math/rand" + "sync" +) + +func shouldSample(rate float64, r *rand.Rand, lock *sync.Mutex) bool { + if rate >= 1 { + return true + } + // sources created by rand.NewSource() (ie. w.random) are not thread safe. + // TODO: use defer once the lowest Go version we support is 1.14 (defer + // has an overhead before that). + lock.Lock() + if r.Float64() > rate { + lock.Unlock() + return false + } + lock.Unlock() + return true +} + +func copySlice(src []string) []string { + if src == nil { + return nil + } + + c := make([]string, len(src)) + copy(c, src) + return c +} diff --git a/vendor/github.com/DataDog/datadog-go/v5/statsd/worker.go b/vendor/github.com/DataDog/datadog-go/v5/statsd/worker.go new file mode 100644 index 000000000..056282627 --- /dev/null +++ b/vendor/github.com/DataDog/datadog-go/v5/statsd/worker.go @@ -0,0 +1,158 @@ +package statsd + +import ( + "math/rand" + "sync" + "time" +) + +type worker struct { + pool *bufferPool + buffer *statsdBuffer + sender *sender + random *rand.Rand + randomLock sync.Mutex + sync.Mutex + + inputMetrics chan metric + stop chan struct{} +} + +func newWorker(pool *bufferPool, sender *sender) *worker { + // Each worker uses its own random source and random lock to prevent + // workers in separate goroutines from contending for the lock on the + // "math/rand" package-global random source (e.g. calls like + // "rand.Float64()" must acquire a shared lock to get the next + // pseudorandom number). + // Note that calling "time.Now().UnixNano()" repeatedly quickly may return + // very similar values. That's fine for seeding the worker-specific random + // source because we just need an evenly distributed stream of float values. + // Do not use this random source for cryptographic randomness. 
+ random := rand.New(rand.NewSource(time.Now().UnixNano())) + return &worker{ + pool: pool, + sender: sender, + buffer: pool.borrowBuffer(), + random: random, + stop: make(chan struct{}), + } +} + +func (w *worker) startReceivingMetric(bufferSize int) { + w.inputMetrics = make(chan metric, bufferSize) + go w.pullMetric() +} + +func (w *worker) stopReceivingMetric() { + w.stop <- struct{}{} +} + +func (w *worker) pullMetric() { + for { + select { + case m := <-w.inputMetrics: + w.processMetric(m) + case <-w.stop: + return + } + } +} + +func (w *worker) processMetric(m metric) error { + // Aggregated metrics are already sampled. + if m.metricType != distributionAggregated && m.metricType != histogramAggregated && m.metricType != timingAggregated { + if !shouldSample(m.rate, w.random, &w.randomLock) { + return nil + } + } + w.Lock() + var err error + if err = w.writeMetricUnsafe(m); err == errBufferFull { + w.flushUnsafe() + err = w.writeMetricUnsafe(m) + } + w.Unlock() + return err +} + +func (w *worker) writeAggregatedMetricUnsafe(m metric, metricSymbol []byte, precision int, rate float64) error { + globalPos := 0 + + // first check how much data we can write to the buffer: + // +3 + len(metricSymbol) because the message will include '||#' before the tags + // +1 for the potential line break at the start of the metric + extraSize := len(m.stags) + 4 + len(metricSymbol) + if m.rate < 1 { + // +2 for "|@" + // + the maximum size of a rate (https://en.wikipedia.org/wiki/IEEE_754-1985) + extraSize += 2 + 18 + } + for _, t := range m.globalTags { + extraSize += len(t) + 1 + } + + for { + pos, err := w.buffer.writeAggregated(metricSymbol, m.namespace, m.globalTags, m.name, m.fvalues[globalPos:], m.stags, extraSize, precision, rate, m.originDetection, m.cardinality) + if err == errPartialWrite { + // We successfully wrote part of the histogram metrics. + // We flush the current buffer and finish the histogram + // in a new one. 
+ w.flushUnsafe() + globalPos += pos + } else { + return err + } + } +} + +func (w *worker) writeMetricUnsafe(m metric) error { + switch m.metricType { + case gauge: + return w.buffer.writeGauge(m.namespace, m.globalTags, m.name, m.fvalue, m.tags, m.rate, m.timestamp, m.originDetection, m.cardinality) + case count: + return w.buffer.writeCount(m.namespace, m.globalTags, m.name, m.ivalue, m.tags, m.rate, m.timestamp, m.originDetection, m.cardinality) + case histogram: + return w.buffer.writeHistogram(m.namespace, m.globalTags, m.name, m.fvalue, m.tags, m.rate, m.originDetection, m.cardinality) + case distribution: + return w.buffer.writeDistribution(m.namespace, m.globalTags, m.name, m.fvalue, m.tags, m.rate, m.originDetection, m.cardinality) + case set: + return w.buffer.writeSet(m.namespace, m.globalTags, m.name, m.svalue, m.tags, m.rate, m.originDetection, m.cardinality) + case timing: + return w.buffer.writeTiming(m.namespace, m.globalTags, m.name, m.fvalue, m.tags, m.rate, m.originDetection, m.cardinality) + case event: + return w.buffer.writeEvent(m.evalue, m.globalTags, m.originDetection, m.cardinality) + case serviceCheck: + return w.buffer.writeServiceCheck(m.scvalue, m.globalTags, m.originDetection, m.cardinality) + case histogramAggregated: + return w.writeAggregatedMetricUnsafe(m, histogramSymbol, -1, m.rate) + case distributionAggregated: + return w.writeAggregatedMetricUnsafe(m, distributionSymbol, -1, m.rate) + case timingAggregated: + return w.writeAggregatedMetricUnsafe(m, timingSymbol, 6, m.rate) + default: + return nil + } +} + +func (w *worker) flush() { + w.Lock() + w.flushUnsafe() + w.Unlock() +} + +func (w *worker) pause() { + w.Lock() +} + +func (w *worker) unpause() { + w.Unlock() +} + +// flush the current buffer. Lock must be held by caller. +// flushed buffer written to the network asynchronously. 
+func (w *worker) flushUnsafe() { + if len(w.buffer.bytes()) > 0 { + w.sender.send(w.buffer) + w.buffer = w.pool.borrowBuffer() + } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index c13e36ca0..62ac1e341 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -9,6 +9,9 @@ filippo.io/edwards25519/field ## explicit; go 1.16 github.com/Azure/go-ansiterm github.com/Azure/go-ansiterm/winterm +# github.com/DataDog/datadog-go/v5 v5.8.3 +## explicit; go 1.13 +github.com/DataDog/datadog-go/v5/statsd # github.com/Masterminds/semver v1.5.0 ## explicit github.com/Masterminds/semver From 59f400f528de2dfc51ec08254e04e1995b5b9d85 Mon Sep 17 00:00:00 2001 From: Patrick Begley <210335+forge33@users.noreply.github.com> Date: Wed, 27 May 2026 12:51:23 -0400 Subject: [PATCH 3/7] Add Go runtime metrics to statsd reporting (#1690) * Add Datadog/statsd with simple client emitting startup * Add go runtime metrics to statsd reporting --------- Co-authored-by: meiji163 --- go/cmd/gh-ost/main.go | 5 +++ go/metrics/go_runtime.go | 61 +++++++++++++++++++++++++++++++ go/metrics/go_runtime_test.go | 67 +++++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+) create mode 100644 go/metrics/go_runtime.go create mode 100644 go/metrics/go_runtime_test.go diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index f30c439a4..d77046231 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -13,6 +13,7 @@ import ( "os/signal" "regexp" "syscall" + "time" "github.com/github/gh-ost/go/base" "github.com/github/gh-ost/go/logic" @@ -174,6 +175,7 @@ func main() { statsdAddr := flag.String("statsd-addr", "", "StatsD endpoint (host:port or unix socket); empty disables StatsD") var statsdTags statsdTagList flag.Var(&statsdTags, "statsd-tags", "global StatsD tags applied to every metric (repeatable), format key:value. 
Example: --statsd-tags 'env:prod,service:my-service'") + runtimeMetricsInterval := flag.Int("runtime-metrics-interval", 10, "Seconds between Go runtime memory/GC gauge samples (requires --statsd-addr); 0 disables") quiet := flag.Bool("quiet", false, "quiet") verbose := flag.Bool("verbose", false, "verbose") debug := flag.Bool("debug", false, "debug mode (very verbose)") @@ -400,6 +402,9 @@ func main() { defer func() { _ = metricsClient.Close() }() migrationContext.Metrics = metricsClient metricsClient.Count("startup", 1) + if *runtimeMetricsInterval > 0 { + metrics.StartGoRuntimeReporter(migrationContext.GetContext(), metricsClient, time.Duration(*runtimeMetricsInterval)*time.Second) + } migrator := logic.NewMigrator(migrationContext, AppVersion) var err error diff --git a/go/metrics/go_runtime.go b/go/metrics/go_runtime.go new file mode 100644 index 000000000..24ae2c6b5 --- /dev/null +++ b/go/metrics/go_runtime.go @@ -0,0 +1,61 @@ +/* + Copyright 2022 GitHub Inc. + See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package metrics + +import ( + "context" + "runtime" + "time" +) + +// MemStatsGaugeEmitter is implemented by *Client; used for tests without UDP. +type MemStatsGaugeEmitter interface { + Gauge(name string, value float64, tags ...string) +} + +// EmitGoRuntimeGauges emits gh_ost.go_runtime.* gauges (namespace is applied by the client). +// m and numGoroutine are typically from runtime.ReadMemStats and runtime.NumGoroutine. 
+func EmitGoRuntimeGauges(emit MemStatsGaugeEmitter, m *runtime.MemStats, numGoroutine int) { + if emit == nil || m == nil { + return + } + emit.Gauge("go_runtime.alloc_bytes", float64(m.Alloc)) + emit.Gauge("go_runtime.sys_bytes", float64(m.Sys)) + emit.Gauge("go_runtime.heap_inuse_bytes", float64(m.HeapInuse)) + emit.Gauge("go_runtime.num_gc", float64(m.NumGC)) + emit.Gauge("go_runtime.gc_pause_total_ns", float64(m.PauseTotalNs)) + emit.Gauge("go_runtime.goroutines", float64(numGoroutine)) +} + +// StartGoRuntimeReporter periodically samples runtime memory and goroutines and emits gauges +// until ctx is cancelled. It is a no-op when interval <= 0, client is nil, or StatsD is disabled +// (noop client). +func StartGoRuntimeReporter(ctx context.Context, client *Client, interval time.Duration) { + if ctx == nil || client == nil || interval <= 0 || client.sd == nil { + return + } + + emit := func() { + var m runtime.MemStats + runtime.ReadMemStats(&m) + EmitGoRuntimeGauges(client, &m, runtime.NumGoroutine()) + } + + go func() { + ticker := time.NewTicker(interval) + defer ticker.Stop() + + emit() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + emit() + } + } + }() +} diff --git a/go/metrics/go_runtime_test.go b/go/metrics/go_runtime_test.go new file mode 100644 index 000000000..24811206b --- /dev/null +++ b/go/metrics/go_runtime_test.go @@ -0,0 +1,67 @@ +/* + Copyright 2022 GitHub Inc. 
+ See https://github.com/github/gh-ost/blob/master/LICENSE +*/ + +package metrics + +import ( + "context" + "runtime" + "testing" + "time" +) + +type gaugeSpy struct { + names []string + values []float64 +} + +func (g *gaugeSpy) Gauge(name string, value float64, _ ...string) { + g.names = append(g.names, name) + g.values = append(g.values, value) +} + +func TestEmitGoRuntimeGauges(t *testing.T) { + spy := &gaugeSpy{} + m := &runtime.MemStats{ + Alloc: 100, + Sys: 200, + HeapInuse: 300, + NumGC: 7, + PauseTotalNs: 42, + } + EmitGoRuntimeGauges(spy, m, 123) + + wantNames := []string{ + "go_runtime.alloc_bytes", + "go_runtime.sys_bytes", + "go_runtime.heap_inuse_bytes", + "go_runtime.num_gc", + "go_runtime.gc_pause_total_ns", + "go_runtime.goroutines", + } + wantVals := []float64{100, 200, 300, 7, 42, 123} + + if len(spy.names) != len(wantNames) { + t.Fatalf("got %d gauges, want %d", len(spy.names), len(wantNames)) + } + for i := range wantNames { + if spy.names[i] != wantNames[i] || spy.values[i] != wantVals[i] { + t.Fatalf("[%d] got %s=%v want %s=%v", i, spy.names[i], spy.values[i], wantNames[i], wantVals[i]) + } + } +} + +func TestEmitGoRuntimeGauges_nilSafe(t *testing.T) { + EmitGoRuntimeGauges(nil, &runtime.MemStats{}, 1) + EmitGoRuntimeGauges(&gaugeSpy{}, nil, 1) +} + +func TestStartGoRuntimeReporter_stopsOnCancel(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + c := &Client{} // sd nil — should not start + StartGoRuntimeReporter(ctx, c, time.Millisecond) + cancel() + time.Sleep(20 * time.Millisecond) +} From c6363475592231e93338f0f7b66290e0a1dab218 Mon Sep 17 00:00:00 2001 From: Jakub Pliszka <56041743+jakubpliszka@users.noreply.github.com> Date: Thu, 28 May 2026 19:10:47 +0100 Subject: [PATCH 4/7] Add GET_LOCK to prevent concurrent migrations of the same table (#1693) --- go/logic/applier.go | 152 ++++++++++++++++++++++++++++++++++++++ go/logic/applier_test.go | 77 +++++++++++++++++++ go/logic/migrator.go | 3 + 
go/logic/migrator_test.go | 2 + 4 files changed, 234 insertions(+) diff --git a/go/logic/applier.go b/go/logic/applier.go index b49e131b8..f3474b3ef 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -6,7 +6,9 @@ package logic import ( + "crypto/sha1" gosql "database/sql" + "encoding/hex" "fmt" "reflect" "regexp" @@ -83,6 +85,12 @@ type Applier struct { dmlInsertQueryBuilder *sql.DMLInsertQueryBuilder dmlUpdateQueryBuilder *sql.DMLUpdateQueryBuilder checkpointInsertQueryBuilder *sql.CheckpointInsertQueryBuilder + + migrationLockConn *gosql.Conn + migrationLockDB *gosql.DB + migrationLockName string + migrationLockStop chan struct{} + migrationLockDone chan struct{} } func NewApplier(migrationContext *base.MigrationContext) *Applier { @@ -145,6 +153,149 @@ func (apl *Applier) InitDBConnections() (err error) { return nil } +// buildMigrationLockName returns a deterministic MySQL user-level lock name +// for the given database and table, hashed if longer than MySQL's 64-char limit. +func buildMigrationLockName(db, table string) string { + name := fmt.Sprintf("gh-ost::%s.%s", db, table) + if len(name) <= 64 { + return name + } + sum := sha1.Sum([]byte(name)) + return "gh-ost::" + hex.EncodeToString(sum[:]) +} + +// AcquireMigrationLock takes a user-level lock on a pinned connection, +// preventing two gh-ost processes from migrating the same table concurrently +// on the same MySQL server. +func (apl *Applier) AcquireMigrationLock(ctx context.Context) error { + lockName := buildMigrationLockName(apl.migrationContext.DatabaseName, apl.migrationContext.OriginalTableName) + + // Use a dedicated *sql.DB so the pinned connection does not consume a + // slot in apl.db's small pool (mysql.MaxDBPoolConnections). 
+ lockURI := apl.connectionConfig.GetDBUri(apl.migrationContext.DatabaseName) + lockDB, err := gosql.Open("mysql", lockURI) + if err != nil { + return fmt.Errorf("failed to open migration lock DB: %w", err) + } + lockDB.SetMaxOpenConns(1) + lockDB.SetMaxIdleConns(1) + + conn, err := lockDB.Conn(ctx) + if err != nil { + lockDB.Close() + return fmt.Errorf("failed to obtain pinned connection for migration lock: %w", err) + } + + var lockResult gosql.NullInt64 + if err := conn.QueryRowContext(ctx, `select /* gh-ost */ get_lock(?, 0)`, lockName).Scan(&lockResult); err != nil { + conn.Close() + lockDB.Close() + return fmt.Errorf("failed to execute GET_LOCK for migration lock %s: %w", lockName, err) + } + + if !lockResult.Valid { + conn.Close() + lockDB.Close() + return fmt.Errorf("GET_LOCK returned NULL while acquiring migration lock %s", lockName) + } + + if lockResult.Int64 != 1 { + var holderID gosql.NullInt64 + _ = conn.QueryRowContext(ctx, `select /* gh-ost */ is_used_lock(?)`, lockName).Scan(&holderID) + conn.Close() + lockDB.Close() + if holderID.Valid { + return fmt.Errorf("another gh-ost process is already migrating `%s`.`%s`: migration lock %s held by connection id %d", + apl.migrationContext.DatabaseName, apl.migrationContext.OriginalTableName, lockName, holderID.Int64) + } + return fmt.Errorf("another gh-ost process is already migrating `%s`.`%s`: migration lock %s is held", + apl.migrationContext.DatabaseName, apl.migrationContext.OriginalTableName, lockName) + } + + apl.migrationLockConn = conn + apl.migrationLockDB = lockDB + apl.migrationLockName = lockName + apl.migrationLockStop = make(chan struct{}) + apl.migrationLockDone = make(chan struct{}) + go apl.keepMigrationLockAlive(ctx) + apl.migrationContext.Log.Infof("Acquired migration lock %s", lockName) + return nil +} + +// keepMigrationLockAlive pings the pinned migration-lock connection. If the +// ping fails the lock is considered lost and the migration is aborted via +// PanicAbort. 
+func (apl *Applier) keepMigrationLockAlive(ctx context.Context) { + defer close(apl.migrationLockDone) + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-apl.migrationLockStop: + return + case <-ticker.C: + } + if err := apl.pingMigrationLockConn(ctx); err != nil { + // Shutdown may have started mid-ping; don't abort if so. + select { + case <-apl.migrationLockStop: + return + default: + } + if ctx.Err() != nil { + return + } + _ = base.SendWithContext(ctx, apl.migrationContext.PanicAbort, + fmt.Errorf("migration lock %s connection lost: %w", apl.migrationLockName, err)) + return + } + } +} + +// pingMigrationLockConn pings the pinned connection with a bounded timeout +// and propagates migrationLockStop as an early cancel so a teardown can +// interrupt a stuck ping. +func (apl *Applier) pingMigrationLockConn(parent context.Context) error { + pingCtx, cancel := context.WithTimeout(parent, 10*time.Second) + defer cancel() + done := make(chan struct{}) + defer close(done) + go func() { + select { + case <-apl.migrationLockStop: + cancel() + case <-done: + } + }() + return apl.migrationLockConn.PingContext(pingCtx) +} + +// releaseMigrationLock stops the keepalive goroutine, releases the user-level +// lock and closes the dedicated lock DB. Safe to call when no lock is held. +func (apl *Applier) releaseMigrationLock() { + if apl.migrationLockConn == nil { + return + } + // Stop keepalive before touching the pinned connection. 
+ close(apl.migrationLockStop) + <-apl.migrationLockDone + releaseCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + if _, err := apl.migrationLockConn.ExecContext(releaseCtx, `select /* gh-ost */ release_lock(?)`, apl.migrationLockName); err != nil { + apl.migrationContext.Log.Warningf("failed to release migration lock %s: %v", apl.migrationLockName, err) + } + if err := apl.migrationLockConn.Close(); err != nil { + apl.migrationContext.Log.Warningf("failed to close migration lock connection: %v", err) + } + if apl.migrationLockDB != nil { + apl.migrationLockDB.Close() + apl.migrationLockDB = nil + } + apl.migrationLockConn = nil +} + func (apl *Applier) prepareQueries() (err error) { if apl.dmlDeleteQueryBuilder, err = sql.NewDMLDeleteQueryBuilder( apl.migrationContext.DatabaseName, @@ -1734,6 +1885,7 @@ func (apl *Applier) ApplyDMLEventQueries(dmlEvents [](*binlog.BinlogDMLEvent)) e func (apl *Applier) Teardown() { apl.migrationContext.Log.Debugf("Tearing down...") + apl.releaseMigrationLock() apl.db.Close() apl.singletonDB.Close() atomic.StoreInt64(&apl.finishedMigrating, 1) diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index 6d7ba42f4..85a5a01d3 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -507,6 +507,83 @@ func (suite *ApplierTestSuite) TestValidateOrDropExistingTablesWithGhostTableExi suite.Require().Equal(gosql.ErrNoRows, err) } +func (suite *ApplierTestSuite) TestAcquireMigrationLockSucceedsWhenFree() { + ctx := context.Background() + + _, err := suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT, item_id INT);", getTestTableName())) + suite.Require().NoError(err) + + connectionConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer) + suite.Require().NoError(err) + + migrationContext := newTestMigrationContext() + migrationContext.ApplierConnectionConfig = connectionConfig + migrationContext.SetConnectionConfig("innodb") + + applier := 
NewApplier(migrationContext) + defer applier.Teardown() + + suite.Require().NoError(applier.InitDBConnections()) + suite.Require().NoError(applier.AcquireMigrationLock(ctx)) + suite.Require().NotNil(applier.migrationLockConn) + suite.Require().Equal(buildMigrationLockName(testMysqlDatabase, testMysqlTableName), applier.migrationLockName) +} + +func (suite *ApplierTestSuite) TestAcquireMigrationLockFailsWhenHeld() { + ctx := context.Background() + + _, err := suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT, item_id INT);", getTestTableName())) + suite.Require().NoError(err) + + connectionConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer) + suite.Require().NoError(err) + + migrationContextA := newTestMigrationContext() + migrationContextA.ApplierConnectionConfig = connectionConfig + migrationContextA.SetConnectionConfig("innodb") + + applierA := NewApplier(migrationContextA) + defer applierA.Teardown() + suite.Require().NoError(applierA.InitDBConnections()) + suite.Require().NoError(applierA.AcquireMigrationLock(ctx)) + + connectionConfigB, err := getTestConnectionConfig(ctx, suite.mysqlContainer) + suite.Require().NoError(err) + + migrationContextB := newTestMigrationContext() + migrationContextB.ApplierConnectionConfig = connectionConfigB + migrationContextB.SetConnectionConfig("innodb") + + applierB := NewApplier(migrationContextB) + defer applierB.Teardown() + suite.Require().NoError(applierB.InitDBConnections()) + + err = applierB.AcquireMigrationLock(ctx) + suite.Require().Error(err) + suite.Require().Contains(err.Error(), "already migrating") + suite.Require().Nil(applierB.migrationLockConn) +} + +func TestBuildMigrationLockName(t *testing.T) { + t.Run("short name is returned verbatim", func(t *testing.T) { + name := buildMigrationLockName("mydb", "mytable") + require.Equal(t, "gh-ost::mydb.mytable", name) + require.LessOrEqual(t, len(name), 64) + }) + + t.Run("long name is hashed and within MySQL limit", func(t *testing.T) { + 
longDB := strings.Repeat("d", 40) + longTable := strings.Repeat("t", 40) + name := buildMigrationLockName(longDB, longTable) + require.LessOrEqual(t, len(name), 64) + require.True(t, strings.HasPrefix(name, "gh-ost::")) + // deterministic + require.Equal(t, name, buildMigrationLockName(longDB, longTable)) + // distinct inputs produce distinct hashes + require.NotEqual(t, name, buildMigrationLockName(longDB, longTable+"x")) + }) +} + func (suite *ApplierTestSuite) TestCreateGhostTable() { ctx := context.Background() diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 226cf13a7..bec13e594 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -1511,6 +1511,9 @@ func (mgtr *Migrator) initiateApplier() error { if err := mgtr.applier.InitDBConnections(); err != nil { return err } + if err := mgtr.applier.AcquireMigrationLock(mgtr.migrationContext.GetContext()); err != nil { + return err + } if mgtr.migrationContext.Revert { if err := mgtr.applier.CreateChangelogTable(); err != nil { mgtr.migrationContext.Log.Errorf("unable to create changelog table, see further error details. Perhaps a previous migration failed without dropping the table? OR is there a running migration? 
Bailing out") diff --git a/go/logic/migrator_test.go b/go/logic/migrator_test.go index 95278fc3d..fc8feada1 100644 --- a/go/logic/migrator_test.go +++ b/go/logic/migrator_test.go @@ -675,6 +675,7 @@ func (suite *MigratorTestSuite) TestCopierIntPK() { migrator := NewMigrator(migrationContext, "0.0.0") suite.Require().NoError(migrator.initiateApplier()) + defer migrator.applier.Teardown() suite.Require().NoError(migrator.applier.prepareQueries()) suite.Require().NoError(migrator.applier.ReadMigrationRangeValues()) @@ -746,6 +747,7 @@ func (suite *MigratorTestSuite) TestCopierCompositePK() { migrator := NewMigrator(migrationContext, "0.0.0") suite.Require().NoError(migrator.initiateApplier()) + defer migrator.applier.Teardown() suite.Require().NoError(migrator.applier.prepareQueries()) suite.Require().NoError(migrator.applier.ReadMigrationRangeValues()) From 251b08da9a6d33e1e246cf223dfe734ee27908e6 Mon Sep 17 00:00:00 2001 From: Eric Yan Date: Fri, 29 May 2026 18:56:59 +0200 Subject: [PATCH 5/7] Use os.TempDir() for test socket path (#1694) Refactor newTestMigrationContext to set ServeSocketFile via os.TempDir() and remove runtime.Caller-based path derivation. 
--- go/logic/test_utils.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/go/logic/test_utils.go b/go/logic/test_utils.go index f552cfc76..cdcfcee84 100644 --- a/go/logic/test_utils.go +++ b/go/logic/test_utils.go @@ -4,8 +4,8 @@ import ( "context" "fmt" + "os" "path/filepath" - "runtime" "github.com/github/gh-ost/go/base" "github.com/github/gh-ost/go/mysql" @@ -68,9 +68,7 @@ func newTestMigrationContext() *base.MigrationContext { migrationContext.PanicOnWarnings = true migrationContext.AllowedRunningOnMaster = true - //nolint:dogsled - _, filename, _, _ := runtime.Caller(0) - migrationContext.ServeSocketFile = filepath.Join(filepath.Dir(filename), "../../tmp/gh-ost.sock") + migrationContext.ServeSocketFile = filepath.Join(os.TempDir(), "gh-ost.sock") return migrationContext } From 835f5379afe7318d80d4f347016e3d81721327c7 Mon Sep 17 00:00:00 2001 From: Bastian Bartmann Date: Mon, 1 Jun 2026 19:03:21 +0200 Subject: [PATCH 6/7] Improve performance for tables with composite primary keys (#1686) --- go/sql/builder.go | 322 ++++++++++++++++++++++++++++++++- go/sql/builder_test.go | 400 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 673 insertions(+), 49 deletions(-) diff --git a/go/sql/builder.go b/go/sql/builder.go index 6e41eb4e1..7d0864601 100644 --- a/go/sql/builder.go +++ b/go/sql/builder.go @@ -281,23 +281,34 @@ func BuildRangeInsertQuery(databaseName, originalTableName, ghostTableName strin sharedColumnsListing := strings.Join(sharedColumns, ", ") uniqueKey = EscapeName(uniqueKey) + transactionalClause := "" + if transactionalTable { + if noWait { + transactionalClause = "for share nowait" + } else { + transactionalClause = "lock in share mode" + } + } var minRangeComparisonSign = GreaterThanComparisonSign if includeRangeStartValues { minRangeComparisonSign = GreaterThanOrEqualsComparisonSign } + + if uniqueKeyColumns.Len() == 2 { + return buildRangeInsertQueryTwoColumn( + databaseName, originalTableName, ghostTableName, + 
sharedColumnsListing, mappedSharedColumnsListing, + uniqueKey, uniqueKeyColumns, + rangeStartValues, rangeEndValues, + rangeStartArgs, rangeEndArgs, + minRangeComparisonSign, transactionalClause, + ) + } rangeStartComparison, rangeExplodedArgs, err := BuildRangeComparison(uniqueKeyColumns.Names(), rangeStartValues, rangeStartArgs, minRangeComparisonSign) if err != nil { return "", explodedArgs, err } explodedArgs = append(explodedArgs, rangeExplodedArgs...) - transactionalClause := "" - if transactionalTable { - if noWait { - transactionalClause = "for share nowait" - } else { - transactionalClause = "lock in share mode" - } - } rangeEndComparison, rangeExplodedArgs, err := BuildRangeComparison(uniqueKeyColumns.Names(), rangeEndValues, rangeEndArgs, LessThanOrEqualsComparisonSign) if err != nil { return "", explodedArgs, err @@ -323,6 +334,91 @@ func BuildRangeInsertQuery(databaseName, originalTableName, ghostTableName strin return result, explodedArgs, nil } +func sameFirstColumnValue(rangeStartArgs, rangeEndArgs []interface{}) bool { + if len(rangeStartArgs) == 0 || len(rangeEndArgs) == 0 { + return false + } + return fmt.Sprintf("%v", rangeStartArgs[0]) == fmt.Sprintf("%v", rangeEndArgs[0]) +} + +func buildRangeInsertQueryTwoColumn( + databaseName, originalTableName, ghostTableName string, + sharedColumnsListing, mappedSharedColumnsListing string, + uniqueKey string, + uniqueKeyColumns *ColumnList, + rangeStartValues, rangeEndValues []string, + rangeStartArgs, rangeEndArgs []interface{}, + minRangeComparisonSign ValueComparisonSign, + transactionalClause string, +) (result string, explodedArgs []interface{}, err error) { + cols := uniqueKeyColumns.Columns() + + if len(cols) != len(rangeStartValues) { + return "", explodedArgs, fmt.Errorf("got %d columns but %d rangeStartValues in buildRangeInsertQueryTwoColumn", len(cols), len(rangeStartValues)) + } + if len(cols) != len(rangeEndValues) { + return "", explodedArgs, fmt.Errorf("got %d columns but %d 
rangeEndValues in buildRangeInsertQueryTwoColumn", len(cols), len(rangeEndValues)) + } + if len(cols) != len(rangeStartArgs) { + return "", explodedArgs, fmt.Errorf("got %d columns but %d rangeStartArgs in buildRangeInsertQueryTwoColumn", len(cols), len(rangeStartArgs)) + } + if len(cols) != len(rangeEndArgs) { + return "", explodedArgs, fmt.Errorf("got %d columns but %d rangeEndArgs in buildRangeInsertQueryTwoColumn", len(cols), len(rangeEndArgs)) + } + + col1Name := EscapeName(cols[0].Name) + col2Name := EscapeName(cols[1].Name) + col1StartVal := rangeStartValues[0] + col2StartVal := rangeStartValues[1] + col1EndVal := rangeEndValues[0] + col2EndVal := rangeEndValues[1] + col2StartOp := string(minRangeComparisonSign) + fromClause := fmt.Sprintf("%s.%s force index (%s)", databaseName, originalTableName, uniqueKey) + + if sameFirstColumnValue(rangeStartArgs, rangeEndArgs) { + result = fmt.Sprintf(` + insert /* gh-ost %s.%s */ ignore + into + %s.%s + (%s) + ( + select %s + from + %s.%s + force index (%s) + where (%s = %s and %s %s %s and %s <= %s) + %s + )`, + databaseName, originalTableName, + databaseName, ghostTableName, mappedSharedColumnsListing, + sharedColumnsListing, + databaseName, originalTableName, uniqueKey, + col1Name, col1StartVal, col2Name, col2StartOp, col2StartVal, col2Name, col2EndVal, + transactionalClause, + ) + explodedArgs = append(explodedArgs, rangeStartArgs[0], rangeStartArgs[1], rangeEndArgs[1]) + return result, explodedArgs, nil + } + + part1, part2, part3, explodedArgs := buildTwoColumnUnionParts( + sharedColumnsListing, fromClause, + col1Name, col2Name, + col1StartVal, col2StartVal, col1EndVal, col2EndVal, + col2StartOp, transactionalClause, + rangeStartArgs, rangeEndArgs, + ) + + result = fmt.Sprintf(` + insert /* gh-ost %s.%s */ ignore + into %s.%s (%s) + %s union all %s union all %s`, + databaseName, originalTableName, + databaseName, ghostTableName, mappedSharedColumnsListing, + part1, part2, part3, + ) + return result, explodedArgs, 
nil +} + func BuildRangeInsertPreparedQuery(databaseName, originalTableName, ghostTableName string, sharedColumns []string, mappedSharedColumns []string, uniqueKey string, uniqueKeyColumns *ColumnList, rangeStartArgs, rangeEndArgs []interface{}, includeRangeStartValues bool, transactionalTable bool, noWait bool) (result string, explodedArgs []interface{}, err error) { rangeStartValues := buildColumnsPreparedValues(uniqueKeyColumns) rangeEndValues := buildColumnsPreparedValues(uniqueKeyColumns) @@ -340,6 +436,11 @@ func BuildUniqueKeyRangeEndPreparedQueryViaOffset(databaseName, tableName string if includeRangeStartValues { startRangeComparisonSign = GreaterThanOrEqualsComparisonSign } + + if uniqueKeyColumns.Len() == 2 { + return buildUniqueKeyRangeEndTwoColumnViaOffset(databaseName, tableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, startRangeComparisonSign, hint) + } + rangeStartComparison, rangeExplodedArgs, err := BuildRangePreparedComparison(uniqueKeyColumns, rangeStartArgs, startRangeComparisonSign) if err != nil { return "", explodedArgs, err @@ -393,6 +494,11 @@ func BuildUniqueKeyRangeEndPreparedQueryViaTemptable(databaseName, tableName str if includeRangeStartValues { startRangeComparisonSign = GreaterThanOrEqualsComparisonSign } + + if uniqueKeyColumns.Len() == 2 { + return buildUniqueKeyRangeEndTwoColumnViaTemptable(databaseName, tableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, startRangeComparisonSign, hint) + } + rangeStartComparison, rangeExplodedArgs, err := BuildRangePreparedComparison(uniqueKeyColumns, rangeStartArgs, startRangeComparisonSign) if err != nil { return "", explodedArgs, err @@ -442,6 +548,206 @@ func BuildUniqueKeyRangeEndPreparedQueryViaTemptable(databaseName, tableName str return result, explodedArgs, nil } +type twoColumnRangeMeta struct { + col1Name, col2Name string + col1Val, col2Val string + orderByAsc string + orderByDesc string +} + +func newTwoColumnRangeMeta(uniqueKeyColumns 
*ColumnList) twoColumnRangeMeta { + colVals := buildColumnsPreparedValues(uniqueKeyColumns) + cols := uniqueKeyColumns.Columns() + col1Name := EscapeName(cols[0].Name) + col2Name := EscapeName(cols[1].Name) + col1Asc := fmt.Sprintf("%s asc", col1Name) + col2Asc := fmt.Sprintf("%s asc", col2Name) + col1Desc := fmt.Sprintf("%s desc", col1Name) + col2Desc := fmt.Sprintf("%s desc", col2Name) + if cols[0].Type == EnumColumnType { + col1Asc = fmt.Sprintf("concat(%s) asc", col1Name) + col1Desc = fmt.Sprintf("concat(%s) desc", col1Name) + } + if cols[1].Type == EnumColumnType { + col2Asc = fmt.Sprintf("concat(%s) asc", col2Name) + col2Desc = fmt.Sprintf("concat(%s) desc", col2Name) + } + return twoColumnRangeMeta{ + col1Name: col1Name, + col2Name: col2Name, + col1Val: colVals[0], + col2Val: colVals[1], + orderByAsc: col1Asc + ", " + col2Asc, + orderByDesc: col1Desc + ", " + col2Desc, + } +} + +func buildTwoColumnUnionParts( + selectClause, fromClause string, + col1Name, col2Name string, + col1StartVal, col2StartVal, col1EndVal, col2EndVal string, + col2StartOp, partSuffix string, + rangeStartArgs, rangeEndArgs []interface{}, +) (part1, part2, part3 string, explodedArgs []interface{}) { + part1 = fmt.Sprintf( + `(select %s from %s where %s = %s and %s %s %s %s)`, + selectClause, fromClause, + col1Name, col1StartVal, col2Name, col2StartOp, col2StartVal, + partSuffix, + ) + explodedArgs = append(explodedArgs, rangeStartArgs[0], rangeStartArgs[1]) + + part2 = fmt.Sprintf( + `(select %s from %s where %s > %s and %s < %s %s)`, + selectClause, fromClause, + col1Name, col1StartVal, col1Name, col1EndVal, + partSuffix, + ) + explodedArgs = append(explodedArgs, rangeStartArgs[0], rangeEndArgs[0]) + + part3 = fmt.Sprintf( + `(select %s from %s where %s = %s and %s <= %s %s)`, + selectClause, fromClause, + col1Name, col1EndVal, col2Name, col2EndVal, + partSuffix, + ) + explodedArgs = append(explodedArgs, rangeEndArgs[0], rangeEndArgs[1]) + return +} + +func 
buildUniqueKeyRangeEndTwoColumnViaOffset( + databaseName, tableName string, + uniqueKeyColumns *ColumnList, + rangeStartArgs, rangeEndArgs []interface{}, + chunkSize int64, + startRangeComparisonSign ValueComparisonSign, + hint string, +) (result string, explodedArgs []interface{}, err error) { + m := newTwoColumnRangeMeta(uniqueKeyColumns) + if len(rangeStartArgs) != 2 || len(rangeEndArgs) != 2 { + return "", nil, fmt.Errorf("expected 2 range args in buildUniqueKeyRangeEndTwoColumnViaOffset, got %d start and %d end", len(rangeStartArgs), len(rangeEndArgs)) + } + col2StartOp := string(startRangeComparisonSign) + selectClause := m.col1Name + ", " + m.col2Name + fromClause := databaseName + "." + tableName + partSuffix := fmt.Sprintf("order by %s limit %d", m.orderByAsc, chunkSize) + + if sameFirstColumnValue(rangeStartArgs, rangeEndArgs) { + result = fmt.Sprintf(` + select /* gh-ost %s.%s %s */ + %s, %s + from + %s.%s + where + (%s = %s and %s %s %s and %s <= %s) + order by + %s + limit 1 + offset %d`, + databaseName, tableName, hint, + m.col1Name, m.col2Name, + databaseName, tableName, + m.col1Name, m.col1Val, m.col2Name, col2StartOp, m.col2Val, m.col2Name, m.col2Val, + m.orderByAsc, + chunkSize-1, + ) + explodedArgs = append(explodedArgs, rangeStartArgs[0], rangeStartArgs[1], rangeEndArgs[1]) + return result, explodedArgs, nil + } + + part1, part2, part3, explodedArgs := buildTwoColumnUnionParts( + selectClause, fromClause, + m.col1Name, m.col2Name, + m.col1Val, m.col2Val, m.col1Val, m.col2Val, + col2StartOp, partSuffix, + rangeStartArgs, rangeEndArgs, + ) + + result = fmt.Sprintf(` + select /* gh-ost %s.%s %s */ + %s, %s + from + (%s union all %s union all %s) t + order by + %s + limit 1 + offset %d`, + databaseName, tableName, hint, + m.col1Name, m.col2Name, + part1, part2, part3, + m.orderByAsc, + chunkSize-1, + ) + return result, explodedArgs, nil +} + +func buildUniqueKeyRangeEndTwoColumnViaTemptable( + databaseName, tableName string, + uniqueKeyColumns 
*ColumnList, + rangeStartArgs, rangeEndArgs []interface{}, + chunkSize int64, + startRangeComparisonSign ValueComparisonSign, + hint string, +) (result string, explodedArgs []interface{}, err error) { + m := newTwoColumnRangeMeta(uniqueKeyColumns) + if len(rangeStartArgs) != 2 || len(rangeEndArgs) != 2 { + return "", nil, fmt.Errorf("expected 2 range args in buildUniqueKeyRangeEndTwoColumnViaTemptable, got %d start and %d end", len(rangeStartArgs), len(rangeEndArgs)) + } + col2StartOp := string(startRangeComparisonSign) + selectClause := m.col1Name + ", " + m.col2Name + fromClause := databaseName + "." + tableName + partSuffix := fmt.Sprintf("order by %s limit %d", m.orderByAsc, chunkSize) + + if sameFirstColumnValue(rangeStartArgs, rangeEndArgs) { + result = fmt.Sprintf(` + select /* gh-ost %s.%s %s */ %s, %s + from ( + select %s, %s + from %s.%s + where (%s = %s and %s %s %s and %s <= %s) + order by %s + limit %d + ) select_osc_chunk + order by %s + limit 1`, + databaseName, tableName, hint, m.col1Name, m.col2Name, + m.col1Name, m.col2Name, + databaseName, tableName, + m.col1Name, m.col1Val, m.col2Name, col2StartOp, m.col2Val, m.col2Name, m.col2Val, + m.orderByAsc, chunkSize, + m.orderByDesc, + ) + explodedArgs = append(explodedArgs, rangeStartArgs[0], rangeStartArgs[1], rangeEndArgs[1]) + return result, explodedArgs, nil + } + + part1, part2, part3, explodedArgs := buildTwoColumnUnionParts( + selectClause, fromClause, + m.col1Name, m.col2Name, + m.col1Val, m.col2Val, m.col1Val, m.col2Val, + col2StartOp, partSuffix, + rangeStartArgs, rangeEndArgs, + ) + + result = fmt.Sprintf(` + select /* gh-ost %s.%s %s */ %s, %s + from ( + select %s, %s + from (%s union all %s union all %s) t + order by %s + limit %d + ) select_osc_chunk + order by %s + limit 1`, + databaseName, tableName, hint, m.col1Name, m.col2Name, + m.col1Name, m.col2Name, + part1, part2, part3, + m.orderByAsc, chunkSize, + m.orderByDesc, + ) + return result, explodedArgs, nil +} + func 
BuildUniqueKeyMinValuesPreparedQuery(databaseName, tableName string, uniqueKey *UniqueKey) (string, error) { return buildUniqueKeyMinMaxValuesPreparedQuery(databaseName, tableName, uniqueKey, "asc") } diff --git a/go/sql/builder_test.go b/go/sql/builder_test.go index 0d10b75e7..be7075927 100644 --- a/go/sql/builder_test.go +++ b/go/sql/builder_test.go @@ -192,6 +192,7 @@ func TestBuildRangeInsertQuery(t *testing.T) { require.Equal(t, []interface{}{3, 3, 103, 103}, explodedArgs) } { + // Different first-column values → 3-part UNION insert. uniqueKey := "name_position_uidx" uniqueKeyColumns := NewColumnList([]string{"name", "position"}) rangeStartValues := []string{"@v1s", "@v2s"} @@ -199,6 +200,28 @@ func TestBuildRangeInsertQuery(t *testing.T) { rangeStartArgs := []interface{}{3, 17} rangeEndArgs := []interface{}{103, 117} + query, explodedArgs, err := BuildRangeInsertQuery(databaseName, originalTableName, ghostTableName, sharedColumns, sharedColumns, uniqueKey, uniqueKeyColumns, rangeStartValues, rangeEndValues, rangeStartArgs, rangeEndArgs, true, true, true) + require.NoError(t, err) + expected := ` + insert /* gh-ost mydb.tbl */ ignore + into mydb.ghost (id, name, position) + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1s and position >= @v2s for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name > @v1s and name < @v1e for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1e and position <= @v2e for share nowait)` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) + } + { + // Same first-column value → single range query (no UNION needed). 
+ uniqueKey := "name_position_uidx" + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + rangeStartValues := []string{"@v1s", "@v2s"} + rangeEndValues := []string{"@v1e", "@v2e"} + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{3, 117} + query, explodedArgs, err := BuildRangeInsertQuery(databaseName, originalTableName, ghostTableName, sharedColumns, sharedColumns, uniqueKey, uniqueKeyColumns, rangeStartValues, rangeEndValues, rangeStartArgs, rangeEndArgs, true, true, true) require.NoError(t, err) expected := ` @@ -211,19 +234,77 @@ func TestBuildRangeInsertQuery(t *testing.T) { from mydb.tbl force index (name_position_uidx) - where - (((name > @v1s) or (((name = @v1s)) - AND (position > @v2s)) - or ((name = @v1s) - and (position = @v2s))) - and ((name < @v1e) - or (((name = @v1e)) - AND (position < @v2e)) - or ((name = @v1e) and (position = @v2e)))) - for share nowait + where (name = @v1s and position >= @v2s and position <= @v2e) + for share nowait )` require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) - require.Equal(t, []interface{}{3, 3, 17, 3, 17, 103, 103, 117, 103, 117}, explodedArgs) + require.Equal(t, []interface{}{3, 17, 117}, explodedArgs) + } + { + // includeRangeStartValues=false → exclusive start (col2 uses >, not >=). 
+ uniqueKey := "name_position_uidx" + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + rangeStartValues := []string{"@v1s", "@v2s"} + rangeEndValues := []string{"@v1e", "@v2e"} + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{103, 117} + + query, explodedArgs, err := BuildRangeInsertQuery(databaseName, originalTableName, ghostTableName, sharedColumns, sharedColumns, uniqueKey, uniqueKeyColumns, rangeStartValues, rangeEndValues, rangeStartArgs, rangeEndArgs, false, true, true) + require.NoError(t, err) + expected := ` + insert /* gh-ost mydb.tbl */ ignore + into mydb.ghost (id, name, position) + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1s and position > @v2s for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name > @v1s and name < @v1e for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1e and position <= @v2e for share nowait)` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) + } + { + // transactionalTable=false → no locking clause on UNION subqueries. 
+ uniqueKey := "name_position_uidx" + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + rangeStartValues := []string{"@v1s", "@v2s"} + rangeEndValues := []string{"@v1e", "@v2e"} + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{103, 117} + + query, explodedArgs, err := BuildRangeInsertQuery(databaseName, originalTableName, ghostTableName, sharedColumns, sharedColumns, uniqueKey, uniqueKeyColumns, rangeStartValues, rangeEndValues, rangeStartArgs, rangeEndArgs, true, false, false) + require.NoError(t, err) + expected := ` + insert /* gh-ost mydb.tbl */ ignore + into mydb.ghost (id, name, position) + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1s and position >= @v2s ) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name > @v1s and name < @v1e ) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1e and position <= @v2e )` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) + } + { + // transactionalTable=true, noWait=false → "lock in share mode" on UNION subqueries. 
+ uniqueKey := "name_position_uidx" + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + rangeStartValues := []string{"@v1s", "@v2s"} + rangeEndValues := []string{"@v1e", "@v2e"} + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{103, 117} + + query, explodedArgs, err := BuildRangeInsertQuery(databaseName, originalTableName, ghostTableName, sharedColumns, sharedColumns, uniqueKey, uniqueKeyColumns, rangeStartValues, rangeEndValues, rangeStartArgs, rangeEndArgs, true, true, false) + require.NoError(t, err) + expected := ` + insert /* gh-ost mydb.tbl */ ignore + into mydb.ghost (id, name, position) + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1s and position >= @v2s lock in share mode) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name > @v1s and name < @v1e lock in share mode) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1e and position <= @v2e lock in share mode)` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) } } @@ -274,23 +355,14 @@ func TestBuildRangeInsertQueryRenameMap(t *testing.T) { require.NoError(t, err) expected := ` insert /* gh-ost mydb.tbl */ ignore - into - mydb.ghost - (id, name, location) - ( - select id, name, position - from - mydb.tbl - force index (name_position_uidx) - where - (((name > @v1s) or (((name = @v1s)) - AND (position > @v2s)) or ((name = @v1s) and (position = @v2s))) - and ((name < @v1e) or (((name = @v1e)) AND (position < @v2e)) - or ((name = @v1e) and (position = @v2e)))) - for share nowait - )` + into mydb.ghost (id, name, location) + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1s and position >= @v2s for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) 
where name > @v1s and name < @v1e for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = @v1e and position <= @v2e for share nowait)` require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) - require.Equal(t, []interface{}{3, 3, 17, 3, 17, 103, 103, 117, 103, 117}, explodedArgs) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) } } @@ -305,6 +377,26 @@ func TestBuildRangeInsertPreparedQuery(t *testing.T) { rangeStartArgs := []interface{}{3, 17} rangeEndArgs := []interface{}{103, 117} + query, explodedArgs, err := BuildRangeInsertPreparedQuery(databaseName, originalTableName, ghostTableName, sharedColumns, sharedColumns, uniqueKey, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, true, true, true) + require.NoError(t, err) + expected := ` + insert /* gh-ost mydb.tbl */ ignore + into mydb.ghost (id, name, position) + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = ? and position >= ? for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name > ? and name < ? for share nowait) + union all + (select id, name, position from mydb.tbl force index (name_position_uidx) where name = ? and position <= ? for share nowait)` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) + } + { + // Same first-column value → single range query (no UNION needed). 
+ uniqueKey := "name_position_uidx" + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{3, 117} + query, explodedArgs, err := BuildRangeInsertPreparedQuery(databaseName, originalTableName, ghostTableName, sharedColumns, sharedColumns, uniqueKey, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, true, true, true) require.NoError(t, err) expected := ` @@ -317,11 +409,11 @@ func TestBuildRangeInsertPreparedQuery(t *testing.T) { from mydb.tbl force index (name_position_uidx) - where (((name > ?) or (((name = ?)) AND (position > ?)) or ((name = ?) and (position = ?))) and ((name < ?) or (((name = ?)) AND (position < ?)) or ((name = ?) and (position = ?)))) - for share nowait + where (name = ? and position >= ? and position <= ?) + for share nowait )` require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) - require.Equal(t, []interface{}{3, 3, 17, 3, 17, 103, 103, 117, 103, 117}, explodedArgs) + require.Equal(t, []interface{}{3, 17, 117}, explodedArgs) } } @@ -330,10 +422,35 @@ func TestBuildUniqueKeyRangeEndPreparedQueryViaOffset(t *testing.T) { originalTableName := "tbl" var chunkSize int64 = 500 { + // Different first-column values → 3-part UNION for efficient boundary seeks. uniqueKeyColumns := NewColumnList([]string{"name", "position"}) rangeStartArgs := []interface{}{3, 17} rangeEndArgs := []interface{}{103, 117} + query, explodedArgs, err := BuildUniqueKeyRangeEndPreparedQueryViaOffset(databaseName, originalTableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, false, "test") + require.NoError(t, err) + expected := ` + select /* gh-ost mydb.tbl test */ + name, position + from + ((select name, position from mydb.tbl where name = ? and position > ? order by name asc, position asc limit 500) + union all + (select name, position from mydb.tbl where name > ? and name < ? 
order by name asc, position asc limit 500) + union all + (select name, position from mydb.tbl where name = ? and position <= ? order by name asc, position asc limit 500)) t + order by + name asc, position asc + limit 1 + offset 499` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) + } + { + // Same first-column value → single range query (no UNION needed). + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{3, 117} + query, explodedArgs, err := BuildUniqueKeyRangeEndPreparedQueryViaOffset(databaseName, originalTableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, false, "test") require.NoError(t, err) expected := ` @@ -342,13 +459,13 @@ func TestBuildUniqueKeyRangeEndPreparedQueryViaOffset(t *testing.T) { from mydb.tbl where - ((name > ?) or (((name = ?)) AND (position > ?))) and ((name < ?) or (((name = ?)) AND (position < ?)) or ((name = ?) and (position = ?))) + (name = ? and position > ? and position <= ?) order by name asc, position asc limit 1 offset 499` require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) - require.Equal(t, []interface{}{3, 3, 17, 103, 103, 117, 103, 117}, explodedArgs) + require.Equal(t, []interface{}{3, 17, 117}, explodedArgs) } } @@ -357,30 +474,231 @@ func TestBuildUniqueKeyRangeEndPreparedQueryViaTemptable(t *testing.T) { originalTableName := "tbl" var chunkSize int64 = 500 { + // Different first-column values → 3-part UNION for efficient boundary seeks. 
uniqueKeyColumns := NewColumnList([]string{"name", "position"}) rangeStartArgs := []interface{}{3, 17} rangeEndArgs := []interface{}{103, 117} query, explodedArgs, err := BuildUniqueKeyRangeEndPreparedQueryViaTemptable(databaseName, originalTableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, false, "test") require.NoError(t, err) + expected := ` + select /* gh-ost mydb.tbl test */ name, position + from ( + select name, position + from + ((select name, position from mydb.tbl where name = ? and position > ? order by name asc, position asc limit 500) + union all + (select name, position from mydb.tbl where name > ? and name < ? order by name asc, position asc limit 500) + union all + (select name, position from mydb.tbl where name = ? and position <= ? order by name asc, position asc limit 500)) t + order by name asc, position asc + limit 500 + ) select_osc_chunk + order by name desc, position desc + limit 1` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) + } + { + // Same first-column value → single range query (no UNION needed). + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{3, 117} + + query, explodedArgs, err := BuildUniqueKeyRangeEndPreparedQueryViaTemptable(databaseName, originalTableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, false, "test") + require.NoError(t, err) + expected := ` + select /* gh-ost mydb.tbl test */ name, position + from ( + select name, position + from mydb.tbl + where (name = ? and position > ? and position <= ?) 
+ order by name asc, position asc + limit 500 + ) select_osc_chunk + order by name desc, position desc + limit 1` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + require.Equal(t, []interface{}{3, 17, 117}, explodedArgs) + } +} + +func TestBuildUniqueKeyRangeEndPreparedQueryTwoColumnEnum(t *testing.T) { + databaseName := "mydb" + originalTableName := "tbl" + var chunkSize int64 = 500 + { + // First key column is an enum → ORDER BY must wrap it with concat() so MySQL + // sorts by the enum's text label rather than its internal numeric position. + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + uniqueKeyColumns.SetColumnType("name", EnumColumnType) + rangeStartArgs := []interface{}{"a", 17} + rangeEndArgs := []interface{}{"z", 117} + + query, _, err := BuildUniqueKeyRangeEndPreparedQueryViaOffset(databaseName, originalTableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, false, "test") + require.NoError(t, err) expected := ` select /* gh-ost mydb.tbl test */ name, position + from + ((select name, position from mydb.tbl where name = ? and position > ? order by concat(name) asc, position asc limit 500) + union all + (select name, position from mydb.tbl where name > ? and name < ? order by concat(name) asc, position asc limit 500) + union all + (select name, position from mydb.tbl where name = ? and position <= ? order by concat(name) asc, position asc limit 500)) t + order by + concat(name) asc, position asc + limit 1 + offset 499` + require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) + } + { + // Second key column is an enum → its asc/desc clauses get concat() too. + // ViaTemptable also exercises the desc ORDER BY in the outer wrapper. 
+ uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + uniqueKeyColumns.SetColumnType("position", EnumColumnType) + rangeStartArgs := []interface{}{3, "a"} + rangeEndArgs := []interface{}{103, "z"} + + query, _, err := BuildUniqueKeyRangeEndPreparedQueryViaTemptable(databaseName, originalTableName, uniqueKeyColumns, rangeStartArgs, rangeEndArgs, chunkSize, false, "test") + require.NoError(t, err) + expected := ` + select /* gh-ost mydb.tbl test */ name, position from ( - select - name, position + select name, position from - mydb.tbl - where ((name > ?) or (((name = ?)) AND (position > ?))) and ((name < ?) or (((name = ?)) AND (position < ?)) or ((name = ?) and (position = ?))) - order by - name asc, position asc + ((select name, position from mydb.tbl where name = ? and position > ? order by name asc, concat(position) asc limit 500) + union all + (select name, position from mydb.tbl where name > ? and name < ? order by name asc, concat(position) asc limit 500) + union all + (select name, position from mydb.tbl where name = ? and position <= ? order by name asc, concat(position) asc limit 500)) t + order by name asc, concat(position) asc limit 500 ) select_osc_chunk - order by - name desc, position desc + order by name desc, concat(position) desc limit 1` require.Equal(t, normalizeQuery(expected), normalizeQuery(query)) - require.Equal(t, []interface{}{3, 3, 17, 103, 103, 117, 103, 117}, explodedArgs) + } +} + +func TestSameFirstColumnValue(t *testing.T) { + { + // Identical integer values match. + require.True(t, sameFirstColumnValue([]interface{}{3, 17}, []interface{}{3, 117})) + } + { + // Different integer values do not match. + require.False(t, sameFirstColumnValue([]interface{}{3, 17}, []interface{}{4, 17})) + } + { + // Identical string values match. 
+ require.True(t, sameFirstColumnValue([]interface{}{"abc", 1}, []interface{}{"abc", 2})) + } + { + // fmt.Sprintf("%v", x) is the comparison key — int(3) and "3" stringify + // identically and are therefore treated as equal. This is intentional: + // real callers pass type-consistent args originating from the same row. + require.True(t, sameFirstColumnValue([]interface{}{3, 1}, []interface{}{"3", 2})) + } +} + +func TestNewTwoColumnRangeMeta(t *testing.T) { + { + // No enum columns → plain "col asc" / "col desc" ORDER BY clauses. + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + m := newTwoColumnRangeMeta(uniqueKeyColumns) + require.Equal(t, "`name`", m.col1Name) + require.Equal(t, "`position`", m.col2Name) + require.Equal(t, "?", m.col1Val) + require.Equal(t, "?", m.col2Val) + require.Equal(t, "`name` asc, `position` asc", m.orderByAsc) + require.Equal(t, "`name` desc, `position` desc", m.orderByDesc) + } + { + // Enum on first column only. + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + uniqueKeyColumns.SetColumnType("name", EnumColumnType) + m := newTwoColumnRangeMeta(uniqueKeyColumns) + require.Equal(t, "concat(`name`) asc, `position` asc", m.orderByAsc) + require.Equal(t, "concat(`name`) desc, `position` desc", m.orderByDesc) + } + { + // Enum on second column only. + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + uniqueKeyColumns.SetColumnType("position", EnumColumnType) + m := newTwoColumnRangeMeta(uniqueKeyColumns) + require.Equal(t, "`name` asc, concat(`position`) asc", m.orderByAsc) + require.Equal(t, "`name` desc, concat(`position`) desc", m.orderByDesc) + } + { + // Enum on both columns. 
+ uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + uniqueKeyColumns.SetColumnType("name", EnumColumnType) + uniqueKeyColumns.SetColumnType("position", EnumColumnType) + m := newTwoColumnRangeMeta(uniqueKeyColumns) + require.Equal(t, "concat(`name`) asc, concat(`position`) asc", m.orderByAsc) + require.Equal(t, "concat(`name`) desc, concat(`position`) desc", m.orderByDesc) + } +} + +func TestBuildTwoColumnUnionParts(t *testing.T) { + rangeStartArgs := []interface{}{3, 17} + rangeEndArgs := []interface{}{103, 117} + part1, part2, part3, explodedArgs := buildTwoColumnUnionParts( + "name, position", "mydb.tbl", + "name", "position", + "?", "?", "?", "?", + ">=", "order by name asc, position asc limit 500", + rangeStartArgs, rangeEndArgs, + ) + require.Equal(t, normalizeQuery("(select name, position from mydb.tbl where name = ? and position >= ? order by name asc, position asc limit 500)"), normalizeQuery(part1)) + require.Equal(t, normalizeQuery("(select name, position from mydb.tbl where name > ? and name < ? order by name asc, position asc limit 500)"), normalizeQuery(part2)) + require.Equal(t, normalizeQuery("(select name, position from mydb.tbl where name = ? and position <= ? order by name asc, position asc limit 500)"), normalizeQuery(part3)) + // Args follow the column order each subquery binds: (start1, start2), (start1, end1), (end1, end2). 
+ require.Equal(t, []interface{}{3, 17, 3, 103, 103, 117}, explodedArgs) +} + +func TestBuildRangeInsertQueryTwoColumnGuards(t *testing.T) { + databaseName := "mydb" + originalTableName := "tbl" + ghostTableName := "ghost" + sharedColumnsListing := "id, name, position" + uniqueKey := "name_position_uidx" + uniqueKeyColumns := NewColumnList([]string{"name", "position"}) + validValues := []string{"@v1", "@v2"} + validArgs := []interface{}{3, 17} + + call := func(rangeStartValues, rangeEndValues []string, rangeStartArgs, rangeEndArgs []interface{}) error { + _, _, err := buildRangeInsertQueryTwoColumn( + databaseName, originalTableName, ghostTableName, + sharedColumnsListing, sharedColumnsListing, + uniqueKey, uniqueKeyColumns, + rangeStartValues, rangeEndValues, + rangeStartArgs, rangeEndArgs, + GreaterThanOrEqualsComparisonSign, "", + ) + return err + } + + { + // rangeStartValues length mismatch. + err := call([]string{"@v1"}, validValues, validArgs, validArgs) + require.ErrorContains(t, err, "got 2 columns but 1 rangeStartValues") + } + { + // rangeEndValues length mismatch. + err := call(validValues, []string{"@v1", "@v2", "@v3"}, validArgs, validArgs) + require.ErrorContains(t, err, "got 2 columns but 3 rangeEndValues") + } + { + // rangeStartArgs length mismatch. + err := call(validValues, validValues, []interface{}{3}, validArgs) + require.ErrorContains(t, err, "got 2 columns but 1 rangeStartArgs") + } + { + // rangeEndArgs length mismatch. 
+ err := call(validValues, validValues, validArgs, []interface{}{}) + require.ErrorContains(t, err, "got 2 columns but 0 rangeEndArgs") } } From 83aaa708e7ef3548625a4e56cded4f821c674335 Mon Sep 17 00:00:00 2001 From: meiji163 Date: Thu, 4 Jun 2026 11:56:42 -0600 Subject: [PATCH 7/7] Disable CGO for release builds (#1697) * Disable CGO for release builds * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 1a5af7036..8c5bf47d8 100755 --- a/build.sh +++ b/build.sh @@ -24,7 +24,7 @@ function build { echo "Building ${osname}-${GOARCH} binary" export GOOS export GOARCH - go build -ldflags "$ldflags" -o $buildpath/$target go/cmd/gh-ost/main.go + CGO_ENABLED="${CGO_ENABLED:-0}" go build -ldflags "$ldflags" -o "$buildpath/$target" go/cmd/gh-ost/main.go if [ $? -ne 0 ]; then echo "Build failed for ${osname} ${GOARCH}."