From ecac686af15357fec889b77a9e1d6b503499e1f8 Mon Sep 17 00:00:00 2001 From: Aditya Thebe Date: Fri, 15 May 2026 19:09:50 +0545 Subject: [PATCH] refactor(db): use duty config change dedupe Config change dedupe now lives in duty with the shared fingerprint cache and dedupe behavior. Delegate cache initialization and dedupe decisions to duty, keeping a temporary compatibility shim while config-db still uses its local ConfigChange model in the scrape pipeline. --- db/change_traversal.go | 5 +- db/changes.go | 95 +++++++++++++++++--------------------- db/diff_test.go | 54 ---------------------- db/models/config_change.go | 6 --- db/update.go | 2 +- debug.go | 3 +- go.mod | 2 +- go.sum | 4 +- 8 files changed, 50 insertions(+), 121 deletions(-) diff --git a/db/change_traversal.go b/db/change_traversal.go index a82867460..c0e935a3e 100644 --- a/db/change_traversal.go +++ b/db/change_traversal.go @@ -25,10 +25,7 @@ func resolveChange(change *v1.ChangeResult, action string, targetConfigID string ExternalCreatedBy: change.CreatedBy, CreatedAt: change.CreatedAt, Action: action, - } - - if change.Diff != nil { - change.Resolved.Diff = *change.Diff + Diff: change.Diff, } } diff --git a/db/changes.go b/db/changes.go index 30147dfd4..ea4a30cb6 100644 --- a/db/changes.go +++ b/db/changes.go @@ -1,76 +1,67 @@ package db import ( - "fmt" "time" "github.com/flanksource/config-db/api" "github.com/flanksource/config-db/db/models" "github.com/flanksource/duty/context" - "github.com/patrickmn/go-cache" + dutyModels "github.com/flanksource/duty/models" ) -var ChangeCacheByFingerprint = cache.New(time.Hour, time.Hour) - -func changeFingeprintCacheKey(configID, fingerprint string) string { - return fmt.Sprintf("%s:%s", configID, fingerprint) -} - func InitChangeFingerprintCache(ctx context.Context, window time.Duration) error { - var changes []*models.ConfigChange - if err := ctx.DB().Where("fingerprint IS NOT NULL").Where(fmt.Sprintf("created_at >= NOW() - INTERVAL '%d SECOND'", int(window.Seconds()))).Find(&changes).Error; err != nil { - return err - } - - ctx.Logger.Debugf("initializing changes cache with %d changes", len(changes)) - - for _, c := range changes { - key := changeFingeprintCacheKey(c.ConfigID, *c.Fingerprint) - ChangeCacheByFingerprint.Set(key, c.ID, time.Until(c.CreatedAt.Add(window))) - } - - return nil + return dutyModels.InitChangeFingerprintCache(ctx.DB(), window, ctx.Logger.Debugf) } -func dedupChanges(window time.Duration, changes []*models.ConfigChange) ([]*models.ConfigChange, []models.ConfigChangeUpdate) { - if len(changes) == 0 { - return nil, nil - } +// configChangeUpdate keeps the rest of config-db working with its local +// ConfigChange model while duty owns the dedupe decision/result shape. +type configChangeUpdate struct { + Change *models.ConfigChange + CountIncrement int + FirstInBatch bool +} - var nonDuped []*models.ConfigChange - var fingerprinted = map[string]models.ConfigChangeUpdate{} +// dedupChanges is a temporary compatibility shim around duty's ConfigChange +// deduper. +// +// duty now owns fingerprint cache initialization and dedupe behavior, but +// config-db still uses its local db/models.ConfigChange in the scrape pipeline. +// Until that model is fully replaced with duty/models.ConfigChange, this helper +// projects the fields required by duty's deduper (ID, ConfigID, Fingerprint), +// calls dutyModels.DedupConfigChanges, then maps the dedupe decisions back onto +// the original config-db change objects. +func dedupChanges(window time.Duration, changes []*models.ConfigChange) ([]*models.ConfigChange, []configChangeUpdate) { + dutyChanges := make([]*dutyModels.ConfigChange, 0, len(changes)) + originals := make(map[*dutyModels.ConfigChange]*models.ConfigChange, len(changes)) for _, change := range changes { - if change.Fingerprint == nil { - nonDuped = append(nonDuped, change) - continue + dutyChange := &dutyModels.ConfigChange{ + ID: change.ID, + ConfigID: change.ConfigID, + Fingerprint: change.Fingerprint, } + dutyChanges = append(dutyChanges, dutyChange) + originals[dutyChange] = change + } - key := changeFingeprintCacheKey(change.ConfigID, *change.Fingerprint) - if existingChangeID, ok := ChangeCacheByFingerprint.Get(key); !ok { - ChangeCacheByFingerprint.Set(key, change.ID, window) - fingerprinted[change.ID] = models.ConfigChangeUpdate{Change: change, CountIncrement: 0, FirstInBatch: true} - } else { - change.ID = existingChangeID.(string) - ChangeCacheByFingerprint.Set(key, change.ID, window) // Refresh the cache expiry + nonDupedDuty, dedupedDuty := dutyModels.DedupConfigChanges(window, dutyChanges) - if existing, ok := fingerprinted[change.ID]; ok { - // Preserve the original change, just increment the count - fingerprinted[change.ID] = models.ConfigChangeUpdate{Change: existing.Change, CountIncrement: existing.CountIncrement + 1, FirstInBatch: existing.FirstInBatch} - } else { - fingerprinted[change.ID] = models.ConfigChangeUpdate{Change: change, CountIncrement: 1, FirstInBatch: false} - } - } + nonDuped := make([]*models.ConfigChange, 0, len(nonDupedDuty)) + for _, dutyChange := range nonDupedDuty { + change := originals[dutyChange] + change.ID = dutyChange.ID + nonDuped = append(nonDuped, change) } - var deduped []models.ConfigChangeUpdate - for _, v := range fingerprinted { - if v.FirstInBatch || v.CountIncrement == 0 { - // First occurrence in the batch will be inserted - nonDuped = append(nonDuped, v.Change) - } else { - deduped = append(deduped, v) - } + deduped := make([]configChangeUpdate, 0, len(dedupedDuty)) + for _, dutyUpdate := range dedupedDuty { + change := originals[dutyUpdate.Change] + change.ID = dutyUpdate.Change.ID + deduped = append(deduped, configChangeUpdate{ + Change: change, + CountIncrement: dutyUpdate.CountIncrement, + FirstInBatch: dutyUpdate.FirstInBatch, + }) } return nonDuped, deduped diff --git a/db/diff_test.go b/db/diff_test.go index e0d24fa3e..9897db33b 100644 --- a/db/diff_test.go +++ b/db/diff_test.go @@ -7,13 +7,9 @@ import ( "runtime" "runtime/debug" "testing" - "time" - "github.com/flanksource/config-db/db/models" - "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/samber/lo" "github.com/shirou/gopsutil/v3/process" ) @@ -35,56 +31,6 @@ var _ = Describe("generateDiff", func() { ) }) -var _ = Describe("dedupChanges", func() { - It("deduplicates changes by fingerprint and separates non-duplicates", func() { - abcKey := changeFingeprintCacheKey("dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", "abc") - ChangeCacheByFingerprint.Set(abcKey, "8b9d2659-7a11-46ff-bdff-1c4e8964c437", time.Hour) - defer func() { - // Clean up inserted cache keys so they don't leak into other specs - ChangeCacheByFingerprint.Delete(abcKey) - ChangeCacheByFingerprint.Delete(changeFingeprintCacheKey("dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", "xyz")) - }() - - changes := []*models.ConfigChange{ - {ID: "8b9d2659-7a11-46ff-bdff-1c4e8964c437", CreatedAt: time.Date(2024, 01, 02, 0, 0, 0, 0, time.UTC), Fingerprint: lo.ToPtr("abc"), ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", Summary: "first", Count: 1}, - {ID: uuid.NewString(), CreatedAt: time.Date(2024, 02, 02, 0, 0, 0, 0, time.UTC), Fingerprint: lo.ToPtr("abc"), ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", Summary: "second", Count: 1}, - {ID: uuid.NewString(), CreatedAt: time.Date(2024, 03, 02, 0, 0, 0, 0, time.UTC), Fingerprint: lo.ToPtr("abc"), ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", Summary: "third", Count: 1}, - {ID: uuid.NewString(), CreatedAt: time.Date(2024, 04, 02, 0, 0, 0, 0, time.UTC), Fingerprint: lo.ToPtr("abc"), ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", Summary: "fourth", Count: 1}, - {ID: "01eda583-3f5e-4c44-851f-93ac73272b92", CreatedAt: time.Date(2024, 04, 02, 0, 0, 0, 0, time.UTC), Fingerprint: lo.ToPtr("xyz"), ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", Summary: "different", Count: 1}, - {ID: uuid.NewString(), CreatedAt: time.Date(2024, 04, 03, 0, 0, 0, 0, time.UTC), Fingerprint: lo.ToPtr("xyz"), ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", Summary: "different two", Count: 1}, - } - - expectedDeduped := []models.ConfigChangeUpdate{ - { - Change: &models.ConfigChange{ - ID: "8b9d2659-7a11-46ff-bdff-1c4e8964c437", - CreatedAt: time.Date(2024, 01, 02, 0, 0, 0, 0, time.UTC), - Fingerprint: lo.ToPtr("abc"), - ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", - Summary: "first", - Count: 1, - }, - CountIncrement: 4, - }, - } - - expectedNonDuped := []*models.ConfigChange{ - { - ID: "01eda583-3f5e-4c44-851f-93ac73272b92", - CreatedAt: time.Date(2024, 04, 02, 0, 0, 0, 0, time.UTC), - Fingerprint: lo.ToPtr("xyz"), - ConfigID: "dae6b3f5-bc26-48ac-8ad4-06e5efbb2a7d", - Summary: "different", - Count: 1, - }, - } - - nonDuped, deduped := dedupChanges(time.Hour, changes) - Expect(deduped).To(Equal(expectedDeduped)) - Expect(nonDuped).To(Equal(expectedNonDuped)) - }) -}) - // go test -benchmem -run=^$ -bench ^BenchmarkDiffGenerator$ github.com/flanksource/config-db/db -count=5 -benchtime=10s -v -memprofile memprofile.out -cpuprofile profile.out func BenchmarkDiffGenerator(b *testing.B) { for _, c := range []struct { diff --git a/db/models/config_change.go b/db/models/config_change.go index 3bfa521f0..db51b50b0 100644 --- a/db/models/config_change.go +++ b/db/models/config_change.go @@ -14,12 +14,6 @@ import ( v1 "github.com/flanksource/config-db/api/v1" ) -type ConfigChangeUpdate struct { - Change *ConfigChange - CountIncrement int - FirstInBatch bool // First occurrence in current batch (not found in cache) -} - // ConfigChange represents the config change database table type ConfigChange struct { ExternalID string `gorm:"-"` diff --git a/db/update.go b/db/update.go index 8c240b314..6f2387799 100644 --- a/db/update.go +++ b/db/update.go @@ -910,7 +910,7 @@ func saveResults(ctx api.ScrapeContext, results []v1.ScrapeResult) (v1.ScrapeSum dedupWindow := ctx.Properties().Duration("changes.dedup.window", time.Hour) var newChanges []*models.ConfigChange - var deduped []models.ConfigChangeUpdate + var deduped []configChangeUpdate if ctx.Properties().On(false, "changes.dedup.disable") { newChanges = extractResult.newChanges diff --git a/debug.go b/debug.go index 06b6265fe..0b38b9857 100644 --- a/debug.go +++ b/debug.go @@ -12,6 +12,7 @@ import ( "github.com/flanksource/config-db/scrapers" "github.com/flanksource/config-db/scrapers/kubernetes" "github.com/flanksource/config-db/utils" + dutyModels "github.com/flanksource/duty/models" "github.com/labstack/echo/v4" ) @@ -42,7 +43,7 @@ func init() { utils.TrackObject("ScraperTempCache", &api.ScraperTempCache) utils.TrackObject("IgnoreCache", &kubernetes.IgnoreCache) utils.TrackObject("OrphanCache", &db.OrphanCache) - utils.TrackObject("ChangeCacheByFingerprint", &db.ChangeCacheByFingerprint) + utils.TrackObject("ChangeCacheByFingerprint", &dutyModels.ChangeCacheByFingerprint) utils.TrackObject("ParentCache", &db.ParentCache) utils.TrackObject("ResourceIDMapPerCluster", &kubernetes.ResourceIDMapPerCluster) } diff --git a/go.mod b/go.mod index dfc460ed3..8486f3355 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/flanksource/clicky v1.21.8 github.com/flanksource/commons v1.51.4 github.com/flanksource/deps v1.0.28 - github.com/flanksource/duty v1.0.1307 + github.com/flanksource/duty v1.0.1308-0.20260515121513-ab373e44de48 github.com/flanksource/gomplate/v3 v3.24.79 github.com/flanksource/is-healthy v1.0.87 github.com/flanksource/ketall v1.1.9 diff --git a/go.sum b/go.sum index 5b502d343..c8fee0640 100644 --- a/go.sum +++ b/go.sum @@ -502,8 +502,8 @@ github.com/flanksource/commons v1.51.4 h1:ys3O4g0exWoz/viKf9vwTUItTkP/RgP1jORooh github.com/flanksource/commons v1.51.4/go.mod h1:XXLA39QGuFUyqIK19W5oDCIZDinLzFyFcGj83ZkyOfo= github.com/flanksource/deps v1.0.28 h1:mm7l7WjzLbkj2aFrgnlMaRizp+j+0x22TvtwXzRlFtE= github.com/flanksource/deps v1.0.28/go.mod h1:2YRfP32WZrMxGVMYV51RlVHZfgerxf8DT3TqSgjzmTQ= -github.com/flanksource/duty v1.0.1307 h1:5wmA2uHo9oi0T6EfiZ4/dtMMsVmZ6vZ2bJ/l8boIVPs= -github.com/flanksource/duty v1.0.1307/go.mod h1:aH4xdGF3brwBiOKUEFsspgu8U7tBiJOZDXrEqB3OMtc= +github.com/flanksource/duty v1.0.1308-0.20260515121513-ab373e44de48 h1:ncyUauhAmQa+3MdGSSFudUnSYL5KO7fjM+fjZSz/ON8= +github.com/flanksource/duty v1.0.1308-0.20260515121513-ab373e44de48/go.mod h1:aH4xdGF3brwBiOKUEFsspgu8U7tBiJOZDXrEqB3OMtc= github.com/flanksource/gomplate/v3 v3.24.79 h1:T5Ls0tjsnDhcV/dQWjrm2UpHiwOhytDLmYDSF0O6p3Q= github.com/flanksource/gomplate/v3 v3.24.79/go.mod h1:RzIg+YwNQI0eUV61LtqmhNN2Qw8ebm1cGa6IhNQmkWE= github.com/flanksource/is-healthy v1.0.87 h1:wSK9wI9tu//gdKO9JxyZe8ZQ5H7MCpwG17KdbWaiMeM=