diff --git a/configs/local.yaml b/configs/local.yaml index b420f30..a61c8da 100644 --- a/configs/local.yaml +++ b/configs/local.yaml @@ -11,6 +11,9 @@ pool: # plugins location plugin_directory: ./plugins +janitor: + finished_job_retention_days: 14 + # auth plugin auth: plugin: ./plugins/auth_header.so diff --git a/internal/pkg/janitor/janitor.go b/internal/pkg/janitor/janitor.go index 165d5ef..81c847c 100644 --- a/internal/pkg/janitor/janitor.go +++ b/internal/pkg/janitor/janitor.go @@ -1,16 +1,21 @@ package janitor import ( - "fmt" "time" + "github.com/hladush/go-telemetry/pkg/telemetry" "github.com/patterninc/heimdall/internal/pkg/database" ) +var ( + startMethod = telemetry.NewMethod("Start", "Janitor") +) + type Janitor struct { - Keepalive int `yaml:"keepalive,omitempty" json:"keepalive,omitempty"` - StaleJob int `yaml:"stale_job,omitempty" json:"stale_job,omitempty"` - db *database.Database + Keepalive int `yaml:"keepalive,omitempty" json:"keepalive,omitempty"` + StaleJob int `yaml:"stale_job,omitempty" json:"stale_job,omitempty"` + FinishedJobRetentionDays int `yaml:"finished_job_retention_days,omitempty" json:"finished_job_retention_days,omitempty"` + db *database.Database } func (j *Janitor) Start(d *database.Database) error { @@ -29,9 +34,12 @@ func (j *Janitor) Start(d *database.Database) error { for { if err := j.cleanupStaleJobs(); err != nil { - fmt.Println(`Janitor error:`, err) + startMethod.LogAndCountError(err, "cleanup_stale_jobs") } + if err := j.cleanupFinishedJobs(); err != nil { + startMethod.LogAndCountError(err, "cleanup_finished_jobs") + } time.Sleep(60 * time.Second) } diff --git a/internal/pkg/janitor/job.go b/internal/pkg/janitor/job.go index 2e4ea80..290644d 100644 --- a/internal/pkg/janitor/job.go +++ b/internal/pkg/janitor/job.go @@ -1,7 +1,10 @@ package janitor import ( + "database/sql" _ "embed" + "fmt" + "time" "github.com/patterninc/heimdall/internal/pkg/database" ) @@ -15,6 +18,30 @@ var queryFailStaleJobs string //go:embed queries/stale_jobs_delete.sql var queryStaleJobsDelete string +//go:embed queries/old_jobs_cluster_tags_delete.sql +var queryOldJobsClusterTagsDelete string + +//go:embed queries/old_jobs_command_tags_delete.sql +var queryOldJobsCommandTagsDelete string + +//go:embed queries/old_jobs_tags_delete.sql +var queryOldJobsTagsDelete string + +//go:embed queries/old_jobs_delete.sql +var queryOldJobsDelete string + +//go:embed queries/old_job_biggest_id.sql +var queryOldJobsBiggestID string + +var ( + queriesForOldJobsCleanup = []string{ + queryOldJobsClusterTagsDelete, + queryOldJobsCommandTagsDelete, + queryOldJobsTagsDelete, + queryOldJobsDelete, + } +) + func (j *Janitor) cleanupStaleJobs() error { // let's find the jobs we'll be cleaning up... @@ -74,3 +101,50 @@ func (j *Janitor) cleanupStaleJobs() error { return nil } + +func (j *Janitor) cleanupFinishedJobs() error { + if j.FinishedJobRetentionDays == 0 { + return nil + } + // open session + sess, err := j.db.NewSession(false) + if err != nil { + return err + } + defer sess.Close() + + retentionTimestamp := time.Now().AddDate(0, 0, -j.FinishedJobRetentionDays).Unix() + + // get biggest ID of old jobs + row, err := sess.QueryRow(queryOldJobsBiggestID, retentionTimestamp) + if err != nil { + return fmt.Errorf("failed to get biggest ID of old jobs: %w", err) + } + + var biggestID sql.NullInt64 + if err := row.Scan(&biggestID); err != nil { + if err == sql.ErrNoRows { + return nil + } + return fmt.Errorf("failed to get biggest ID of old jobs: %w", err) + } + + if !biggestID.Valid || biggestID.Int64 == 0 { + return nil + } + + // remove old jobs data + for _, q := range queriesForOldJobsCleanup { + for { + affectedRows, err := sess.Exec(q, biggestID.Int64) + if err != nil { + return err + } + if affectedRows == 0 { + break + } + } + } + + return nil +} diff --git a/internal/pkg/janitor/queries/old_job_biggest_id.sql b/internal/pkg/janitor/queries/old_job_biggest_id.sql new file mode 100644 index 0000000..a49bb73 --- /dev/null +++ b/internal/pkg/janitor/queries/old_job_biggest_id.sql @@ -0,0 +1,5 @@ +SELECT system_job_id +FROM jobs +WHERE updated_at < $1 +ORDER BY updated_at desc +LIMIT 1 \ No newline at end of file diff --git a/internal/pkg/janitor/queries/old_jobs_cluster_tags_delete.sql b/internal/pkg/janitor/queries/old_jobs_cluster_tags_delete.sql new file mode 100644 index 0000000..bcd0305 --- /dev/null +++ b/internal/pkg/janitor/queries/old_jobs_cluster_tags_delete.sql @@ -0,0 +1,7 @@ +DELETE FROM job_cluster_tags +WHERE system_job_id IN ( + SELECT system_job_id + FROM job_cluster_tags + WHERE system_job_id <= $1 + LIMIT 100 +); diff --git a/internal/pkg/janitor/queries/old_jobs_command_tags_delete.sql b/internal/pkg/janitor/queries/old_jobs_command_tags_delete.sql new file mode 100644 index 0000000..5b2d570 --- /dev/null +++ b/internal/pkg/janitor/queries/old_jobs_command_tags_delete.sql @@ -0,0 +1,8 @@ + +DELETE FROM job_command_tags +WHERE system_job_id IN ( + SELECT system_job_id + FROM job_command_tags + WHERE system_job_id <= $1 + LIMIT 100 +); \ No newline at end of file diff --git a/internal/pkg/janitor/queries/old_jobs_delete.sql b/internal/pkg/janitor/queries/old_jobs_delete.sql new file mode 100644 index 0000000..d248e5b --- /dev/null +++ b/internal/pkg/janitor/queries/old_jobs_delete.sql @@ -0,0 +1,8 @@ + +DELETE FROM jobs +WHERE system_job_id IN ( + SELECT system_job_id + FROM jobs + WHERE system_job_id <= $1 + LIMIT 100 +); \ No newline at end of file diff --git a/internal/pkg/janitor/queries/old_jobs_tags_delete.sql b/internal/pkg/janitor/queries/old_jobs_tags_delete.sql new file mode 100644 index 0000000..110b778 --- /dev/null +++ b/internal/pkg/janitor/queries/old_jobs_tags_delete.sql @@ -0,0 +1,7 @@ +DELETE FROM job_tags +WHERE system_job_id IN ( + SELECT system_job_id + FROM job_tags + WHERE system_job_id <= $1 + LIMIT 100 +); \ No newline at end of file