From 04d0e0c170f4f195ef6aec24c2e1e7975af5ca15 Mon Sep 17 00:00:00 2001
From: Shirly Radco
Date: Thu, 12 Mar 2026 20:34:26 +0200
Subject: [PATCH] docs: add API docs, CI config, and e2e tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add alert management API documentation, classification guide, GitHub
Actions workflow for unit tests, and end-to-end test coverage for alert
management and relabeled rules.

Signed-off-by: Shirly Radco
Signed-off-by: João Vilaça
Signed-off-by: Aviv Litman
Co-authored-by: AI Assistant
---
 test/e2e/alert_management_api_test.go | 229 ++++++++++++++++++++++++++
 1 file changed, 229 insertions(+)
 create mode 100644 test/e2e/alert_management_api_test.go

diff --git a/test/e2e/alert_management_api_test.go b/test/e2e/alert_management_api_test.go
new file mode 100644
index 000000000..bbc26ebd6
--- /dev/null
+++ b/test/e2e/alert_management_api_test.go
@@ -0,0 +1,229 @@
+package e2e
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"testing"
+	"time"
+
+	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/util/wait"
+
+	"github.com/openshift/monitoring-plugin/internal/managementrouter"
+	"github.com/openshift/monitoring-plugin/pkg/k8s"
+	"github.com/openshift/monitoring-plugin/test/e2e/framework"
+)
+
+// listRulesForAlertMgmt sends a GET to the plugin's /api/v1/alerting/rules
+// endpoint and returns the rules of every group in the response, flattened
+// into one slice.
+//
+// A non-200 response yields an error carrying the status code and the start of
+// the response body, so callers that poll this helper can log why the request
+// was rejected.
+func listRulesForAlertMgmt(ctx context.Context, f *framework.Framework) ([]monitoringv1.Rule, error) {
+	// Cap on how much of an error response body is copied into the error.
+	const maxErrorBodyBytes = 4 << 10
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, f.PluginURL+"/api/v1/alerting/rules", nil)
+	if err != nil {
+		return nil, fmt.Errorf("building list rules request: %w", err)
+	}
+
+	resp, err := f.HTTPClient().Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("listing rules: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only diagnostic context, so a failed read
+		// must not hide the status code.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
+		return nil, fmt.Errorf("unexpected status code: %d, body: %q", resp.StatusCode, body)
+	}
+
+	var listResp struct {
+		Data struct {
+			Groups []struct {
+				Rules []monitoringv1.Rule `json:"rules"`
+			} `json:"groups"`
+		} `json:"data"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil {
+		return nil, fmt.Errorf("decoding list rules response: %w", err)
+	}
+
+	var allRules []monitoringv1.Rule
+	for _, group := range listResp.Data.Groups {
+		allRules = append(allRules, group.Rules...)
+	}
+	return allRules, nil
+}
+
+// bulkDeleteTestRule builds one alerting rule for the bulk delete test with a
+// 5m "for" duration, a severity label, and a description annotation.
+func bulkDeleteTestRule(alert, expr, severity, description string) monitoringv1.Rule {
+	forDuration := monitoringv1.Duration("5m")
+	return monitoringv1.Rule{
+		Alert: alert,
+		Expr:  intstr.FromString(expr),
+		For:   &forDuration,
+		Labels: map[string]string{
+			"severity": severity,
+		},
+		Annotations: map[string]string{
+			"description": description,
+		},
+	}
+}
+
+// TestBulkDeleteUserDefinedAlertRules creates a PrometheusRule with three
+// alerting rules, bulk-deletes two of them with a DELETE request to the
+// plugin's /api/v1/alerting/rules endpoint, and verifies that only the third
+// rule remains in the PrometheusRule resource.
+func TestBulkDeleteUserDefinedAlertRules(t *testing.T) {
+	const (
+		deletedAlert1  = "TestBulkDeleteAlert1"
+		deletedAlert2  = "TestBulkDeleteAlert2"
+		survivingAlert = "TestBulkDeleteAlert3"
+	)
+
+	f, err := framework.New()
+	if err != nil {
+		t.Fatalf("Failed to create framework: %v", err)
+	}
+
+	ctx := context.Background()
+
+	testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-bulk-delete", false)
+	if err != nil {
+		t.Fatalf("Failed to create test namespace: %v", err)
+	}
+	defer cleanup()
+
+	_, err = createPrometheusRule(ctx, f, testNamespace,
+		bulkDeleteTestRule(deletedAlert1, "up == 0", "warning", "Test alert 1 for bulk delete testing"),
+		bulkDeleteTestRule(deletedAlert2, "up == 1", "info", "Test alert 2 for bulk delete testing"),
+		bulkDeleteTestRule(survivingAlert, "up == 2", "critical", "Test alert 3 for bulk delete testing"),
+	)
+	if err != nil {
+		t.Fatalf("Failed to create PrometheusRule: %v", err)
+	}
+
+	// Poll until both rules that will be deleted are listed with a non-empty
+	// k8s.AlertRuleLabelId label; those label values are the IDs to delete.
+	var ruleIDsToDelete []string
+	err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) {
+		rules, err := listRulesForAlertMgmt(ctx, f)
+		if err != nil {
+			// Log and keep polling instead of aborting on a listing error.
+			t.Logf("Failed to list rules: %v", err)
+			return false, nil
+		}
+
+		var foundRuleIDs []string
+		for _, rule := range rules {
+			if rule.Alert != deletedAlert1 && rule.Alert != deletedAlert2 {
+				continue
+			}
+			if ruleID := rule.Labels[k8s.AlertRuleLabelId]; ruleID != "" {
+				foundRuleIDs = append(foundRuleIDs, ruleID)
+			}
+		}
+
+		if len(foundRuleIDs) != 2 {
+			t.Logf("Found %d/2 test alerts in memory", len(foundRuleIDs))
+			return false, nil
+		}
+
+		ruleIDsToDelete = foundRuleIDs
+		t.Logf("Found rule IDs to delete: %v", ruleIDsToDelete)
+		return true, nil
+	})
+	if err != nil {
+		t.Fatalf("Timeout waiting for alerts to appear in memory: %v", err)
+	}
+
+	reqJSON, err := json.Marshal(managementrouter.BulkDeleteAlertRulesRequest{
+		RuleIds: ruleIDsToDelete,
+	})
+	if err != nil {
+		t.Fatalf("Failed to marshal request body: %v", err)
+	}
+
+	bulkDeleteURL := f.PluginURL + "/api/v1/alerting/rules"
+	req, err := http.NewRequestWithContext(ctx, http.MethodDelete, bulkDeleteURL, bytes.NewReader(reqJSON))
+	if err != nil {
+		t.Fatalf("Failed to create HTTP request: %v", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := f.HTTPClient().Do(req)
+	if err != nil {
+		t.Fatalf("Failed to make bulk delete request: %v", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body only enriches the failure message.
+		body, _ := io.ReadAll(resp.Body)
+		t.Fatalf("Expected status code %d, got %d. Response body: %s", http.StatusOK, resp.StatusCode, string(body))
+	}
+
+	var bulkDeleteResp managementrouter.BulkDeleteAlertRulesResponse
+	if err := json.NewDecoder(resp.Body).Decode(&bulkDeleteResp); err != nil {
+		t.Fatalf("Failed to decode response: %v", err)
+	}
+
+	if len(bulkDeleteResp.Rules) != 2 {
+		t.Fatalf("Expected 2 rules in response, got %d", len(bulkDeleteResp.Rules))
+	}
+
+	// Each rule carries its own status in the response; report every failed
+	// deletion rather than stopping at the first.
+	for _, result := range bulkDeleteResp.Rules {
+		if result.StatusCode != http.StatusNoContent {
+			t.Errorf("Rule %s deletion failed with status %d: %v", result.Id, result.StatusCode, result.Message)
+			continue
+		}
+		t.Logf("Rule %s deleted successfully", result.Id)
+	}
+
+	// NOTE(review): "test-prometheus-rule" is presumably the name that
+	// createPrometheusRule assigns to the resource; confirm against that helper.
+	promRule, err := f.Monitoringv1clientset.MonitoringV1().PrometheusRules(testNamespace).Get(
+		ctx,
+		"test-prometheus-rule",
+		metav1.GetOptions{},
+	)
+	if err != nil {
+		t.Fatalf("Failed to get PrometheusRule after deletion: %v", err)
+	}
+
+	if len(promRule.Spec.Groups) != 1 {
+		t.Fatalf("Expected 1 rule group, got %d", len(promRule.Spec.Groups))
+	}
+
+	ruleGroup := promRule.Spec.Groups[0]
+	if len(ruleGroup.Rules) != 1 {
+		t.Fatalf("Expected 1 rule remaining, got %d: %+v", len(ruleGroup.Rules), ruleGroup.Rules)
+	}
+
+	remainingRule := ruleGroup.Rules[0]
+	if remainingRule.Alert != survivingAlert {
+		t.Errorf("Expected remaining rule to be TestBulkDeleteAlert3, got %s", remainingRule.Alert)
+	}
+
+	if remainingRule.Labels["severity"] != "critical" {
+		t.Errorf("Expected severity=critical, got %s", remainingRule.Labels["severity"])
+	}
+
+	t.Log("Bulk delete test completed successfully - only TestBulkDeleteAlert3 remains")
+}