From 4efe1c0987bffc23c008b65833850c106aaa2933 Mon Sep 17 00:00:00 2001 From: Gunju Kim Date: Fri, 27 Mar 2026 09:17:47 +0000 Subject: [PATCH] Add Prometheus metrics for GitHub API calls --- cmd/kelos-spawner/main.go | 2 +- cmd/kelos-spawner/reconciler.go | 1 + go.mod | 2 +- internal/source/metrics_transport.go | 97 +++++++++++++ internal/source/metrics_transport_test.go | 164 ++++++++++++++++++++++ 5 files changed, 264 insertions(+), 2 deletions(-) create mode 100644 internal/source/metrics_transport.go create mode 100644 internal/source/metrics_transport_test.go diff --git a/cmd/kelos-spawner/main.go b/cmd/kelos-spawner/main.go index 70f45487..811af242 100644 --- a/cmd/kelos-spawner/main.go +++ b/cmd/kelos-spawner/main.go @@ -94,7 +94,7 @@ func main() { log.Info("Starting spawner", "taskspawner", key, "oneShot", oneShot) httpClient := &http.Client{ - Transport: source.NewETagTransport(http.DefaultTransport, log), + Transport: source.NewETagTransport(source.NewMetricsTransport(http.DefaultTransport), log), } cfgArgs := spawnerRuntimeConfig{ diff --git a/cmd/kelos-spawner/reconciler.go b/cmd/kelos-spawner/reconciler.go index 211c6f53..dc2eca4e 100644 --- a/cmd/kelos-spawner/reconciler.go +++ b/cmd/kelos-spawner/reconciler.go @@ -91,6 +91,7 @@ func runOnce(ctx context.Context, cl client.Client, key types.NamespacedName, cf Token: token, TokenFile: cfg.GitHubTokenFile, BaseURL: cfg.GitHubAPIBaseURL, + Client: cfg.HTTPClient, }, } if err := runReportingCycle(ctx, cl, key, reporter); err != nil { diff --git a/go.mod b/go.mod index a714a62f..ffce2050 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/onsi/gomega v1.38.3 github.com/posthog/posthog-go v1.10.0 github.com/prometheus/client_golang v1.23.2 + github.com/prometheus/client_model v0.6.2 github.com/robfig/cron/v3 v3.0.1 github.com/spf13/cobra v1.10.2 go.uber.org/zap v1.27.0 @@ -72,7 +73,6 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect diff --git a/internal/source/metrics_transport.go b/internal/source/metrics_transport.go new file mode 100644 index 00000000..6027d4be --- /dev/null +++ b/internal/source/metrics_transport.go @@ -0,0 +1,97 @@ +package source + +import ( + "net/http" + "strconv" + "strings" + "time" + + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var ( + // githubAPIRequestsTotal counts the total number of GitHub API requests. + githubAPIRequestsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "kelos_github_api_requests_total", + Help: "Total number of GitHub API requests", + }, + []string{"method", "status_code", "resource"}, + ) + + // githubAPIRequestDuration records the duration of GitHub API requests. + githubAPIRequestDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "kelos_github_api_request_duration_seconds", + Help: "Duration of GitHub API requests in seconds", + Buckets: prometheus.DefBuckets, + }, + []string{"method", "resource"}, + ) +) + +func init() { + metrics.Registry.MustRegister( + githubAPIRequestsTotal, + githubAPIRequestDuration, + ) +} + +type metricsTransport struct { + base http.RoundTripper +} + +// NewMetricsTransport wraps a base RoundTripper with Prometheus metrics +// that track the total number and duration of HTTP requests. +func NewMetricsTransport(base http.RoundTripper) http.RoundTripper { + return &metricsTransport{base: base} +} + +func (t *metricsTransport) RoundTrip(req *http.Request) (*http.Response, error) { + start := time.Now() + resource := classifyResource(req.URL.Path) + + resp, err := t.base.RoundTrip(req) + + duration := time.Since(start).Seconds() + githubAPIRequestDuration.WithLabelValues(req.Method, resource).Observe(duration) + + if err != nil { + githubAPIRequestsTotal.WithLabelValues(req.Method, "error", resource).Inc() + return nil, err + } + + githubAPIRequestsTotal.WithLabelValues(req.Method, strconv.Itoa(resp.StatusCode), resource).Inc() + + return resp, nil +} + +// classifyResource extracts the GitHub API resource type from a URL path. +// It walks backwards through the path segments and returns the first +// non-numeric segment that matches a known resource type, skipping +// unknown segments so that sub-resources like "events" do not shadow +// their parent (e.g. "issues"). +func classifyResource(urlPath string) string { + if i := strings.Index(urlPath, "?"); i != -1 { + urlPath = urlPath[:i] + } + segments := strings.Split(strings.Trim(urlPath, "/"), "/") + for i := len(segments) - 1; i >= 0; i-- { + if _, err := strconv.Atoi(segments[i]); err != nil { + switch segments[i] { + case "issues": + return "issues" + case "pulls": + return "pulls" + case "comments": + return "comments" + case "reviews": + return "reviews" + case "collaborators", "permission": + return "collaborators" + } + } + } + return "other" +} diff --git a/internal/source/metrics_transport_test.go b/internal/source/metrics_transport_test.go new file mode 100644 index 00000000..8ce5a525 --- /dev/null +++ b/internal/source/metrics_transport_test.go @@ -0,0 +1,164 @@ +package source + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +func counterValue(cv *prometheus.CounterVec, labels ...string) float64 { + m := &dto.Metric{} + if err := cv.WithLabelValues(labels...).Write(m); err != nil { + return 0 + } + return m.GetCounter().GetValue() +} + +func histogramCount(hv *prometheus.HistogramVec, labels ...string) uint64 { + m := &dto.Metric{} + observer, err := hv.GetMetricWithLabelValues(labels...) + if err != nil { + return 0 + } + if err := observer.(prometheus.Metric).Write(m); err != nil { + return 0 + } + return m.GetHistogram().GetSampleCount() +} + +func TestMetricsTransport_CountsRequests(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("ok")) + })) + defer srv.Close() + + client := &http.Client{Transport: NewMetricsTransport(http.DefaultTransport)} + + before := counterValue(githubAPIRequestsTotal, "GET", "200", "issues") + + resp, err := client.Get(srv.URL + "/repos/owner/repo/issues") + if err != nil { + t.Fatal(err) + } + resp.Body.Close() + + after := counterValue(githubAPIRequestsTotal, "GET", "200", "issues") + if after-before != 1 { + t.Fatalf("Expected counter to increment by 1, got %f", after-before) + } +} + +func TestMetricsTransport_CountsErrorStatus(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + })) + defer srv.Close() + + client := &http.Client{Transport: NewMetricsTransport(http.DefaultTransport)} + + before := counterValue(githubAPIRequestsTotal, "GET", "403", "issues") + + resp, err := client.Get(srv.URL + "/repos/owner/repo/issues") + if err != nil { + t.Fatal(err) + } + resp.Body.Close() + + after := counterValue(githubAPIRequestsTotal, "GET", "403", "issues") + if after-before != 1 { + t.Fatalf("Expected counter to increment by 1 for 403, got %f", after-before) + } +} + +func TestMetricsTransport_RecordsDuration(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + client := &http.Client{Transport: NewMetricsTransport(http.DefaultTransport)} + + beforeCount := histogramCount(githubAPIRequestDuration, "GET", "pulls") + + resp, err := client.Get(srv.URL + "/repos/owner/repo/pulls") + if err != nil { + t.Fatal(err) + } + resp.Body.Close() + + afterCount := histogramCount(githubAPIRequestDuration, "GET", "pulls") + if afterCount-beforeCount != 1 { + t.Fatalf("Expected histogram sample count to increment by 1, got %d", afterCount-beforeCount) + } +} + +func TestMetricsTransport_CountsTransportErrors(t *testing.T) { + client := &http.Client{Transport: NewMetricsTransport(http.DefaultTransport)} + + before := counterValue(githubAPIRequestsTotal, "GET", "error", "other") + + // Request to a closed server to trigger a transport error. + _, err := client.Get("http://127.0.0.1:1/unknown") + if err == nil { + t.Fatal("Expected transport error") + } + + after := counterValue(githubAPIRequestsTotal, "GET", "error", "other") + if after-before != 1 { + t.Fatalf("Expected error counter to increment by 1, got %f", after-before) + } +} + +func TestMetricsTransport_TracksPOST(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusCreated) + })) + defer srv.Close() + + client := &http.Client{Transport: NewMetricsTransport(http.DefaultTransport)} + + before := counterValue(githubAPIRequestsTotal, "POST", "201", "comments") + + resp, err := client.Post(srv.URL+"/repos/owner/repo/issues/1/comments", "application/json", nil) + if err != nil { + t.Fatal(err) + } + resp.Body.Close() + + after := counterValue(githubAPIRequestsTotal, "POST", "201", "comments") + if after-before != 1 { + t.Fatalf("Expected POST counter to increment by 1, got %f", after-before) + } +} + +func TestClassifyResource(t *testing.T) { + tests := []struct { + path string + expected string + }{ + {"/repos/owner/repo/issues", "issues"}, + {"/repos/owner/repo/issues?per_page=100", "issues"}, + {"/repos/owner/repo/issues/1/comments", "comments"}, + {"/repos/owner/repo/issues/1/comments?per_page=100", "comments"}, + {"/repos/owner/repo/pulls", "pulls"}, + {"/repos/owner/repo/pulls/1/reviews", "reviews"}, + {"/repos/owner/repo/pulls/1/comments", "comments"}, + {"/repos/owner/repo/issues/comments/123", "comments"}, + {"/repos/owner/repo/collaborators/user/permission", "collaborators"}, + {"/repos/owner/repo/issues/123/events", "issues"}, + {"/repos/owner/repo/pulls/1/requested_reviewers", "pulls"}, + {"/unknown/path", "other"}, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + got := classifyResource(tt.path) + if got != tt.expected { + t.Errorf("classifyResource(%q) = %q, want %q", tt.path, got, tt.expected) + } + }) + } +}