diff --git a/.github/codeql/extensions/log-sanitizer-model.yml b/.github/codeql/extensions/log-sanitizer-model.yml
index a931db5..e5d3a0a 100644
--- a/.github/codeql/extensions/log-sanitizer-model.yml
+++ b/.github/codeql/extensions/log-sanitizer-model.yml
@@ -4,9 +4,17 @@ extensions:
       extensible: neutralModel
     data:
       - [
-          "github.com/jsonrpc-bench/runner/api",
+          "github.com/jsonrpc-bench/runner/internal/sanitize",
           "",
-          "SanitizeLogValue",
+          "LogValue",
+          "",
+          "summary",
+          "manual",
+        ]
+      - [
+          "github.com/jsonrpc-bench/runner/internal/sanitize",
+          "",
+          "LogError",
           "",
           "summary",
           "manual",
diff --git a/README.md b/README.md
index f45d70b..6f3ad26 100644
--- a/README.md
+++ b/README.md
@@ -353,6 +353,26 @@ For advanced time-series analysis and alerting, you can use Grafana:
 
 5. **Set up alerting** for performance regressions and system issues
 
+#### Scraping node-side metrics
+
+This tool ships only k6 client-side metrics (`k6_http_req_*`) to Prometheus.
+It does **not** scrape the Geth/Nethermind/etc. clients under test — bring
+your own observability for server-side metrics. To add them, point your own
+Prometheus at the node's metrics endpoint (each execution-layer client exposes one):
+
+```yaml
+# prometheus.yml — example for a local Geth started with --metrics --metrics.addr 127.0.0.1
+scrape_configs:
+  - job_name: 'geth'
+    metrics_path: /debug/metrics/prometheus
+    static_configs:
+      - targets: ['localhost:6060']
+```
+
+If you compose your own Prometheus alongside this stack, add the scrape
+job to that config; the bundled `metrics/prometheus.yml` only handles k6's
+remote-write target.
+
 ### Storage Configuration
 
 Configure PostgreSQL storage for historic tracking. Choose the appropriate configuration file based on your setup:
diff --git a/go.mod b/go.mod
index 4428ae2..9d3a2f5 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module github.com/jsonrpc-bench
 go 1.25.4
 
 require (
-	github.com/ethereum/go-ethereum v1.16.9
+	github.com/ethereum/go-ethereum v1.17.3
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/mux v1.8.1
 	github.com/gorilla/websocket v1.5.3
@@ -29,15 +29,13 @@ require (
 	github.com/bits-and-blooms/bitset v1.24.3 // indirect
 	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
-	github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
 	github.com/consensys/gnark-crypto v0.19.2 // indirect
 	github.com/containerd/errdefs v1.0.0 // indirect
 	github.com/containerd/errdefs/pkg v0.3.0 // indirect
 	github.com/containerd/log v0.1.0 // indirect
 	github.com/containerd/platforms v0.2.1 // indirect
 	github.com/cpuguy83/dockercfg v0.3.2 // indirect
-	github.com/crate-crypto/go-eth-kzg v1.4.0 // indirect
-	github.com/crate-crypto/go-ipa v0.0.0-20240724233137-53bbb0ceb27a // indirect
+	github.com/crate-crypto/go-eth-kzg v1.5.0 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/deckarep/golang-set/v2 v2.8.0 // indirect
 	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0 // indirect
@@ -46,9 +44,8 @@ require (
 	github.com/docker/go-units v0.5.0 // indirect
 	github.com/ebitengine/purego v0.10.0 // indirect
 	github.com/emicklei/dot v1.9.2 // indirect
-	github.com/ethereum/c-kzg-4844/v2 v2.1.5 // indirect
+	github.com/ethereum/c-kzg-4844/v2 v2.1.6 // indirect
 	github.com/ethereum/go-bigmodexpfix v0.0.0-20250911101455-f9e208c548ab // indirect
-	github.com/ethereum/go-verkle v0.2.2 // indirect
 	github.com/felixge/httpsnoop v1.0.4 // indirect
 	github.com/ferranbt/fastssz v1.0.0 // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
@@ -66,7 +63,6 @@ require (
 	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
 	github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect
 	github.com/magiconair/properties v1.8.10 // indirect
-	github.com/mattn/go-runewidth v0.0.19 // indirect
 	github.com/minio/sha256-simd v1.0.1 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/moby/docker-image-spec v1.3.1 // indirect
@@ -80,7 +76,6 @@ require (
 	github.com/moby/term v0.5.2 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
-	github.com/olekukonko/tablewriter v0.0.5 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
 	github.com/opencontainers/image-spec v1.1.1 // indirect
 	github.com/pion/dtls/v2 v2.2.12 // indirect
@@ -114,6 +109,6 @@ require (
 	golang.org/x/net v0.49.0 // indirect
 	golang.org/x/sync v0.19.0 // indirect
 	golang.org/x/sys v0.42.0 // indirect
-	google.golang.org/protobuf v1.36.10 // indirect
+	google.golang.org/protobuf v1.36.11 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 0aa6245..5223e7c 100644
--- a/go.sum
+++ b/go.sum
@@ -22,8 +22,6 @@ github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK3
 github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
 github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
 github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
-github.com/clipperhouse/uax29/v2 v2.2.0 h1:ChwIKnQN3kcZteTXMgb1wztSgaU+ZemkgWdohwgs8tY=
-github.com/clipperhouse/uax29/v2 v2.2.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
 github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I=
 github.com/cockroachdb/errors v1.11.3/go.mod h1:m4UIW4CDjx+R5cybPsNrRbreomiFqt8o1h1wUVazSd8=
 github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce h1:giXvy4KSc/6g/esnpM7Geqxka4WSqI1SZc7sMJFd3y4=
@@ -49,10 +47,8 @@ github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7np
 github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
 github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
-github.com/crate-crypto/go-eth-kzg v1.4.0 h1:WzDGjHk4gFg6YzV0rJOAsTK4z3Qkz5jd4RE3DAvPFkg=
-github.com/crate-crypto/go-eth-kzg v1.4.0/go.mod h1:J9/u5sWfznSObptgfa92Jq8rTswn6ahQWEuiLHOjCUI=
-github.com/crate-crypto/go-ipa v0.0.0-20240724233137-53bbb0ceb27a h1:W8mUrRp6NOVl3J+MYp5kPMoUZPp7aOYHtaua31lwRHg=
-github.com/crate-crypto/go-ipa v0.0.0-20240724233137-53bbb0ceb27a/go.mod h1:sTwzHBvIzm2RfVCGNEBZgRyjwK40bVoun3ZnGOCafNM=
+github.com/crate-crypto/go-eth-kzg v1.5.0 h1:FYRiJMJG2iv+2Dy3fi14SVGjcPteZ5HAAUe4YWlJygc=
+github.com/crate-crypto/go-eth-kzg v1.5.0/go.mod h1:J9/u5sWfznSObptgfa92Jq8rTswn6ahQWEuiLHOjCUI=
 github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
 github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -74,14 +70,12 @@ github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/
 github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
 github.com/emicklei/dot v1.9.2 h1:E/Wjz+BAH+JDhybEpISbo+QyDMNSiu/wqmIW9y922P8=
 github.com/emicklei/dot v1.9.2/go.mod h1:DeV7GvQtIw4h2u73RKBkkFdvVAz0D9fzeJrgPW6gy/s=
-github.com/ethereum/c-kzg-4844/v2 v2.1.5 h1:aVtoLK5xwJ6c5RiqO8g8ptJ5KU+2Hdquf6G3aXiHh5s=
-github.com/ethereum/c-kzg-4844/v2 v2.1.5/go.mod h1:u59hRTTah4Co6i9fDWtiCjTrblJv0UwsqZKCc0GfgUs=
+github.com/ethereum/c-kzg-4844/v2 v2.1.6 h1:xQymkKCT5E2Jiaoqf3v4wsNgjZLY0lRSkZn27fRjSls=
+github.com/ethereum/c-kzg-4844/v2 v2.1.6/go.mod h1:8HMkUZ5JRv4hpw/XUrYWSQNAUzhHMg2UDb/U+5m+XNw=
 github.com/ethereum/go-bigmodexpfix v0.0.0-20250911101455-f9e208c548ab h1:rvv6MJhy07IMfEKuARQ9TKojGqLVNxQajaXEp/BoqSk=
 github.com/ethereum/go-bigmodexpfix v0.0.0-20250911101455-f9e208c548ab/go.mod h1:IuLm4IsPipXKF7CW5Lzf68PIbZ5yl7FFd74l/E0o9A8=
-github.com/ethereum/go-ethereum v1.16.9 h1:UTJ93yoXD7BEMWg+9lSZ8/Zvf0oZfy2ZUmv0Gn0ZclE=
-github.com/ethereum/go-ethereum v1.16.9/go.mod h1:Fs6QebQbavneQTYcA39PEKv2+zIjX7rPUZ14DER46wk=
-github.com/ethereum/go-verkle v0.2.2 h1:I2W0WjnrFUIzzVPwm8ykY+7pL2d4VhlsePn4j7cnFk8=
-github.com/ethereum/go-verkle v0.2.2/go.mod h1:M3b90YRnzqKyyzBEWJGqj8Qff4IDeXnzFw0P9bFw3uk=
+github.com/ethereum/go-ethereum v1.17.3 h1:Ev/sQHH+UdKZHWjuVzhu2pxhi/sXaPZl23Q+Q5LDd4Q=
+github.com/ethereum/go-ethereum v1.17.3/go.mod h1:f2EhRwqewIZkGoQekywI2Y2RZAMTSavLNkD9qItFy1A=
 github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/ferranbt/fastssz v1.0.0 h1:9EXXYsracSqQRBQiHeaVsG/KQeYblPf40hsQPb9Dzk8=
@@ -169,9 +163,6 @@ github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 h1:PwQumkgq4/acIi
 github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg=
 github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
 github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
-github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
-github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
-github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
 github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI=
 github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o=
 github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM=
@@ -207,8 +198,6 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+
 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
 github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
 github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
-github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
-github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
 github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
 github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA=
@@ -315,8 +304,8 @@ go.opentelemetry.io/otel v1.41.0 h1:YlEwVsGAlCvczDILpUXpIpPSL/VPugt7zHThEMLce1c=
 go.opentelemetry.io/otel v1.41.0/go.mod h1:Yt4UwgEKeT05QbLwbyHXEwhnjxNO6D8L5PQP51/46dE=
 go.opentelemetry.io/otel/metric v1.41.0 h1:rFnDcs4gRzBcsO9tS8LCpgR0dxg4aaxWlJxCno7JlTQ=
 go.opentelemetry.io/otel/metric v1.41.0/go.mod h1:xPvCwd9pU0VN8tPZYzDZV/BMj9CM9vs00GuBjeKhJps=
-go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
-go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg=
+go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8=
+go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE=
 go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o=
 go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
 go.opentelemetry.io/otel/trace v1.41.0 h1:Vbk2co6bhj8L59ZJ6/xFTskY+tGAbOnCtQGVVa9TIN0=
@@ -416,8 +405,8 @@ google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQ
 google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
 google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
-google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
-google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
+google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
+google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
diff --git a/runner/analysis/baseline.go b/runner/analysis/baseline.go
index f8db5e9..7c3c86f 100644
--- a/runner/analysis/baseline.go
+++ b/runner/analysis/baseline.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/sirupsen/logrus"
 
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/storage"
 	"github.com/jsonrpc-bench/runner/types"
 )
@@ -186,8 +187,8 @@ func (bm *baselineManager) Stop() error {
 // SetBaseline creates or updates a performance baseline
 func (bm *baselineManager) SetBaseline(ctx context.Context, runID, name, description string) (*Baseline, error) {
 	bm.log.WithFields(logrus.Fields{
-		"run_id": runID,
-		"name":   name,
+		"run_id": sanitize.LogValue(runID),
+		"name":   sanitize.LogValue(name),
 	}).Info("Setting baseline")
 
 	// Get the historic run to use as baseline
@@ -222,9 +223,9 @@ func (bm *baselineManager) SetBaseline(ctx context.Context, runID, name, descrip
 	}
 
 	bm.log.WithFields(logrus.Fields{
-		"baseline_id": baseline.ID,
-		"name":        name,
-		"test_name":   run.TestName,
+		"baseline_id": sanitize.LogValue(baseline.ID),
+		"name":        sanitize.LogValue(name),
+		"test_name":   sanitize.LogValue(run.TestName),
 	}).Info("Baseline set successfully")
 
 	return baseline, nil
@@ -327,7 +328,7 @@ func (bm *baselineManager) ListBaselines(ctx context.Context, testName string) (
 
 // DeleteBaseline soft deletes a baseline by marking it as inactive
 func (bm *baselineManager) DeleteBaseline(ctx context.Context, name string) error {
-	bm.log.WithField("name", name).Info("Deleting baseline")
+	bm.log.WithField("name", sanitize.LogValue(name)).Info("Deleting baseline")
 
 	query := `UPDATE baselines SET is_active = false, updated_at = CURRENT_TIMESTAMP WHERE name = $1`
 	result, err := bm.db.ExecContext(ctx, query, name)
@@ -344,15 +345,15 @@ func (bm *baselineManager) DeleteBaseline(ctx context.Context, name string) erro
 		return fmt.Errorf("baseline not found: %s", name)
 	}
 
-	bm.log.WithField("name", name).Info("Baseline deleted successfully")
+	bm.log.WithField("name", sanitize.LogValue(name)).Info("Baseline deleted successfully")
 	return nil
 }
 
 // CompareToBaseline compares a run against a specific baseline
 func (bm *baselineManager) CompareToBaseline(ctx context.Context, runID, baselineName string) (*BaselineComparison, error) {
 	bm.log.WithFields(logrus.Fields{
-		"run_id":        runID,
-		"baseline_name": baselineName,
+		"run_id":        sanitize.LogValue(runID),
+		"baseline_name": sanitize.LogValue(baselineName),
 	}).Info("Comparing run to baseline")
 
 	// Get the run and baseline
@@ -379,8 +380,8 @@ func (bm *baselineManager) CompareToBaseline(ctx context.Context, runID, baselin
 	}
 
 	bm.log.WithFields(logrus.Fields{
-		"run_id":        runID,
-		"baseline_name": baselineName,
+		"run_id":        sanitize.LogValue(runID),
+		"baseline_name": sanitize.LogValue(baselineName),
 		"status":        comparison.Status,
 		"risk_level":    comparison.RiskLevel,
 	}).Info("Comparison completed")
@@ -431,8 +432,8 @@ func (bm *baselineManager) CompareToAllBaselines(ctx context.Context, runID stri
 // DetectRegressions detects performance regressions compared to a baseline
 func (bm *baselineManager) DetectRegressions(ctx context.Context, runID, baselineName string, thresholds RegressionThresholds) ([]*types.Regression, error) {
 	bm.log.WithFields(logrus.Fields{
-		"run_id":        runID,
-		"baseline_name": baselineName,
+		"run_id":        sanitize.LogValue(runID),
+		"baseline_name": sanitize.LogValue(baselineName),
 	}).Info("Detecting regressions")
 
 	// Get comparison first
@@ -458,8 +459,8 @@ func (bm *baselineManager) DetectRegressions(ctx context.Context, runID, baselin
 	})
 
 	bm.log.WithFields(logrus.Fields{
-		"run_id":            runID,
-		"baseline_name":     baselineName,
+		"run_id":            sanitize.LogValue(runID),
+		"baseline_name":     sanitize.LogValue(baselineName),
 		"regressions_found": len(regressions),
 	}).Info("Regression detection completed")
 
diff --git a/runner/analysis/baseline_test.go b/runner/analysis/baseline_test.go
deleted file mode 100644
index 6c14f64..0000000
--- a/runner/analysis/baseline_test.go
+++ /dev/null
@@ -1,801 +0,0 @@
-package analysis
-
-import (
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"testing"
-	"time"
-
-	"github.com/google/uuid"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// MockHistoricStorage is a mock implementation of storage.HistoricStorage
-type MockHistoricStorage struct {
-	mock.Mock
-}
-
-func (m *MockHistoricStorage) Start(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) Stop() error {
-	args := m.Called()
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) SaveHistoricRun(ctx context.Context, result *types.BenchmarkResult) (*types.HistoricRun, error) {
-	args := m.Called(ctx, result)
-	return args.Get(0).(*types.HistoricRun), args.Error(1)
-}
-
-func (m *MockHistoricStorage) GetHistoricRun(ctx context.Context, runID string) (*types.HistoricRun, error) {
-	args := m.Called(ctx, runID)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricRun), args.Error(1)
-}
-
-func (m *MockHistoricStorage) ListHistoricRuns(ctx context.Context, testName string, limit int) ([]*types.HistoricRun, error) {
-	args := m.Called(ctx, testName, limit)
-	return args.Get(0).([]*types.HistoricRun), args.Error(1)
-}
-
-func (m *MockHistoricStorage) DeleteHistoricRun(ctx context.Context, runID string) error {
-	args := m.Called(ctx, runID)
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) GetHistoricTrends(ctx context.Context, testName, client, metric string, days int) (*types.HistoricTrend, error) {
-	args := m.Called(ctx, testName, client, metric, days)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricTrend), args.Error(1)
-}
-
-func (m *MockHistoricStorage) CompareRuns(ctx context.Context, runID1, runID2 string) (*types.HistoricComparison, error) {
-	args := m.Called(ctx, runID1, runID2)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricComparison), args.Error(1)
-}
-
-func (m *MockHistoricStorage) GetHistoricSummary(ctx context.Context, testName string) (*types.HistoricSummary, error) {
-	args := m.Called(ctx, testName)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricSummary), args.Error(1)
-}
-
-func (m *MockHistoricStorage) SaveResultFiles(ctx context.Context, runID string, result *types.BenchmarkResult) error {
-	args := m.Called(ctx, runID, result)
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) GetResultFiles(ctx context.Context, runID string) (string, error) {
-	args := m.Called(ctx, runID)
-	return args.String(0), args.Error(1)
-}
-
-func (m *MockHistoricStorage) CleanupOldFiles(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-// BaselineTestSuite contains all baseline management tests
-type BaselineTestSuite struct {
-	suite.Suite
-	mockStorage *MockHistoricStorage
-	mockDB      *sql.DB
-	manager     BaselineManager
-	ctx         context.Context
-}
-
-func (suite *BaselineTestSuite) SetupTest() {
-	suite.mockStorage = new(MockHistoricStorage)
-	suite.ctx = context.Background()
-
-	// Create in-memory SQLite database for testing
-	db, err := sql.Open("sqlite3", ":memory:")
-	require.NoError(suite.T(), err)
-	suite.mockDB = db
-
-	// Create baseline manager
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel) // Reduce noise in tests
-	suite.manager = NewBaselineManager(suite.mockStorage, suite.mockDB, logger)
-
-	// Start the manager and create tables
-	err = suite.manager.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-}
-
-func (suite *BaselineTestSuite) TearDownTest() {
-	if suite.mockDB != nil {
-		suite.mockDB.Close()
-	}
-	suite.mockStorage.AssertExpectations(suite.T())
-}
-
-// TestBaselineCreation tests baseline creation functionality
-func (suite *BaselineTestSuite) TestBaselineCreation() {
-	// Create test data
-	testRun := suite.createTestHistoricRun("test-run-1", "test-benchmark")
-	_ = suite.createTestBenchmarkResult()
-
-	// Mock storage expectations
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "test-run-1").Return(testRun, nil)
-
-	// Test baseline creation
-	baseline, err := suite.manager.SetBaseline(suite.ctx, "test-run-1", "test-baseline", "Test baseline for unit tests")
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), baseline)
-	assert.Equal(suite.T(), "test-baseline", baseline.Name)
-	assert.Equal(suite.T(), "Test baseline for unit tests", baseline.Description)
-	assert.Equal(suite.T(), "test-benchmark", baseline.TestName)
-	assert.Equal(suite.T(), "test-run-1", baseline.RunID)
-	assert.True(suite.T(), baseline.IsActive)
-
-	// Verify baseline metrics were extracted correctly
-	assert.Equal(suite.T(), testRun.OverallErrorRate, baseline.BaselineMetrics.OverallErrorRate)
-	assert.Equal(suite.T(), testRun.AvgLatencyMs, baseline.BaselineMetrics.AvgLatencyMs)
-	assert.Equal(suite.T(), testRun.P95LatencyMs, baseline.BaselineMetrics.P95LatencyMs)
-	assert.Equal(suite.T(), testRun.P99LatencyMs, baseline.BaselineMetrics.P99LatencyMs)
-	assert.Equal(suite.T(), testRun.TotalRequests, baseline.BaselineMetrics.TotalRequests)
-	assert.Equal(suite.T(), testRun.TotalErrors, baseline.BaselineMetrics.TotalErrors)
-
-	// Verify client metrics
-	assert.NotEmpty(suite.T(), baseline.BaselineMetrics.ClientMetrics)
-	clientBaseline, exists := baseline.BaselineMetrics.ClientMetrics["geth"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), 0.02, clientBaseline.ErrorRate)
-	assert.Equal(suite.T(), 150.0, clientBaseline.AvgLatency)
-}
-
-// TestBaselineRetrieval tests baseline retrieval functionality
-func (suite *BaselineTestSuite) TestBaselineRetrieval() {
-	// Create and save a baseline first
-	testRun := suite.createTestHistoricRun("test-run-2", "test-benchmark")
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "test-run-2").Return(testRun, nil)
-
-	createdBaseline, err := suite.manager.SetBaseline(suite.ctx, "test-run-2", "retrieval-baseline", "Test retrieval")
-	require.NoError(suite.T(), err)
-
-	// Test baseline retrieval
-	retrievedBaseline, err := suite.manager.GetBaseline(suite.ctx, "retrieval-baseline")
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), retrievedBaseline)
-	assert.Equal(suite.T(), createdBaseline.Name, retrievedBaseline.Name)
-	assert.Equal(suite.T(), createdBaseline.Description, retrievedBaseline.Description)
-	assert.Equal(suite.T(), createdBaseline.TestName, retrievedBaseline.TestName)
-	assert.Equal(suite.T(), createdBaseline.RunID, retrievedBaseline.RunID)
-}
-
-// TestBaselineNotFound tests handling of non-existent baselines
-func (suite *BaselineTestSuite) TestBaselineNotFound() {
-	_, err := suite.manager.GetBaseline(suite.ctx, "non-existent-baseline")
-
-	assert.Error(suite.T(), err)
-	assert.Contains(suite.T(), err.Error(), "baseline not found")
-}
-
-// TestBaselineList tests listing baselines
-func (suite *BaselineTestSuite) TestBaselineList() {
-	// Create multiple baselines for the same test
-	testRun1 := suite.createTestHistoricRun("test-run-3", "list-test")
-	testRun2 := suite.createTestHistoricRun("test-run-4", "list-test")
-	testRun3 := suite.createTestHistoricRun("test-run-5", "different-test")
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "test-run-3").Return(testRun1, nil)
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "test-run-4").Return(testRun2, nil)
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "test-run-5").Return(testRun3, nil)
-
-	// Create baselines
-	_, err := suite.manager.SetBaseline(suite.ctx, "test-run-3", "baseline-1", "First baseline")
-	require.NoError(suite.T(), err)
-
-	_, err = suite.manager.SetBaseline(suite.ctx, "test-run-4", "baseline-2", "Second baseline")
-	require.NoError(suite.T(), err)
-
-	_, err = suite.manager.SetBaseline(suite.ctx, "test-run-5", "baseline-3", "Third baseline")
-	require.NoError(suite.T(), err)
-
-	// Test listing baselines for specific test
-	baselines, err := suite.manager.ListBaselines(suite.ctx, "list-test")
-	require.NoError(suite.T(), err)
-	assert.Len(suite.T(), baselines, 2)
-
-	// Test listing all baselines
-	allBaselines, err := suite.manager.ListBaselines(suite.ctx, "")
-	require.NoError(suite.T(), err)
-	assert.Len(suite.T(), allBaselines, 3)
-}
-
-// TestBaselineDeletion tests baseline deletion
-func (suite *BaselineTestSuite) TestBaselineDeletion() {
-	// Create a baseline first
-	testRun := suite.createTestHistoricRun("test-run-6", "delete-test")
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "test-run-6").Return(testRun, nil)
-
-	_, err := suite.manager.SetBaseline(suite.ctx, "test-run-6", "delete-baseline", "To be deleted")
-	require.NoError(suite.T(), err)
-
-	// Verify baseline exists
-	baseline, err := suite.manager.GetBaseline(suite.ctx, "delete-baseline")
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), baseline)
-
-	// Delete baseline
-	err = suite.manager.DeleteBaseline(suite.ctx, "delete-baseline")
-	require.NoError(suite.T(), err)
-
-	// Verify baseline no longer exists
-	_, err = suite.manager.GetBaseline(suite.ctx, "delete-baseline")
-	assert.Error(suite.T(), err)
-	assert.Contains(suite.T(), err.Error(), "baseline not found")
-}
-
-// TestBaselineComparison tests baseline comparison functionality
-func (suite *BaselineTestSuite) TestBaselineComparison() {
-	// Create baseline run
-	baselineRun := suite.createTestHistoricRun("baseline-run", "comparison-test")
-	currentRun := suite.createTestHistoricRun("current-run", "comparison-test")
-
-	// Modify current run to have different metrics (worse performance)
-	currentRun.AvgLatencyMs = 200.0    // Worse than baseline 150ms
-	currentRun.OverallErrorRate = 0.05 // Worse than baseline 0.02
-
-	// Update the full results to reflect changes
-	currentResult := suite.createTestBenchmarkResult()
-	currentResult.ClientMetrics["geth"].Latency.Avg = 200.0
-	currentResult.ClientMetrics["geth"].ErrorRate = 0.05
-	currentResultJSON, _ := json.Marshal(currentResult)
-	currentRun.FullResults = currentResultJSON
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "baseline-run").Return(baselineRun, nil)
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "current-run").Return(currentRun, nil)
-
-	// Create baseline
-	_, err := suite.manager.SetBaseline(suite.ctx, "baseline-run", "comparison-baseline", "Baseline for comparison")
-	require.NoError(suite.T(), err)
-
-	// Test comparison
-	comparison, err := suite.manager.CompareToBaseline(suite.ctx, "current-run", "comparison-baseline")
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), comparison)
-	assert.Equal(suite.T(), "current-run", comparison.RunID)
-	assert.Equal(suite.T(), "comparison-baseline", comparison.BaselineName)
-	assert.Equal(suite.T(), "baseline-run", comparison.BaselineRunID)
-
-	// Verify overall change calculation
-	expectedPercentChange := ((200.0 - 150.0) / 150.0) * 100 // ~33.33% increase
-	assert.InDelta(suite.T(), expectedPercentChange, comparison.OverallChange.PercentChange, 0.1)
-	assert.False(suite.T(), comparison.OverallChange.IsImprovement) // Latency increase is not improvement
-
-	// Verify client changes
-	assert.NotEmpty(suite.T(), comparison.ClientChanges)
-	gethChange, exists := comparison.ClientChanges["geth"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), "degraded", gethChange.Status)
-}
-
-// TestRegressionDetection tests regression detection logic
-func (suite *BaselineTestSuite) TestRegressionDetection() {
-	// Create baseline and current runs with significant regression
-	baselineRun := suite.createTestHistoricRun("regression-baseline", "regression-test")
-	currentRun := suite.createTestHistoricRun("regression-current", "regression-test")
-
-	// Create a significant regression (50% latency increase)
-	currentRun.AvgLatencyMs = 225.0 // 50% increase from 150ms
-	currentRun.P95LatencyMs = 450.0 // 50% increase from 300ms
-
-	// Update full results to match
-	currentResult := suite.createTestBenchmarkResult()
-	currentResult.ClientMetrics["geth"].Latency.Avg = 225.0
-	currentResult.ClientMetrics["geth"].Latency.P95 = 450.0
-	currentResultJSON, _ := json.Marshal(currentResult)
-	currentRun.FullResults = currentResultJSON
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "regression-baseline").Return(baselineRun, nil)
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "regression-current").Return(currentRun, nil)
-
-	// Create baseline
-	_, err := suite.manager.SetBaseline(suite.ctx, "regression-baseline", "regression-baseline", "Baseline for regression test")
-	require.NoError(suite.T(), err)
-
-	// Test regression detection
-	thresholds := RegressionThresholds{
-		ErrorRateThreshold:  0.01, // 1% absolute increase
-		LatencyThreshold:    10.0, // 10% increase threshold
-		ThroughputThreshold: 10.0, // 10% decrease threshold
-		SignificanceLevel:   0.05,
-		MinSampleSize:       10,
-		ConsecutiveRuns:     1,
-	}
-
-	regressions, err := suite.manager.DetectRegressions(suite.ctx, "regression-current", "regression-baseline", thresholds)
-
-	require.NoError(suite.T(), err)
-	assert.NotEmpty(suite.T(), regressions)
-
-	// Verify regression details
-	var latencyRegression *types.Regression
-	for _, reg := range regressions {
-		if reg.Metric == "avg_latency" {
-			latencyRegression = reg
-			break
-		}
-	}
-
-	require.NotNil(suite.T(), latencyRegression)
-	assert.Equal(suite.T(), "geth", latencyRegression.Client)
-	assert.Equal(suite.T(), "avg_latency", latencyRegression.Metric)
-	assert.Equal(suite.T(), 150.0, latencyRegression.BaselineValue)
-	assert.Equal(suite.T(), 225.0, latencyRegression.CurrentValue)
-	assert.InDelta(suite.T(), 50.0, latencyRegression.PercentChange, 0.1)
-	assert.Equal(suite.T(), "critical", latencyRegression.Severity) // 50% increase should be critical
-}
-
-// TestBaselineHistory tests baseline history functionality
-func (suite *BaselineTestSuite) TestBaselineHistory() {
-	// Create baseline
-	baselineRun := suite.createTestHistoricRun("history-baseline", "history-test")
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "history-baseline").Return(baselineRun, nil)
-
-	_, err := suite.manager.SetBaseline(suite.ctx, "history-baseline", "history-baseline", "Baseline for history test")
-	require.NoError(suite.T(), err)
-
-	// Test baseline history retrieval
-	history, err := suite.manager.GetBaselineHistory(suite.ctx, "history-baseline", 7)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), history)
-	// Note: This test would require additional setup to create historic runs in the database
-	// For now, we verify that the method doesn't error
-}
-
-// TestStatisticalSignificance tests statistical significance calculations
-func (suite *BaselineTestSuite) TestStatisticalSignificance() {
-	// Create test data with known statistical properties
-	baseline1 := ComparisonMetric{
-		BaselineValue:  100.0,
-		CurrentValue:   110.0,
-		AbsoluteChange: 10.0,
-		PercentChange:  10.0,
-	}
-
-	baseline2 := ComparisonMetric{
-		BaselineValue:  100.0,
-		CurrentValue:   105.0,
-		AbsoluteChange: 5.0,
-		PercentChange:  5.0,
-	}
-
-	// Test significance determination
-	// 10% change should be considered significant with default thresholds
-	assert.True(suite.T(), baseline1.PercentChange >= 5.0) // Default significance threshold
-
-	// 5% change is at the boundary
-	assert.True(suite.T(), baseline2.PercentChange >= 5.0)
-}
-
-// TestConcurrentOperations tests concurrent baseline operations
-func (suite *BaselineTestSuite) TestConcurrentOperations() {
-	// Create test data
-	testRun := suite.createTestHistoricRun("concurrent-run", "concurrent-test")
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "concurrent-run").Return(testRun, nil).Maybe()
-
-	// Test concurrent baseline creation
-	done := make(chan bool, 10)
-	errors := make(chan error, 10)
-
-	for i := 0; i < 10; i++ {
-		go func(id int) {
-			defer func() { done <- true }()
-
-			baselineName := fmt.Sprintf("concurrent-baseline-%d", id)
-			_, err := suite.manager.SetBaseline(suite.ctx, "concurrent-run", baselineName, "Concurrent test")
-			if err != nil {
-				errors <- err
-				return
-			}
-
-			// Try to retrieve the baseline
-			_, err = suite.manager.GetBaseline(suite.ctx, baselineName)
-			if err != nil {
-				errors <- err
-			}
-		}(i)
-	}
-
-	// Wait for all goroutines to complete
-	for i := 0; i < 10; i++ {
-		<-done
-	}
-
-	close(errors)
-	for err := range errors {
-		suite.T().Errorf("Concurrent operation failed: %v", err)
-	}
-}
-
-// TestBaselineUpdate tests baseline update functionality
-func (suite *BaselineTestSuite) TestBaselineUpdate() {
-	// Create initial baseline
-	testRun1 := suite.createTestHistoricRun("update-run-1", "update-test")
-	testRun2 := suite.createTestHistoricRun("update-run-2", "update-test")
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "update-run-1").Return(testRun1, nil)
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "update-run-2").Return(testRun2, nil)
-
-	// Create initial baseline
-	baseline1, err := suite.manager.SetBaseline(suite.ctx, "update-run-1", "update-baseline", "Initial baseline")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), "update-run-1", baseline1.RunID)
-
-	// Update baseline with new run
-	baseline2, err := suite.manager.SetBaseline(suite.ctx, "update-run-2", "update-baseline", "Updated baseline")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), "update-run-2", baseline2.RunID)
-	assert.Equal(suite.T(), "Updated baseline", baseline2.Description)
-
-	// Verify only one baseline exists with the name
-	retrieved, err := suite.manager.GetBaseline(suite.ctx, "update-baseline")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), "update-run-2", retrieved.RunID)
-}
-
-// TestEdgeCases tests various edge cases
-func (suite *BaselineTestSuite) TestEdgeCases() {
-	// Test with empty test name
-	emptyRun := suite.createTestHistoricRun("empty-run", "")
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "empty-run").Return(emptyRun, nil)
-
-	baseline, err := suite.manager.SetBaseline(suite.ctx, "empty-run", "empty-baseline", "Empty test name")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), "", baseline.TestName)
-
-	// Test with zero metrics
-	zeroRun := suite.createTestHistoricRun("zero-run", "zero-test")
-	zeroRun.AvgLatencyMs = 0
-	zeroRun.P95LatencyMs = 0
-	zeroRun.TotalRequests = 0
-	zeroRun.TotalErrors = 0
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "zero-run").Return(zeroRun, nil)
-
-	zeroBaseline, err := suite.manager.SetBaseline(suite.ctx, "zero-run", "zero-baseline", "Zero metrics")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), 0.0, zeroBaseline.BaselineMetrics.AvgLatencyMs)
-}
-
-// TestPerformanceComparison tests performance metric comparison accuracy
-func (suite *BaselineTestSuite) TestPerformanceComparison() {
-	// Test data with known mathematical relationships
-	testCases := []struct {
-		name           string
-		baselineValue  float64
-		currentValue   float64
-		expectedChange float64
-		isImprovement  bool
-	}{
-		{
-			name:           "50% increase",
-			baselineValue:  100.0,
-			currentValue:   150.0,
-			expectedChange: 50.0,
-			isImprovement:  false, // For latency, increase is bad
-		},
-		{
-			name:           "25% decrease",
-			baselineValue:  200.0,
-			currentValue:   150.0,
-			expectedChange: -25.0,
-			isImprovement:  true, // For latency, decrease is good
-		},
-		{
-			name:           "No change",
-			baselineValue:  100.0,
-			currentValue:   100.0,
-			expectedChange: 0.0,
-			isImprovement:  false,
-		},
-		{
-			name:           "Small increase",
-			baselineValue:  100.0,
-			currentValue:   103.0,
-			expectedChange: 3.0,
-			isImprovement:  false,
-		},
-	}
-
-	for _, tc := range testCases {
-		suite.T().Run(tc.name, func(t *testing.T) {
-			change := calculatePercentChange(tc.baselineValue, tc.currentValue)
-			assert.InDelta(t, tc.expectedChange, change, 0.001)
-
-			// For latency metrics, lower is better
-			isImprovement := tc.currentValue < tc.baselineValue
-			assert.Equal(t, tc.isImprovement, isImprovement)
-		})
-	}
-}
-
-// Helper functions for creating test data
-
-func (suite *BaselineTestSuite) createTestHistoricRun(runID, testName string) *types.HistoricRun {
-	result := suite.createTestBenchmarkResult()
-	resultJSON, _ := json.Marshal(result)
-
-	return &types.HistoricRun{
-		ID:               runID,
-		TestName:         testName,
-		Description:      "Test run for baseline testing",
-		GitCommit:        "abc123",
-		GitBranch:        "main",
-		Tags:             []string{"test", "baseline"},
-		Timestamp:        time.Now(),
-		StartTime:        time.Now().Add(-10 * time.Minute),
-		EndTime:          time.Now(),
-		Duration:         "10m",
-		TotalRequests:    1000,
-		TotalErrors:      20,
-		OverallErrorRate: 0.02,
-		AvgLatencyMs:     150.0,
-		P95LatencyMs:     300.0,
-		P99LatencyMs:     500.0,
-		MaxLatencyMs:     1000.0,
-		PerformanceScores: map[string]float64{
-			"geth":       85.0,
-			"nethermind": 82.0,
-		},
-		BestClient:  "geth",
-		FullResults: resultJSON,
-		Environment: types.EnvironmentInfo{
-			OS:        "linux",
-			Arch:      "amd64",
-			GoVersion: "1.21.0",
-		},
-		Notes:     "Test notes",
-		CreatedAt: time.Now(),
-		UpdatedAt: time.Now(),
-	}
-}
-
-func (suite *BaselineTestSuite) createTestBenchmarkResult() *types.BenchmarkResult {
-	return &types.BenchmarkResult{
-		TestName:  "test-benchmark",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  time.Minute * 10,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 500,
-				TotalErrors:   10,
-				ErrorRate:     0.02,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P50:        120.0,
-					P95:        300.0,
-					P99:        500.0,
-					Max:        1000.0,
-					Throughput: 50.0,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      250,
-						ErrorRate:  0.01,
-						Avg:        140.0,
-						P50:        110.0,
-						P95:        280.0,
-						P99:        480.0,
-						Max:        900.0,
-						Throughput: 25.0,
-					},
-					"eth_getBlockByNumber": {
-						Count:      250,
-						ErrorRate:  0.03,
-						Avg:        160.0,
-						P50:        130.0,
-						P95:        320.0,
-						P99:        520.0,
-						Max:        1000.0,
-						Throughput: 25.0,
-					},
-				},
-			},
-			"nethermind": {
-				Name:          "nethermind",
-				TotalRequests: 500,
-				TotalErrors:   10,
-				ErrorRate:     0.02,
-				Latency: types.LatencyMetrics{
-					Avg:        155.0,
-					P50:        125.0,
-					P95:        310.0,
-					P99:        510.0,
-					Max:        1050.0,
-					Throughput: 48.0,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      250,
-						ErrorRate:  0.015,
-						Avg:        145.0,
-						P50:        115.0,
-						P95:        290.0,
-						P99:        490.0,
-						Max:        950.0,
-						Throughput: 24.0,
-					},
-					"eth_getBlockByNumber": {
-						Count:      250,
-						ErrorRate:  0.025,
-						Avg:        165.0,
-						P50:        135.0,
-						P95:        330.0,
-						P99:        530.0,
-						Max:        1050.0,
-						Throughput: 24.0,
-					},
-				},
-			},
-		},
-	}
-}
-
-// Benchmark tests for performance validation
-func BenchmarkBaselineCreation(b *testing.B) {
-	mockStorage := new(MockHistoricStorage)
-	db, _ := sql.Open("sqlite3", ":memory:")
-	defer db.Close()
-
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel)
-	manager := NewBaselineManager(mockStorage, db, logger)
-	manager.Start(context.Background())
-
-	testRun := &types.HistoricRun{
-		ID:               "bench-run",
-		TestName:         "benchmark-test",
-		OverallErrorRate: 0.02,
-		AvgLatencyMs:     150.0,
-		P95LatencyMs:     300.0,
-		TotalRequests:    1000,
-		TotalErrors:      20,
-		FullResults:      json.RawMessage(`{"client_metrics":{}}`),
-		Timestamp:        time.Now(),
-	}
-
-	mockStorage.On("GetHistoricRun", mock.Anything, "bench-run").Return(testRun, nil)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		baselineName := fmt.Sprintf("bench-baseline-%d", i)
-		_, err := manager.SetBaseline(context.Background(), "bench-run", baselineName, "Benchmark baseline")
-		if err != nil {
-			b.Fatal(err)
-		}
-	}
-}
-
-func BenchmarkBaselineComparison(b *testing.B) {
-	mockStorage := new(MockHistoricStorage)
-	db, _ := sql.Open("sqlite3", ":memory:")
-	defer db.Close()
-
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel)
-	manager := NewBaselineManager(mockStorage, db, logger)
-	manager.Start(context.Background())
-
-	// Create test data
-	result := &types.BenchmarkResult{
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"test-client": {
-				Name:      "test-client",
-				ErrorRate: 0.02,
-				Latency: types.LatencyMetrics{
-					Avg: 150.0,
-					P95: 300.0,
-					P99: 500.0,
-				},
-			},
-		},
-	}
-	resultJSON, _ := json.Marshal(result)
-
-	baselineRun := &types.HistoricRun{
-		ID:               "baseline-run",
-		TestName:         "benchmark-test",
-		OverallErrorRate: 0.02,
-		AvgLatencyMs:     150.0,
-		P95LatencyMs:     300.0,
-		TotalRequests:    1000,
-		TotalErrors:      20,
-		FullResults:      resultJSON,
-		Timestamp:        time.Now(),
-	}
-
-	currentRun := &types.HistoricRun{
-		ID:               "current-run",
-		TestName:         "benchmark-test",
-		OverallErrorRate: 0.03,
-		AvgLatencyMs:     180.0,
-		P95LatencyMs:     360.0,
-		TotalRequests:    1000,
-		TotalErrors:      30,
-		FullResults:      resultJSON,
-		Timestamp:        time.Now(),
-	}
-
-	mockStorage.On("GetHistoricRun", mock.Anything, "baseline-run").Return(baselineRun, nil)
-	mockStorage.On("GetHistoricRun", mock.Anything, "current-run").Return(currentRun, nil)
-
-	// Create baseline
-	_, err := manager.SetBaseline(context.Background(), "baseline-run", "bench-baseline", "Benchmark baseline")
-	if err != nil {
-		b.Fatal(err)
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_, err := manager.CompareToBaseline(context.Background(), "current-run", "bench-baseline")
-		if err != nil {
-			b.Fatal(err)
-		}
-	}
-}
-
-// Run the test suite
-func TestBaselineTestSuite(t *testing.T) {
-	suite.Run(t, new(BaselineTestSuite))
-}
-
-// Test mathematical accuracy of calculations
-func TestCalculatePercentChange(t *testing.T) {
-	testCases := []struct {
-		baseline float64
-		current  float64
-		expected float64
-	}{
-		{100.0, 150.0, 50.0},
-		{100.0, 50.0, -50.0},
-		{200.0, 220.0, 10.0},
-		{50.0, 45.0, -10.0},
-		{0.0, 10.0, 0.0}, // Division by zero case
-		{100.0, 100.0, 0.0},
-	}
-
-	for _, tc := range testCases {
-		result := calculatePercentChange(tc.baseline, tc.current)
-		assert.InDelta(t, tc.expected, result, 0.001)
-	}
-}
-
-func TestCalculateOverallScore(t *testing.T) {
-	// Test score calculation with known inputs
-	score1 := calculateOverallScore(0.01, 100.0, 1000.0) // Low error, good latency, good throughput
-	score2 := calculateOverallScore(0.1, 500.0, 100.0)   // High error, bad latency, poor throughput
-
-	assert.True(t, score1 > score2, "Better metrics should yield higher score")
-	assert.True(t, score1 <= 100.0, "Score should not exceed 100")
-	assert.True(t, score2 >= 0.0, "Score should not be negative")
-}
diff --git a/runner/analysis/regression.go b/runner/analysis/regression.go
index f12e9e8..660bd31 100644
--- a/runner/analysis/regression.go
+++ b/runner/analysis/regression.go
@@ -13,6 +13,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/sirupsen/logrus"
 
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/storage"
 	"github.com/jsonrpc-bench/runner/types"
 )
@@ -351,8 +352,8 @@ func (rd *regressionDetector) GetThresholds() map[string]RegressionThreshold {
 // DetectRegressions performs comprehensive regression detection on a run
 func (rd *regressionDetector) DetectRegressions(ctx context.Context, runID string, options DetectionOptions) (*RegressionReport, error) {
 	rd.log.WithFields(logrus.Fields{
-		"run_id":          runID,
-		"comparison_mode": options.ComparisonMode,
+		"run_id":          sanitize.LogValue(runID),
+		"comparison_mode": sanitize.LogValue(options.ComparisonMode),
 	}).Info("Detecting regressions")
 
 	// Get the run
@@ -453,7 +454,7 @@ func (rd *regressionDetector) DetectRegressions(ctx context.Context, runID strin
 	}
 
 	rd.log.WithFields(logrus.Fields{
-		"run_id":            runID,
+		"run_id":            sanitize.LogValue(runID),
 		"regressions_found": len(regressions),
 		"improvements":      len(improvements),
 		"overall_health":    report.Summary.OverallHealthScore,
@@ -465,7 +466,7 @@ func (rd *regressionDetector) DetectRegressions(ctx context.Context, runID strin
 
 // AnalyzeRun performs comprehensive analysis of a single run
 func (rd *regressionDetector) AnalyzeRun(ctx context.Context, runID string) (*RunAnalysis, error) {
-	rd.log.WithField("run_id", runID).Info("Analyzing run")
+	rd.log.WithField("run_id", sanitize.LogValue(runID)).Info("Analyzing run")
 
 	// Get the run
 	run, err := rd.storage.GetHistoricRun(ctx, runID)
@@ -512,7 +513,7 @@ func (rd *regressionDetector) AnalyzeRun(ctx context.Context, runID string) (*Ru
 	analysis.ComparisonHistory = rd.getComparisonHistory(ctx, runID, 5)
 
 	rd.log.WithFields(logrus.Fields{
-		"run_id":        runID,
+		"run_id":        sanitize.LogValue(runID),
 		"health_score":  analysis.OverallHealthScore,
 		"perf_score":    analysis.PerformanceScore,
 		"anomalies":     len(analysis.Anomalies),
@@ -559,7 +560,7 @@ func (rd *regressionDetector) ClassifyRegression(regression *types.Regression) s
 // CompareToSequential compares against previous sequential runs
 func (rd *regressionDetector) CompareToSequential(ctx context.Context, runID string, lookback int) ([]*types.Regression, error) {
 	rd.log.WithFields(logrus.Fields{
-		"run_id":   runID,
+		"run_id":   sanitize.LogValue(runID),
 		"lookback": lookback,
 	}).Info("Comparing to sequential runs")
 
@@ -611,8 +612,8 @@ func (rd *regressionDetector) CompareToSequential(ctx context.Context, runID str
 // CompareToBaseline compares against a specific baseline
 func (rd *regressionDetector) CompareToBaseline(ctx context.Context, runID, baselineName string) ([]*types.Regression, error) {
 	rd.log.WithFields(logrus.Fields{
-		"run_id":        runID,
-		"baseline_name": baselineName,
+		"run_id":        sanitize.LogValue(runID),
+		"baseline_name": sanitize.LogValue(baselineName),
 	}).Info("Comparing to baseline")
 
 	// Use baseline manager to detect regressions
@@ -631,7 +632,7 @@ func (rd *regressionDetector) CompareToBaseline(ctx context.Context, runID, base
 // CompareToRollingAverage compares against rolling average of previous runs
 func (rd *regressionDetector) CompareToRollingAverage(ctx context.Context, runID string, windowSize int) ([]*types.Regression, error) {
 	rd.log.WithFields(logrus.Fields{
-		"run_id":      runID,
+		"run_id":      sanitize.LogValue(runID),
 		"window_size": windowSize,
 	}).Info("Comparing to rolling average")
 
@@ -768,8 +769,8 @@ func (rd *regressionDetector) GetRegressions(ctx context.Context, runID string)
 // AcknowledgeRegression marks a regression as acknowledged
 func (rd *regressionDetector) AcknowledgeRegression(ctx context.Context, regressionID, acknowledgedBy string) error {
 	rd.log.WithFields(logrus.Fields{
-		"regression_id":   regressionID,
-		"acknowledged_by": acknowledgedBy,
+		"regression_id":   sanitize.LogValue(regressionID),
+		"acknowledged_by": sanitize.LogValue(acknowledgedBy),
 	}).Info("Acknowledging regression")
 
 	query := `
diff --git a/runner/analysis/regression_test.go b/runner/analysis/regression_test.go
deleted file mode 100644
index 01bdf67..0000000
--- a/runner/analysis/regression_test.go
+++ /dev/null
@@ -1,1291 +0,0 @@
-package analysis
-
-import (
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"math"
-	"testing"
-	"time"
-
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// MockBaselineManager is a mock implementation of BaselineManager
-type MockBaselineManager struct {
-	mock.Mock
-}
-
-func (m *MockBaselineManager) Start(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-func (m *MockBaselineManager) Stop() error {
-	args := m.Called()
-	return args.Error(0)
-}
-
-func (m *MockBaselineManager) SetBaseline(ctx context.Context, runID, name, description string) (*Baseline, error) {
-	args := m.Called(ctx, runID, name, description)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*Baseline), args.Error(1)
-}
-
-func (m *MockBaselineManager) GetBaseline(ctx context.Context, name string) (*Baseline, error) {
-	args := m.Called(ctx, name)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*Baseline), args.Error(1)
-}
-
-func (m *MockBaselineManager) ListBaselines(ctx context.Context, testName string) ([]*Baseline, error) {
-	args := m.Called(ctx, testName)
-	return args.Get(0).([]*Baseline), args.Error(1)
-}
-
-func (m *MockBaselineManager) DeleteBaseline(ctx context.Context, name string) error {
-	args := m.Called(ctx, name)
-	return args.Error(0)
-}
-
-func (m *MockBaselineManager) CompareToBaseline(ctx context.Context, runID, baselineName string) (*BaselineComparison, error) {
-	args := m.Called(ctx, runID, baselineName)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*BaselineComparison), args.Error(1)
-}
-
-func (m *MockBaselineManager) CompareToAllBaselines(ctx context.Context, runID string) ([]*BaselineComparison, error) {
-	args := m.Called(ctx, runID)
-	return args.Get(0).([]*BaselineComparison), args.Error(1)
-}
-
-func (m *MockBaselineManager) DetectRegressions(ctx context.Context, runID, baselineName string, thresholds RegressionThresholds) ([]*types.Regression, error) {
-	args := m.Called(ctx, runID, baselineName, thresholds)
-	return args.Get(0).([]*types.Regression), args.Error(1)
-}
-
-func (m *MockBaselineManager) GetBaselineHistory(ctx context.Context, baselineName string, days int) ([]*BaselineHistoryPoint, error) {
-	args := m.Called(ctx, baselineName, days)
-	return args.Get(0).([]*BaselineHistoryPoint), args.Error(1)
-}
-
-// RegressionTestSuite contains all regression detection tests
-type RegressionTestSuite struct {
-	suite.Suite
-	mockStorage         *MockHistoricStorage
-	mockBaselineManager *MockBaselineManager
-	mockDB              *sql.DB
-	detector            RegressionDetector
-	ctx                 context.Context
-}
-
-func (suite *RegressionTestSuite) SetupTest() {
-	suite.mockStorage = new(MockHistoricStorage)
-	suite.mockBaselineManager = new(MockBaselineManager)
-	suite.ctx = context.Background()
-
-	// Create in-memory SQLite database for testing
-	db, err := sql.Open("sqlite3", ":memory:")
-	require.NoError(suite.T(), err)
-	suite.mockDB = db
-
-	// Create regression detector
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel) // Reduce noise in tests
-	suite.detector = NewRegressionDetector(suite.mockStorage, suite.mockBaselineManager, suite.mockDB, logger)
-
-	// Start the detector
-	err = suite.detector.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-}
-
-func (suite *RegressionTestSuite) TearDownTest() {
-	if suite.mockDB != nil {
-		suite.mockDB.Close()
-	}
-	suite.detector.Stop()
-	suite.mockStorage.AssertExpectations(suite.T())
-	suite.mockBaselineManager.AssertExpectations(suite.T())
-}
-
-// TestRegressionThresholds tests threshold management
-func (suite *RegressionTestSuite) TestRegressionThresholds() {
-	// Test setting custom thresholds
-	customThreshold := RegressionThreshold{
-		MinorThreshold:    3.0,
-		MajorThreshold:    8.0,
-		CriticalThreshold: 15.0,
-		MinSampleSize:     5,
-		SignificanceLevel: 0.01,
-		IsPercentage:      true,
-		Direction:         "increase",
-	}
-
-	err := suite.detector.SetThreshold("custom_latency", customThreshold)
-	require.NoError(suite.T(), err)
-
-	// Test getting thresholds
-	thresholds := suite.detector.GetThresholds()
-	assert.NotEmpty(suite.T(), thresholds)
-
-	// Verify custom threshold was set
-	retrievedThreshold, exists := thresholds["custom_latency"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), "custom_latency", retrievedThreshold.MetricName)
-	assert.Equal(suite.T(), 3.0, retrievedThreshold.MinorThreshold)
-	assert.Equal(suite.T(), 8.0, retrievedThreshold.MajorThreshold)
-	assert.Equal(suite.T(), 15.0, retrievedThreshold.CriticalThreshold)
-
-	// Verify default thresholds exist
-	defaultThreshold, exists := thresholds["default"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), 5.0, defaultThreshold.MinorThreshold)
-	assert.Equal(suite.T(), 10.0, defaultThreshold.MajorThreshold)
-	assert.Equal(suite.T(), 20.0, defaultThreshold.CriticalThreshold)
-}
-
-// TestSeverityClassification tests regression severity classification
-func (suite *RegressionTestSuite) TestSeverityClassification() {
-	testCases := []struct {
-		metric        string
-		percentChange float64
-		expected      string
-	}{
-		{"latency", 25.0, "medium"},
-		{"latency", 35.0, "high"},
-		{"latency", 55.0, "critical"},
-		{"latency", 3.0, "low"},
-		{"error_rate", 2.0, "minor"}, // Absolute values for error rate
-		{"error_rate", 7.0, "major"},
-		{"error_rate", 12.0, "critical"},
-		{"throughput", 15.0, "medium"},
-		{"throughput", 25.0, "high"},
-		{"throughput", 45.0, "critical"},
-	}
-
-	for _, tc := range testCases {
-		suite.T().Run(fmt.Sprintf("%s_%.1f", tc.metric, tc.percentChange), func(t *testing.T) {
-			severity := suite.detector.GetSeverity(tc.metric, tc.percentChange)
-			assert.Equal(t, tc.expected, severity)
-		})
-	}
-}
-
-// TestSequentialComparison tests sequential run comparison
-func (suite *RegressionTestSuite) TestSequentialComparison() {
-	// Create current run
-	currentRun := suite.createTestRun("current-run", "seq-test")
-	currentRun.AvgLatencyMs = 200.0 // Worse than previous
-	currentRun.OverallErrorRate = 0.05
-
-	// Create previous runs
-	previousRuns := []*types.HistoricRun{
-		suite.createTestRun("prev-run-1", "seq-test"), // Most recent
-		suite.createTestRun("prev-run-2", "seq-test"),
-		suite.createTestRun("prev-run-3", "seq-test"),
-	}
-
-	// Set timestamps to ensure proper ordering
-	baseTime := time.Now()
-	currentRun.Timestamp = baseTime
-	for i, run := range previousRuns {
-		run.Timestamp = baseTime.Add(time.Duration(-(i + 1)) * time.Hour)
-		run.AvgLatencyMs = 150.0 // Better baseline performance
-		run.OverallErrorRate = 0.02
-	}
-
-	// Mock storage expectations
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "current-run").Return(currentRun, nil)
-	allRuns := append([]*types.HistoricRun{currentRun}, previousRuns...)
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "seq-test", 4).Return(allRuns, nil)
-
-	// Test sequential comparison
-	options := DetectionOptions{
-		ComparisonMode: "sequential",
-		LookbackCount:  3,
-	}
-
-	regressions, err := suite.detector.CompareToSequential(suite.ctx, "current-run", 3)
-
-	require.NoError(suite.T(), err)
-	assert.NotEmpty(suite.T(), regressions)
-
-	// Verify regression detection
-	found := false
-	for _, regression := range regressions {
-		if regression.Metric == "avg_latency" {
-			found = true
-			assert.Equal(suite.T(), "current-run", regression.RunID)
-			assert.Equal(suite.T(), "prev-run-1", regression.BaselineRunID) // Most recent previous
-			assert.Equal(suite.T(), 150.0, regression.BaselineValue)
-			assert.Equal(suite.T(), 200.0, regression.CurrentValue)
-			expectedChange := ((200.0 - 150.0) / 150.0) * 100 // 33.33%
-			assert.InDelta(suite.T(), expectedChange, regression.PercentChange, 0.1)
-		}
-	}
-	assert.True(suite.T(), found, "Should detect latency regression")
-}
-
-// TestBaselineComparison tests baseline comparison mode
-func (suite *RegressionTestSuite) TestBaselineComparison() {
-	// Create test regressions that baseline manager should return
-	expectedRegressions := []*types.Regression{
-		{
-			ID:             "reg-1",
-			RunID:          "current-run",
-			BaselineRunID:  "baseline-run",
-			Client:         "geth",
-			Metric:         "avg_latency",
-			BaselineValue:  150.0,
-			CurrentValue:   225.0,
-			PercentChange:  50.0,
-			AbsoluteChange: 75.0,
-			Severity:       "critical",
-			IsSignificant:  true,
-			DetectedAt:     time.Now(),
-		},
-	}
-
-	// Mock baseline manager expectations
-	suite.mockBaselineManager.On("DetectRegressions", suite.ctx, "current-run", "test-baseline", mock.AnythingOfType("RegressionThresholds")).Return(expectedRegressions, nil)
-
-	// Test baseline comparison
-	regressions, err := suite.detector.CompareToBaseline(suite.ctx, "current-run", "test-baseline")
-
-	require.NoError(suite.T(), err)
-	assert.Len(suite.T(), regressions, 1)
-	assert.Equal(suite.T(), expectedRegressions[0].ID, regressions[0].ID)
-	assert.Equal(suite.T(), expectedRegressions[0].Severity, regressions[0].Severity)
-}
-
-// TestRollingAverageComparison tests rolling average comparison
-func (suite *RegressionTestSuite) TestRollingAverageComparison() {
-	// Create current run with worse performance
-	currentRun := suite.createTestRun("current-run", "rolling-test")
-	currentRun.AvgLatencyMs = 300.0
-	currentRun.OverallErrorRate = 0.08
-
-	// Create window of previous runs with better performance
-	windowRuns := []*types.HistoricRun{currentRun}
-	baseTime := time.Now()
-	currentRun.Timestamp = baseTime
-
-	for i := 1; i <= 5; i++ {
-		run := suite.createTestRun(fmt.Sprintf("window-run-%d", i), "rolling-test")
-		run.Timestamp = baseTime.Add(time.Duration(-i) * time.Hour)
-		run.AvgLatencyMs = 150.0 + float64(i)*2.0 // Slight variation around 150ms
-		run.OverallErrorRate = 0.02 + float64(i)*0.001
-		windowRuns = append(windowRuns, run)
-	}
-
-	// Mock storage expectations
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "current-run").Return(currentRun, nil)
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "rolling-test", 6).Return(windowRuns, nil)
-
-	// Test rolling average comparison
-	regressions, err := suite.detector.CompareToRollingAverage(suite.ctx, "current-run", 5)
-
-	require.NoError(suite.T(), err)
-	assert.NotEmpty(suite.T(), regressions)
-
-	// Verify regression against rolling average
-	foundLatencyRegression := false
-	for _, regression := range regressions {
-		if regression.Metric == "avg_latency" {
-			foundLatencyRegression = true
-			assert.Equal(suite.T(), "current-run", regression.RunID)
-			assert.Equal(suite.T(), "rolling_average", regression.BaselineRunID)
-			// Rolling average should be around 156ms ((150+152+154+156+158)/5)
-			assert.InDelta(suite.T(), 154.0, regression.BaselineValue, 5.0)
-			assert.Equal(suite.T(), 300.0, regression.CurrentValue)
-			// Should be a significant regression
-			assert.True(suite.T(), regression.PercentChange > 50.0)
-		}
-	}
-	assert.True(suite.T(), foundLatencyRegression, "Should detect latency regression against rolling average")
-}
-
-// TestComprehensiveRegressionDetection tests the main regression detection workflow
-func (suite *RegressionTestSuite) TestComprehensiveRegressionDetection() {
-	// Create test run with multiple regressions
-	currentRun := suite.createTestRunWithRegressions("comprehensive-run", "comprehensive-test")
-
-	// Mock storage expectations
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "comprehensive-run").Return(currentRun, nil)
-
-	// Create previous runs for comparison
-	previousRuns := []*types.HistoricRun{
-		suite.createTestRun("prev-run", "comprehensive-test"),
-	}
-	previousRuns[0].Timestamp = time.Now().Add(-1 * time.Hour)
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "comprehensive-test", 2).Return(append([]*types.HistoricRun{currentRun}, previousRuns...), nil)
-
-	// Test comprehensive regression detection
-	options := DetectionOptions{
-		ComparisonMode:     "sequential",
-		LookbackCount:      1,
-		EnableStatistical:  true,
-		MinConfidence:      0.5,
-		IgnoreImprovements: false,
-	}
-
-	report, err := suite.detector.DetectRegressions(suite.ctx, "comprehensive-run", options)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), report)
-	assert.Equal(suite.T(), "comprehensive-run", report.RunID)
-	assert.Equal(suite.T(), "comprehensive-test", report.TestName)
-	assert.Equal(suite.T(), "sequential", report.ComparisonMode)
-
-	// Verify summary statistics
-	assert.True(suite.T(), report.Summary.TotalRegressions > 0)
-	assert.True(suite.T(), report.Summary.OverallHealthScore < 100.0)
-	assert.NotEmpty(suite.T(), report.Summary.RecommendedAction)
-
-	// Verify client analysis
-	assert.NotEmpty(suite.T(), report.ClientAnalysis)
-	for clientName, analysis := range report.ClientAnalysis {
-		assert.NotEmpty(suite.T(), clientName)
-		assert.NotNil(suite.T(), analysis)
-		assert.True(suite.T(), analysis.HealthScore >= 0.0 && analysis.HealthScore <= 100.0)
-		assert.Contains(suite.T(), []string{"improved", "degraded", "stable"}, analysis.OverallStatus)
-		assert.Contains(suite.T(), []string{"low", "medium", "high", "critical"}, analysis.RiskLevel)
-	}
-
-	// Verify risk assessment
-	assert.NotNil(suite.T(), report.RiskAssessment)
-	assert.Contains(suite.T(), []string{"low", "medium", "high", "critical"}, report.RiskAssessment.OverallRisk)
-	assert.True(suite.T(), report.RiskAssessment.RiskScore >= 0.0 && report.RiskAssessment.RiskScore <= 100.0)
-	assert.NotEmpty(suite.T(), report.RiskAssessment.ImpactAssessment)
-
-	// Verify recommendations
-	assert.NotEmpty(suite.T(), report.Recommendations)
-}
-
-// TestRunAnalysis tests comprehensive run analysis
-func (suite *RegressionTestSuite) TestRunAnalysis() {
-	// Create test run for analysis
-	testRun := suite.createTestRunWithMetrics("analysis-run", "analysis-test")
-
-	// Mock storage expectations
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "analysis-run").Return(testRun, nil)
-
-	// Mock trend data (optional)
-	trendData := &types.HistoricTrend{
-		TestName:   "analysis-test",
-		Client:     "overall",
-		Metric:     "avg_latency",
-		Points:     []types.TrendPoint{},
-		Trend:      "stable",
-		TrendSlope: 0.1,
-		R2:         0.6,
-	}
-	suite.mockStorage.On("GetHistoricTrends", suite.ctx, "analysis-test", "overall", "avg_latency", 30).Return(trendData, nil)
-
-	// Test run analysis
-	analysis, err := suite.detector.AnalyzeRun(suite.ctx, "analysis-run")
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), analysis)
-	assert.Equal(suite.T(), "analysis-run", analysis.RunID)
-	assert.Equal(suite.T(), "analysis-test", analysis.TestName)
-
-	// Verify health and performance scores
-	assert.True(suite.T(), analysis.OverallHealthScore >= 0.0 && analysis.OverallHealthScore <= 100.0)
-	assert.True(suite.T(), analysis.PerformanceScore >= 0.0 && analysis.PerformanceScore <= 100.0)
-
-	// Verify client scores
-	assert.NotEmpty(suite.T(), analysis.ClientScores)
-	for client, score := range analysis.ClientScores {
-		assert.NotEmpty(suite.T(), client)
-		assert.True(suite.T(), score >= 0.0 && score <= 100.0)
-	}
-
-	// Verify quality metrics
-	assert.NotNil(suite.T(), analysis.QualityMetrics)
-	assert.True(suite.T(), analysis.QualityMetrics.OverallQuality >= 0.0 && analysis.QualityMetrics.OverallQuality <= 1.0)
-	assert.True(suite.T(), analysis.QualityMetrics.DataCompleteness >= 0.0 && analysis.QualityMetrics.DataCompleteness <= 1.0)
-	assert.True(suite.T(), analysis.QualityMetrics.ReliabilityScore >= 0.0 && analysis.QualityMetrics.ReliabilityScore <= 1.0)
-
-	// Verify anomaly detection
-	assert.NotNil(suite.T(), analysis.Anomalies)
-
-	// Verify trend analysis
-	assert.NotNil(suite.T(), analysis.Trends)
-
-	// Verify recommendations
-	assert.NotNil(suite.T(), analysis.Recommendations)
-}
-
-// TestStatisticalSignificance tests statistical significance calculations
-func (suite *RegressionTestSuite) TestStatisticalSignificance() {
-	// Test with statistically significant changes
-	significantCases := []struct {
-		name          string
-		baselineValue float64
-		currentValue  float64
-		sampleSize    int
-		expectedSig   bool
-	}{
-		{
-			name:          "large_change_large_sample",
-			baselineValue: 100.0,
-			currentValue:  150.0, // 50% increase
-			sampleSize:    100,
-			expectedSig:   true,
-		},
-		{
-			name:          "small_change_large_sample",
-			baselineValue: 100.0,
-			currentValue:  102.0, // 2% increase
-			sampleSize:    1000,
-			expectedSig:   false, // Small change, even with large sample
-		},
-		{
-			name:          "large_change_small_sample",
-			baselineValue: 100.0,
-			currentValue:  200.0, // 100% increase
-			sampleSize:    5,
-			expectedSig:   true, // Very large change
-		},
-	}
-
-	for _, tc := range significantCases {
-		suite.T().Run(tc.name, func(t *testing.T) {
-			percentChange := ((tc.currentValue - tc.baselineValue) / tc.baselineValue) * 100
-
-			// Simple significance test based on magnitude and threshold
-			isSignificant := math.Abs(percentChange) > 10.0 // 10% threshold for significance
-
-			if tc.expectedSig {
-				assert.True(t, isSignificant || math.Abs(percentChange) > 25.0)
-			}
-		})
-	}
-}
-
-// TestCustomThresholds tests custom threshold application
-func (suite *RegressionTestSuite) TestCustomThresholds() {
-	// Create test run
-	currentRun := suite.createTestRun("custom-run", "custom-test")
-	currentRun.AvgLatencyMs = 108.0 // 8% increase from baseline 100ms
-
-	baselineRun := suite.createTestRun("baseline-run", "custom-test")
-	baselineRun.AvgLatencyMs = 100.0
-	baselineRun.Timestamp = time.Now().Add(-1 * time.Hour)
-
-	// Mock storage expectations
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "custom-run").Return(currentRun, nil)
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "custom-test", 2).Return([]*types.HistoricRun{currentRun, baselineRun}, nil)
-
-	// Test with default thresholds (should NOT trigger regression for 8% change)
-	defaultOptions := DetectionOptions{
-		ComparisonMode: "sequential",
-		LookbackCount:  1,
-	}
-
-	defaultRegressions, err := suite.detector.CompareToSequential(suite.ctx, "custom-run", 1)
-	require.NoError(suite.T(), err)
-
-	// With default 10% threshold, 8% change should not be detected
-	foundRegression := false
-	for _, reg := range defaultRegressions {
-		if reg.Metric == "avg_latency" && reg.Severity != "low" {
-			foundRegression = true
-		}
-	}
-	assert.False(suite.T(), foundRegression, "8% change should not trigger regression with default 10% threshold")
-
-	// Test with custom lower thresholds (should trigger regression for 8% change)
-	customThresholds := map[string]RegressionThreshold{
-		"avg_latency": {
-			MinorThreshold:    5.0, // Lower threshold
-			MajorThreshold:    8.0,
-			CriticalThreshold: 15.0,
-			IsPercentage:      true,
-			Direction:         "increase",
-		},
-	}
-
-	customOptions := DetectionOptions{
-		ComparisonMode:   "sequential",
-		LookbackCount:    1,
-		CustomThresholds: customThresholds,
-	}
-
-	// Create fresh mocks for second call
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "custom-run").Return(currentRun, nil)
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "custom-test", 2).Return([]*types.HistoricRun{currentRun, baselineRun}, nil)
-
-	report, err := suite.detector.DetectRegressions(suite.ctx, "custom-run", customOptions)
-	require.NoError(suite.T(), err)
-
-	// With custom 5% threshold, 8% change should be detected
-	foundMajorRegression := false
-	for _, reg := range report.Regressions {
-		if reg.Metric == "avg_latency" && reg.Severity == "major" {
-			foundMajorRegression = true
-		}
-	}
-	assert.True(suite.T(), foundMajorRegression, "8% change should trigger major regression with 5% threshold")
-}
-
-// TestClientMethodFiltering tests client and method filtering
-func (suite *RegressionTestSuite) TestClientMethodFiltering() {
-	// Create test run with multiple clients
-	currentRun := suite.createTestRunWithMultipleClients("filter-run", "filter-test")
-	previousRun := suite.createTestRunWithMultipleClients("prev-run", "filter-test")
-	previousRun.Timestamp = time.Now().Add(-1 * time.Hour)
-
-	// Make some metrics worse to trigger regressions
-	currentResult := suite.parseFullResults(currentRun.FullResults)
-	if gethMetrics, exists := currentResult.ClientMetrics["geth"]; exists {
-		gethMetrics.Latency.Avg = 250.0 // Worse than baseline ~150ms
-		gethMetrics.ErrorRate = 0.08    // Worse than baseline ~0.02
-	}
-	if nethermindMetrics, exists := currentResult.ClientMetrics["nethermind"]; exists {
-		nethermindMetrics.Latency.Avg = 200.0 // Also worse
-		nethermindMetrics.ErrorRate = 0.06
-	}
-
-	// Update full results
-	updatedJSON, _ := json.Marshal(currentResult)
-	currentRun.FullResults = updatedJSON
-
-	// Mock storage expectations
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "filter-run").Return(currentRun, nil)
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "filter-test", 2).Return([]*types.HistoricRun{currentRun, previousRun}, nil)
-
-	// Test with client filtering (include only geth)
-	options := DetectionOptions{
-		ComparisonMode: "sequential",
-		LookbackCount:  1,
-		IncludeClients: []string{"geth"},
-	}
-
-	report, err := suite.detector.DetectRegressions(suite.ctx, "filter-run", options)
-	require.NoError(suite.T(), err)
-
-	// Verify only geth regressions are included
-	for _, regression := range report.Regressions {
-		assert.Equal(suite.T(), "geth", regression.Client)
-	}
-
-	// Verify client analysis only includes geth
-	_, gethExists := report.ClientAnalysis["geth"]
-	_, nethermindExists := report.ClientAnalysis["nethermind"]
-	assert.True(suite.T(), gethExists)
-	assert.False(suite.T(), nethermindExists)
-
-	// Test with client exclusion
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "filter-run").Return(currentRun, nil)
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "filter-test", 2).Return([]*types.HistoricRun{currentRun, previousRun}, nil)
-
-	optionsExclude := DetectionOptions{
-		ComparisonMode: "sequential",
-		LookbackCount:  1,
-		ExcludeClients: []string{"geth"},
-	}
-
-	reportExclude, err := suite.detector.DetectRegressions(suite.ctx, "filter-run", optionsExclude)
-	require.NoError(suite.T(), err)
-
-	// Verify geth regressions are excluded
-	for _, regression := range reportExclude.Regressions {
-		assert.NotEqual(suite.T(), "geth", regression.Client)
-	}
-}
-
-// TestConcurrentRegressionDetection tests concurrent regression detection operations
-func (suite *RegressionTestSuite) TestConcurrentRegressionDetection() {
-	// Create test data
-	testRun := suite.createTestRun("concurrent-run", "concurrent-test")
-	previousRun := suite.createTestRun("prev-run", "concurrent-test")
-	previousRun.Timestamp = time.Now().Add(-1 * time.Hour)
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "concurrent-run").Return(testRun, nil).Maybe()
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "concurrent-test", mock.AnythingOfType("int")).Return([]*types.HistoricRun{testRun, previousRun}, nil).Maybe()
-
-	// Test concurrent detection operations
-	done := make(chan bool, 5)
-	errors := make(chan error, 5)
-
-	for i := 0; i < 5; i++ {
-		go func(id int) {
-			defer func() { done <- true }()
-
-			options := DetectionOptions{
-				ComparisonMode: "sequential",
-				LookbackCount:  1,
-			}
-
-			_, err := suite.detector.DetectRegressions(suite.ctx, "concurrent-run", options)
-			if err != nil {
-				errors <- err
-			}
-		}(i)
-	}
-
-	// Wait for all operations to complete
-	for i := 0; i < 5; i++ {
-		<-done
-	}
-
-	close(errors)
-	for err := range errors {
-		suite.T().Errorf("Concurrent regression detection failed: %v", err)
-	}
-}
-
-// TestRegressionPersistence tests saving and retrieving regressions
-func (suite *RegressionTestSuite) TestRegressionPersistence() {
-	// Create test regressions
-	regressions := []*types.Regression{
-		{
-			ID:             "test-regression-1",
-			RunID:          "test-run",
-			BaselineRunID:  "baseline-run",
-			Client:         "geth",
-			Metric:         "avg_latency",
-			Method:         "",
-			BaselineValue:  150.0,
-			CurrentValue:   225.0,
-			PercentChange:  50.0,
-			AbsoluteChange: 75.0,
-			Severity:       "critical",
-			IsSignificant:  true,
-			PValue:         0.001,
-			DetectedAt:     time.Now(),
-			Notes:          "Test regression",
-		},
-		{
-			ID:             "test-regression-2",
-			RunID:          "test-run",
-			BaselineRunID:  "baseline-run",
-			Client:         "nethermind",
-			Metric:         "error_rate",
-			Method:         "eth_getBalance",
-			BaselineValue:  0.02,
-			CurrentValue:   0.08,
-			PercentChange:  300.0,
-			AbsoluteChange: 0.06,
-			Severity:       "major",
-			IsSignificant:  true,
-			PValue:         0.005,
-			DetectedAt:     time.Now(),
-			Notes:          "Method-specific regression",
-		},
-	}
-
-	// Test saving regressions
-	err := suite.detector.SaveRegressions(suite.ctx, regressions)
-	require.NoError(suite.T(), err)
-
-	// Test retrieving regressions
-	retrievedRegressions, err := suite.detector.GetRegressions(suite.ctx, "test-run")
-	require.NoError(suite.T(), err)
-	assert.Len(suite.T(), retrievedRegressions, 2)
-
-	// Verify regression details
-	for _, retrieved := range retrievedRegressions {
-		var original *types.Regression
-		for _, orig := range regressions {
-			if orig.ID == retrieved.ID {
-				original = orig
-				break
-			}
-		}
-
-		require.NotNil(suite.T(), original)
-		assert.Equal(suite.T(), original.RunID, retrieved.RunID)
-		assert.Equal(suite.T(), original.BaselineRunID, retrieved.BaselineRunID)
-		assert.Equal(suite.T(), original.Client, retrieved.Client)
-		assert.Equal(suite.T(), original.Metric, retrieved.Metric)
-		assert.Equal(suite.T(), original.Method, retrieved.Method)
-		assert.InDelta(suite.T(), original.BaselineValue, retrieved.BaselineValue, 0.001)
-		assert.InDelta(suite.T(), original.CurrentValue, retrieved.CurrentValue, 0.001)
-		assert.InDelta(suite.T(), original.PercentChange, retrieved.PercentChange, 0.001)
-		assert.Equal(suite.T(), original.Severity, retrieved.Severity)
-		assert.Equal(suite.T(), original.IsSignificant, retrieved.IsSignificant)
-	}
-}
-
-// TestRegressionAcknowledgment tests regression acknowledgment functionality
-func (suite *RegressionTestSuite) TestRegressionAcknowledgment() {
-	// First save a regression
-	regression := &types.Regression{
-		ID:             "ack-test-regression",
-		RunID:          "ack-test-run",
-		BaselineRunID:  "ack-baseline-run",
-		Client:         "geth",
-		Metric:         "avg_latency",
-		BaselineValue:  150.0,
-		CurrentValue:   225.0,
-		PercentChange:  50.0,
-		AbsoluteChange: 75.0,
-		Severity:       "critical",
-		IsSignificant:  true,
-		DetectedAt:     time.Now(),
-	}
-
-	err := suite.detector.SaveRegressions(suite.ctx, []*types.Regression{regression})
-	require.NoError(suite.T(), err)
-
-	// Test acknowledging the regression
-	err = suite.detector.AcknowledgeRegression(suite.ctx, "ack-test-regression", "test-user")
-	require.NoError(suite.T(), err)
-
-	// Verify acknowledgment was saved
-	retrievedRegressions, err := suite.detector.GetRegressions(suite.ctx, "ack-test-run")
-	require.NoError(suite.T(), err)
-	assert.Len(suite.T(), retrievedRegressions, 1)
-
-	acknowledged := retrievedRegressions[0]
-	assert.NotNil(suite.T(), acknowledged.AcknowledgedAt)
-	assert.Equal(suite.T(), "test-user", acknowledged.AcknowledgedBy)
-
-	// Test acknowledging non-existent regression
-	err = suite.detector.AcknowledgeRegression(suite.ctx, "non-existent-regression", "test-user")
-	assert.Error(suite.T(), err)
-	assert.Contains(suite.T(), err.Error(), "regression not found")
-}
-
-// TestEdgeCasesAndErrorHandling tests various edge cases
-func (suite *RegressionTestSuite) TestEdgeCasesAndErrorHandling() {
-	// Test with non-existent run
-	options := DetectionOptions{
-		ComparisonMode: "sequential",
-		LookbackCount:  1,
-	}
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "non-existent-run").Return(nil, fmt.Errorf("run not found"))
-
-	_, err := suite.detector.DetectRegressions(suite.ctx, "non-existent-run", options)
-	assert.Error(suite.T(), err)
-	assert.Contains(suite.T(), err.Error(), "failed to get historic run")
-
-	// Test with invalid comparison mode
-	invalidOptions := DetectionOptions{
-		ComparisonMode: "invalid-mode",
-	}
-
-	testRun := suite.createTestRun("valid-run", "valid-test")
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "valid-run").Return(testRun, nil)
-
-	_, err = suite.detector.DetectRegressions(suite.ctx, "valid-run", invalidOptions)
-	assert.Error(suite.T(), err)
-	assert.Contains(suite.T(), err.Error(), "invalid comparison mode")
-
-	// Test baseline mode without baseline name
-	baselineOptions := DetectionOptions{
-		ComparisonMode: "baseline",
-		BaselineName:   "", // Missing baseline name
-	}
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "valid-run").Return(testRun, nil)
-
-	_, err = suite.detector.DetectRegressions(suite.ctx, "valid-run", baselineOptions)
-	assert.Error(suite.T(), err)
-	assert.Contains(suite.T(), err.Error(), "baseline name required")
-
-	// Test with insufficient historical data
-	shortOptions := DetectionOptions{
-		ComparisonMode: "sequential",
-		LookbackCount:  5,
-	}
-
-	suite.mockStorage.On("GetHistoricRun", suite.ctx, "valid-run").Return(testRun, nil)
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "valid-test", 6).Return([]*types.HistoricRun{testRun}, nil) // Only current run
-
-	regressions, err := suite.detector.CompareToSequential(suite.ctx, "valid-run", 5)
-	require.NoError(suite.T(), err)
-	assert.Empty(suite.T(), regressions) // Should return empty list, not error
-}
-
-// Helper functions for creating test data
-
-func (suite *RegressionTestSuite) createTestRun(runID, testName string) *types.HistoricRun {
-	result := suite.createBasicBenchmarkResult()
-	resultJSON, _ := json.Marshal(result)
-
-	return &types.HistoricRun{
-		ID:               runID,
-		TestName:         testName,
-		Timestamp:        time.Now(),
-		AvgLatencyMs:     150.0,
-		P95LatencyMs:     300.0,
-		P99LatencyMs:     500.0,
-		OverallErrorRate: 0.02,
-		TotalRequests:    1000,
-		TotalErrors:      20,
-		FullResults:      resultJSON,
-		Duration:         "10m",
-	}
-}
-
-func (suite *RegressionTestSuite) createTestRunWithRegressions(runID, testName string) *types.HistoricRun {
-	run := suite.createTestRun(runID, testName)
-
-	// Modify to have worse performance
-	run.AvgLatencyMs = 250.0 // Significant increase
-	run.P95LatencyMs = 500.0
-	run.OverallErrorRate = 0.08 // Much higher error rate
-
-	// Update full results to match
-	result := suite.parseFullResults(run.FullResults)
-	for _, clientMetrics := range result.ClientMetrics {
-		clientMetrics.Latency.Avg = 250.0
-		clientMetrics.Latency.P95 = 500.0
-		clientMetrics.ErrorRate = 0.08
-	}
-	updatedJSON, _ := json.Marshal(result)
-	run.FullResults = updatedJSON
-
-	return run
-}
-
-func (suite *RegressionTestSuite) createTestRunWithMetrics(runID, testName string) *types.HistoricRun {
-	result := &types.BenchmarkResult{
-		TestName:  testName,
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 500,
-				TotalErrors:   10,
-				ErrorRate:     0.02,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P50:        120.0,
-					P95:        300.0,
-					P99:        500.0,
-					Max:        1000.0,
-					Throughput: 50.0,
-				},
-			},
-			"nethermind": {
-				Name:          "nethermind",
-				TotalRequests: 500,
-				TotalErrors:   15,
-				ErrorRate:     0.03,
-				Latency: types.LatencyMetrics{
-					Avg:        160.0,
-					P50:        130.0,
-					P95:        320.0,
-					P99:        520.0,
-					Max:        1100.0,
-					Throughput: 48.0,
-				},
-			},
-		},
-	}
-
-	resultJSON, _ := json.Marshal(result)
-
-	return &types.HistoricRun{
-		ID:               runID,
-		TestName:         testName,
-		Timestamp:        time.Now(),
-		AvgLatencyMs:     155.0, // Average of clients
-		P95LatencyMs:     310.0,
-		P99LatencyMs:     510.0,
-		OverallErrorRate: 0.025,
-		TotalRequests:    1000,
-		TotalErrors:      25,
-		FullResults:      resultJSON,
-		Duration:         "10m",
-	}
-}
-
-func (suite *RegressionTestSuite) createTestRunWithMultipleClients(runID, testName string) *types.HistoricRun {
-	result := &types.BenchmarkResult{
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 333,
-				TotalErrors:   7,
-				ErrorRate:     0.02,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 33.3,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      167,
-						ErrorRate:  0.015,
-						Avg:        140.0,
-						P95:        280.0,
-						Throughput: 16.7,
-					},
-					"eth_getBlockByNumber": {
-						Count:      166,
-						ErrorRate:  0.025,
-						Avg:        160.0,
-						P95:        320.0,
-						Throughput: 16.6,
-					},
-				},
-			},
-			"nethermind": {
-				Name:          "nethermind",
-				TotalRequests: 333,
-				TotalErrors:   10,
-				ErrorRate:     0.03,
-				Latency: types.LatencyMetrics{
-					Avg:        155.0,
-					P95:        310.0,
-					P99:        510.0,
-					Throughput: 33.3,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      167,
-						ErrorRate:  0.025,
-						Avg:        145.0,
-						P95:        290.0,
-						Throughput: 16.7,
-					},
-					"eth_getBlockByNumber": {
-						Count:      166,
-						ErrorRate:  0.035,
-						Avg:        165.0,
-						P95:        330.0,
-						Throughput: 16.6,
-					},
-				},
-			},
-			"erigon": {
-				Name:          "erigon",
-				TotalRequests: 334,
-				TotalErrors:   8,
-				ErrorRate:     0.024,
-				Latency: types.LatencyMetrics{
-					Avg:        148.0,
-					P95:        295.0,
-					P99:        495.0,
-					Throughput: 33.4,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      167,
-						ErrorRate:  0.018,
-						Avg:        138.0,
-						P95:        275.0,
-						Throughput: 16.7,
-					},
-					"eth_getBlockByNumber": {
-						Count:      167,
-						ErrorRate:  0.030,
-						Avg:        158.0,
-						P95:        315.0,
-						Throughput: 16.7,
-					},
-				},
-			},
-		},
-	}
-
-	resultJSON, _ := json.Marshal(result)
-
-	return &types.HistoricRun{
-		ID:               runID,
-		TestName:         testName,
-		Timestamp:        time.Now(),
-		AvgLatencyMs:     151.0, // Average across clients
-		P95LatencyMs:     301.7,
-		P99LatencyMs:     501.7,
-		OverallErrorRate: 0.025,
-		TotalRequests:    1000,
-		TotalErrors:      25,
-		FullResults:      resultJSON,
-		Duration:         "10m",
-	}
-}
-
-func (suite *RegressionTestSuite) createBasicBenchmarkResult() *types.BenchmarkResult {
-	return &types.BenchmarkResult{
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 500,
-				TotalErrors:   10,
-				ErrorRate:     0.02,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 50.0,
-				},
-			},
-			"nethermind": {
-				Name:          "nethermind",
-				TotalRequests: 500,
-				TotalErrors:   10,
-				ErrorRate:     0.02,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 50.0,
-				},
-			},
-		},
-	}
-}
-
-func (suite *RegressionTestSuite) parseFullResults(fullResults json.RawMessage) *types.BenchmarkResult {
-	var result types.BenchmarkResult
-	json.Unmarshal(fullResults, &result)
-	return &result
-}
-
-// Benchmark tests for performance validation
-
-func BenchmarkRegressionDetection(b *testing.B) {
-	mockStorage := new(MockHistoricStorage)
-	mockBaselineManager := new(MockBaselineManager)
-	db, _ := sql.Open("sqlite3", ":memory:")
-	defer db.Close()
-
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel)
-	detector := NewRegressionDetector(mockStorage, mockBaselineManager, db, logger)
-	detector.Start(context.Background())
-
-	// Create test data
-	currentRun := &types.HistoricRun{
-		ID:               "bench-current",
-		TestName:         "benchmark-test",
-		Timestamp:        time.Now(),
-		AvgLatencyMs:     200.0,
-		OverallErrorRate: 0.05,
-		FullResults:      json.RawMessage(`{"client_metrics":{"test":{"latency":{"avg":200},"error_rate":0.05}}}`),
-	}
-
-	previousRun := &types.HistoricRun{
-		ID:               "bench-previous",
-		TestName:         "benchmark-test",
-		Timestamp:        time.Now().Add(-1 * time.Hour),
-		AvgLatencyMs:     150.0,
-		OverallErrorRate: 0.02,
-		FullResults:      json.RawMessage(`{"client_metrics":{"test":{"latency":{"avg":150},"error_rate":0.02}}}`),
-	}
-
-	mockStorage.On("GetHistoricRun", mock.Anything, "bench-current").Return(currentRun, nil)
-	mockStorage.On("ListHistoricRuns", mock.Anything, "benchmark-test", 2).Return([]*types.HistoricRun{currentRun, previousRun}, nil)
-
-	options := DetectionOptions{
-		ComparisonMode: "sequential",
-		LookbackCount:  1,
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_, err := detector.DetectRegressions(context.Background(), "bench-current", options)
-		if err != nil {
-			b.Fatal(err)
-		}
-	}
-}
-
-func BenchmarkRunAnalysis(b *testing.B) {
-	mockStorage := new(MockHistoricStorage)
-	mockBaselineManager := new(MockBaselineManager)
-	db, _ := sql.Open("sqlite3", ":memory:")
-	defer db.Close()
-
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel)
-	detector := NewRegressionDetector(mockStorage, mockBaselineManager, db, logger)
-	detector.Start(context.Background())
-
-	// Create complex test run
-	complexResult := &types.BenchmarkResult{
-		ClientMetrics: map[string]*types.ClientMetrics{},
-	}
-
-	// Add multiple clients for comprehensive analysis
-	for i := 0; i < 5; i++ {
-		clientName := fmt.Sprintf("client-%d", i)
-		complexResult.ClientMetrics[clientName] = &types.ClientMetrics{
-			Name:          clientName,
-			TotalRequests: 1000,
-			TotalErrors:   20,
-			ErrorRate:     0.02,
-			Latency: types.LatencyMetrics{
-				Avg:        150.0 + float64(i)*10,
-				P95:        300.0 + float64(i)*20,
-				P99:        500.0 + float64(i)*30,
-				Throughput: 100.0 - float64(i)*5,
-			},
-		}
-	}
-
-	resultJSON, _ := json.Marshal(complexResult)
-	testRun := &types.HistoricRun{
-		ID:               "complex-run",
-		TestName:         "complex-test",
-		Timestamp:        time.Now(),
-		AvgLatencyMs:     170.0,
-		OverallErrorRate: 0.02,
-		FullResults:      resultJSON,
-	}
-
-	mockStorage.On("GetHistoricRun", mock.Anything, "complex-run").Return(testRun, nil)
-	mockStorage.On("GetHistoricTrends", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil, fmt.Errorf("no trends"))
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_, err := detector.AnalyzeRun(context.Background(), "complex-run")
-		if err != nil {
-			b.Fatal(err)
-		}
-	}
-}
-
-// Run the test suite
-func TestRegressionTestSuite(t *testing.T) {
-	suite.Run(t, new(RegressionTestSuite))
-}
-
-// Test mathematical accuracy of regression calculations
-func TestRegressionCalculationAccuracy(t *testing.T) {
-	testCases := []struct {
-		name        string
-		baseline    float64
-		current     float64
-		expectedPct float64
-		expectedAbs float64
-	}{
-		{"50% increase", 100.0, 150.0, 50.0, 50.0},
-		{"25% decrease", 200.0, 150.0, -25.0, -50.0},
-		{"100% increase", 50.0, 100.0, 100.0, 50.0},
-		{"no change", 100.0, 100.0, 0.0, 0.0},
-		{"small increase", 1000.0, 1005.0, 0.5, 5.0},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			absoluteChange := tc.current - tc.baseline
-			percentChange := 0.0
-			if tc.baseline != 0 {
-				percentChange = (absoluteChange / tc.baseline) * 100
-			}
-
-			assert.InDelta(t, tc.expectedPct, percentChange, 0.001)
-			assert.InDelta(t, tc.expectedAbs, absoluteChange, 0.001)
-		})
-	}
-}
-
-func TestSeverityThresholds(t *testing.T) {
-	// Test default threshold values
-	detector := &regressionDetector{
-		thresholds: make(map[string]RegressionThreshold),
-	}
-	detector.initializeDefaultThresholds()
-
-	// Test latency thresholds
-	latencyThreshold := detector.thresholds["latency"]
-	assert.Equal(t, 5.0, latencyThreshold.MinorThreshold)
-	assert.Equal(t, 15.0, latencyThreshold.MajorThreshold)
-	assert.Equal(t, 30.0, latencyThreshold.CriticalThreshold)
-	assert.Equal(t, "increase", latencyThreshold.Direction)
-
-	// Test error rate thresholds
-	errorThreshold := detector.thresholds["error_rate"]
-	assert.Equal(t, 1.0, errorThreshold.MinorThreshold)
-	assert.Equal(t, 5.0, errorThreshold.MajorThreshold)
-	assert.Equal(t, 10.0, errorThreshold.CriticalThreshold)
-	assert.Equal(t, "increase", errorThreshold.Direction)
-	assert.False(t, errorThreshold.IsPercentage) // Absolute values
-
-	// Test throughput thresholds
-	throughputThreshold := detector.thresholds["throughput"]
-	assert.Equal(t, 5.0, throughputThreshold.MinorThreshold)
-	assert.Equal(t, 15.0, throughputThreshold.MajorThreshold)
-	assert.Equal(t, 30.0, throughputThreshold.CriticalThreshold)
-	assert.Equal(t, "decrease", throughputThreshold.Direction)
-}
-
-func TestRiskScoreCalculation(t *testing.T) {
-	// Test risk score calculation logic
-	testCases := []struct {
-		criticalCount int
-		majorCount    int
-		minorCount    int
-		expectedRisk  string
-	}{
-		{2, 0, 0, "critical"}, // 2 * 40 = 80 points
-		{1, 1, 0, "high"},     // 40 + 20 = 60 points
-		{0, 2, 1, "medium"},   // 20 + 20 + 5 = 45 points
-		{0, 1, 2, "medium"},   // 20 + 5 + 5 = 30 points
-		{0, 0, 3, "low"},      // 5 + 5 + 5 = 15 points
-		{0, 0, 0, "low"},      // 0 points
-	}
-
-	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("c%d_m%d_mi%d", tc.criticalCount, tc.majorCount, tc.minorCount), func(t *testing.T) {
-			score := float64(tc.criticalCount)*40.0 + float64(tc.majorCount)*20.0 + float64(tc.minorCount)*5.0
-
-			var expectedRisk string
-			if score >= 80 {
-				expectedRisk = "critical"
-			} else if score >= 60 {
-				expectedRisk = "high"
-			} else if score >= 30 {
-				expectedRisk = "medium"
-			} else {
-				expectedRisk = "low"
-			}
-
-			assert.Equal(t, tc.expectedRisk, expectedRisk)
-		})
-	}
-}
-
-func TestConfidenceCalculation(t *testing.T) {
-	// Test confidence calculation factors
-	testCases := []struct {
-		dataPoints    int
-		rSquared      float64
-		volatility    float64
-		minConfidence float64
-	}{
-		{100, 0.9, 0.1, 0.8}, // High data quality, good fit, low volatility
-		{50, 0.7, 0.2, 0.6},  // Medium quality
-		{20, 0.5, 0.4, 0.4},  // Lower quality
-		{10, 0.3, 0.6, 0.2},  // Poor quality
-	}
-
-	for _, tc := range testCases {
-		t.Run(fmt.Sprintf("dp%d_r2%.1f_vol%.1f", tc.dataPoints, tc.rSquared, tc.volatility), func(t *testing.T) {
-			dataQuality := math.Min(float64(tc.dataPoints)/100.0, 1.0)
-			fitQuality := tc.rSquared
-			stabilityQuality := 1 - tc.volatility
-
-			confidence := (dataQuality + fitQuality + stabilityQuality) / 3.0
-
-			assert.True(t, confidence >= tc.minConfidence)
-			assert.True(t, confidence >= 0.0 && confidence <= 1.0)
-		})
-	}
-}
diff --git a/runner/analysis/trends.go b/runner/analysis/trends.go
index 872f6b3..0cb5369 100644
--- a/runner/analysis/trends.go
+++ b/runner/analysis/trends.go
@@ -11,6 +11,7 @@ import (
 
 	"github.com/sirupsen/logrus"
 
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/storage"
 	"github.com/jsonrpc-bench/runner/types"
 )
@@ -368,7 +369,7 @@ func (ta *trendAnalyzer) Stop() error {
 // CalculateTrends performs comprehensive trend analysis for a test
 func (ta *trendAnalyzer) CalculateTrends(ctx context.Context, testName string, days int) (*TrendAnalysisResult, error) {
 	ta.log.WithFields(logrus.Fields{
-		"test_name": testName,
+		"test_name": sanitize.LogValue(testName),
 		"days":      days,
 	}).Info("Calculating comprehensive trends")
 
@@ -431,7 +432,7 @@ func (ta *trendAnalyzer) CalculateTrends(ctx context.Context, testName string, d
 	}
 
 	ta.log.WithFields(logrus.Fields{
-		"test_name": testName,
+		"test_name": sanitize.LogValue(testName),
 		"metrics":   len(trends),
 		"insights":  len(insights),
 	}).Info("Trend analysis completed")
@@ -442,8 +443,8 @@ func (ta *trendAnalyzer) CalculateTrends(ctx context.Context, testName string, d
 // GetMethodTrends analyzes trends for a specific method
 func (ta *trendAnalyzer) GetMethodTrends(ctx context.Context, testName, method string, days int) (*MethodTrendAnalysis, error) {
 	ta.log.WithFields(logrus.Fields{
-		"test_name": testName,
-		"method":    method,
+		"test_name": sanitize.LogValue(testName),
+		"method":    sanitize.LogValue(method),
 		"days":      days,
 	}).Info("Analyzing method trends")
 
@@ -460,9 +461,9 @@ func (ta *trendAnalyzer) GetMethodTrends(ctx context.Context, testName, method s
 	for _, metric := range metrics {
 		trendData, err := ta.analyzeMethodTrendForMetric(ctx, methodData, metric, method)
 		if err != nil {
-			ta.log.WithError(err).WithFields(logrus.Fields{
-				"method": method,
-				"metric": metric,
+			ta.log.WithError(sanitize.LogError(err)).WithFields(logrus.Fields{
+				"method": sanitize.LogValue(method),
+				"metric": sanitize.LogValue(metric),
 			}).Warn("Failed to analyze method trend")
 			continue
 		}
@@ -491,15 +492,15 @@ func (ta *trendAnalyzer) GetMethodTrends(ctx context.Context, testName, method s
 		Ranking:    ranking,
 	}
 
-	ta.log.WithField("method", method).Info("Method trend analysis completed")
+	ta.log.WithField("method", sanitize.LogValue(method)).Info("Method trend analysis completed")
 	return result, nil
 }
 
 // GetClientTrends analyzes trends for a specific client
 func (ta *trendAnalyzer) GetClientTrends(ctx context.Context, testName, client string, days int) (*ClientTrendAnalysis, error) {
 	ta.log.WithFields(logrus.Fields{
-		"test_name": testName,
-		"client":    client,
+		"test_name": sanitize.LogValue(testName),
+		"client":    sanitize.LogValue(client),
 		"days":      days,
 	}).Info("Analyzing client trends")
 
@@ -516,9 +517,9 @@ func (ta *trendAnalyzer) GetClientTrends(ctx context.Context, testName, client s
 	for _, metric := range metrics {
 		trendData, err := ta.analyzeClientTrendForMetric(ctx, clientData, metric, client)
 		if err != nil {
-			ta.log.WithError(err).WithFields(logrus.Fields{
-				"client": client,
-				"metric": metric,
+			ta.log.WithError(sanitize.LogError(err)).WithFields(logrus.Fields{
+				"client": sanitize.LogValue(client),
+				"metric": sanitize.LogValue(metric),
 			}).Warn("Failed to analyze client trend")
 			continue
 		}
@@ -547,15 +548,15 @@ func (ta *trendAnalyzer) GetClientTrends(ctx context.Context, testName, client s
 		Ranking:    ranking,
 	}
 
-	ta.log.WithField("client", client).Info("Client trend analysis completed")
+	ta.log.WithField("client", sanitize.LogValue(client)).Info("Client trend analysis completed")
 	return result, nil
 }
 
 // CalculateMovingAverage calculates moving averages for a metric
 func (ta *trendAnalyzer) CalculateMovingAverage(ctx context.Context, testName, metric string, windowSize, days int) (*MovingAverageResult, error) {
 	ta.log.WithFields(logrus.Fields{
-		"test_name":   testName,
-		"metric":      metric,
+		"test_name":   sanitize.LogValue(testName),
+		"metric":      sanitize.LogValue(metric),
 		"window_size": windowSize,
 		"days":        days,
 	}).Info("Calculating moving average")
@@ -621,8 +622,8 @@ func (ta *trendAnalyzer) CalculateMovingAverage(ctx context.Context, testName, m
 	}
 
 	ta.log.WithFields(logrus.Fields{
-		"test_name":     testName,
-		"metric":        metric,
+		"test_name":     sanitize.LogValue(testName),
+		"metric":        sanitize.LogValue(metric),
 		"points":        len(points),
 		"smoothness":    smoothness,
 		"trend_clarity": trendClarity,
@@ -693,8 +694,8 @@ func (ta *trendAnalyzer) DetectTrendDirection(ctx context.Context, testName, met
 // ForecastTrend generates forecasts for a metric
 func (ta *trendAnalyzer) ForecastTrend(ctx context.Context, testName, metric string, days, forecastDays int) (*TrendForecast, error) {
 	ta.log.WithFields(logrus.Fields{
-		"test_name":     testName,
-		"metric":        metric,
+		"test_name":     sanitize.LogValue(testName),
+		"metric":        sanitize.LogValue(metric),
 		"days":          days,
 		"forecast_days": forecastDays,
 	}).Info("Generating trend forecast")
@@ -762,8 +763,8 @@ func (ta *trendAnalyzer) ForecastTrend(ctx context.Context, testName, metric str
 	}
 
 	ta.log.WithFields(logrus.Fields{
-		"test_name":   testName,
-		"metric":      metric,
+		"test_name":   sanitize.LogValue(testName),
+		"metric":      sanitize.LogValue(metric),
 		"predictions": len(predictions),
 		"accuracy":    result.Accuracy,
 	}).Info("Trend forecast completed")
diff --git a/runner/analysis/trends_test.go b/runner/analysis/trends_test.go
deleted file mode 100644
index c52c668..0000000
--- a/runner/analysis/trends_test.go
+++ /dev/null
@@ -1,809 +0,0 @@
-package analysis
-
-import (
-	"context"
-	"database/sql"
-	"math"
-	"testing"
-	"time"
-
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// TrendTestSuite contains all trend analysis tests
-type TrendTestSuite struct {
-	suite.Suite
-	mockStorage *MockHistoricStorage
-	mockDB      *sql.DB
-	analyzer    TrendAnalyzer
-	ctx         context.Context
-}
-
-func (suite *TrendTestSuite) SetupTest() {
-	suite.mockStorage = new(MockHistoricStorage)
-	suite.ctx = context.Background()
-
-	// Create in-memory SQLite database for testing
-	db, err := sql.Open("sqlite3", ":memory:")
-	require.NoError(suite.T(), err)
-	suite.mockDB = db
-
-	// Create trend analyzer
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel) // Reduce noise in tests
-	suite.analyzer = NewTrendAnalyzer(suite.mockStorage, suite.mockDB, logger)
-
-	// Start the analyzer
-	err = suite.analyzer.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-}
-
-func (suite *TrendTestSuite) TearDownTest() {
-	if suite.mockDB != nil {
-		suite.mockDB.Close()
-	}
-	suite.analyzer.Stop()
-	suite.mockStorage.AssertExpectations(suite.T())
-}
-
-// TestTrendCalculation tests basic trend calculation functionality
-func (suite *TrendTestSuite) TestTrendCalculation() {
-	// Create trending data - improving performance (decreasing latency)
-	runs := suite.createTrendingRuns("trend-test", 10, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "trend-test")
-		// Simulate improving performance over time
-		run.AvgLatencyMs = 200.0 - float64(i)*5.0      // Decreasing from 200ms to 155ms
-		run.P95LatencyMs = 400.0 - float64(i)*10.0     // Decreasing from 400ms to 310ms
-		run.OverallErrorRate = 0.05 - float64(i)*0.002 // Decreasing from 5% to 3.2%
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "trend-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test trend calculation
-	result, err := suite.analyzer.CalculateTrends(suite.ctx, "trend-test", 10)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-	assert.Equal(suite.T(), "trend-test", result.TestName)
-	assert.Equal(suite.T(), 10, result.Period)
-	assert.NotEmpty(suite.T(), result.Metrics)
-	assert.NotEmpty(suite.T(), result.Trends)
-
-	// Check avg_latency trend (should be improving)
-	avgLatencyTrend, exists := result.Trends["avg_latency"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), "improving", avgLatencyTrend.Direction.Direction)
-	assert.True(suite.T(), avgLatencyTrend.Direction.Slope < 0) // Negative slope = decreasing latency = improving
-
-	// Check error_rate trend (should be improving)
-	errorRateTrend, exists := result.Trends["error_rate"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), "improving", errorRateTrend.Direction.Direction)
-
-	// Verify insights and recommendations are generated
-	assert.NotEmpty(suite.T(), result.Insights)
-	assert.NotEmpty(suite.T(), result.Recommendations)
-}
-
-// TestDegradingTrendDetection tests detection of degrading performance trends
-func (suite *TrendTestSuite) TestDegradingTrendDetection() {
-	// Create degrading trend data
-	runs := suite.createTrendingRuns("degrading-test", 8, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "degrading-test")
-		// Simulate degrading performance over time
-		run.AvgLatencyMs = 150.0 + float64(i)*10.0     // Increasing from 150ms to 220ms
-		run.P95LatencyMs = 300.0 + float64(i)*20.0     // Increasing from 300ms to 440ms
-		run.OverallErrorRate = 0.02 + float64(i)*0.005 // Increasing from 2% to 5.5%
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "degrading-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	result, err := suite.analyzer.CalculateTrends(suite.ctx, "degrading-test", 10)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-
-	// Check that degrading trends are detected
-	avgLatencyTrend, exists := result.Trends["avg_latency"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), "degrading", avgLatencyTrend.Direction.Direction)
-	assert.True(suite.T(), avgLatencyTrend.Direction.Slope > 0) // Positive slope = increasing latency = degrading
-
-	// Verify overall summary reflects degrading trend
-	assert.Equal(suite.T(), "degrading", result.Summary.OverallDirection)
-	assert.Contains(suite.T(), []string{"poor", "concerning"}, result.Summary.OverallHealth)
-}
-
-// TestStableTrendDetection tests detection of stable performance trends
-func (suite *TrendTestSuite) TestStableTrendDetection() {
-	// Create stable trend data
-	runs := suite.createTrendingRuns("stable-test", 10, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "stable-test")
-		// Simulate stable performance with minor fluctuations
-		baseLatency := 150.0
-		fluctuation := math.Sin(float64(i)*0.5) * 2.0 // Small sine wave fluctuation
-		run.AvgLatencyMs = baseLatency + fluctuation
-		run.P95LatencyMs = 300.0 + fluctuation*2.0
-		run.OverallErrorRate = 0.02 + fluctuation*0.001
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "stable-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	result, err := suite.analyzer.CalculateTrends(suite.ctx, "stable-test", 10)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-
-	// Check that stable trends are detected
-	avgLatencyTrend, exists := result.Trends["avg_latency"]
-	assert.True(suite.T(), exists)
-	assert.Equal(suite.T(), "stable", avgLatencyTrend.Direction.Direction)
-	assert.True(suite.T(), math.Abs(avgLatencyTrend.Direction.Slope) < 1.0) // Very small slope
-
-	// Verify overall summary reflects stable trend
-	assert.Equal(suite.T(), "stable", result.Summary.OverallDirection)
-	assert.Contains(suite.T(), []string{"good", "excellent"}, result.Summary.OverallHealth)
-}
-
-// TestMovingAverageCalculation tests moving average calculations
-func (suite *TrendTestSuite) TestMovingAverageCalculation() {
-	// Create test data with known values for mathematical verification
-	runs := suite.createTrendingRuns("ma-test", 15, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "ma-test")
-		run.AvgLatencyMs = float64(100 + i*10) // Linear increase: 100, 110, 120, ...
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "ma-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test moving average calculation with window size 5
-	result, err := suite.analyzer.CalculateMovingAverage(suite.ctx, "ma-test", "avg_latency", 5, 15)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-	assert.Equal(suite.T(), 5, result.WindowSize)
-	assert.NotEmpty(suite.T(), result.Points)
-
-	// Verify mathematical accuracy of moving average
-	// For the 5th point (index 4), the moving average should be (100+110+120+130+140)/5 = 120
-	if len(result.Points) >= 1 {
-		expectedMA := (100.0 + 110.0 + 120.0 + 130.0 + 140.0) / 5.0
-		assert.InDelta(suite.T(), expectedMA, result.Points[0].MovingAverage, 0.001)
-	}
-
-	// Verify trend clarity and smoothness calculations
-	assert.True(suite.T(), result.TrendClarity >= 0.0 && result.TrendClarity <= 1.0)
-	assert.True(suite.T(), result.Smoothness >= 0.0 && result.Smoothness <= 1.0)
-}
-
-// TestLinearRegressionAccuracy tests mathematical accuracy of linear regression
-func (suite *TrendTestSuite) TestLinearRegressionAccuracy() {
-	// Create perfect linear data for regression testing
-	dataPoints := make([]TrendDataPoint, 10)
-	for i := 0; i < 10; i++ {
-		dataPoints[i] = TrendDataPoint{
-			Timestamp: time.Now().Add(time.Duration(i) * time.Hour),
-			Value:     float64(i*2 + 5), // y = 2x + 5 (slope=2, intercept=5)
-			RunID:     fmt.Sprintf("run-%d", i),
-		}
-	}
-
-	// Test linear regression calculation
-	analyzer := suite.analyzer.(*trendAnalyzer)
-	regression := analyzer.calculateLinearRegression(dataPoints)
-
-	// Verify mathematical accuracy
-	assert.InDelta(suite.T(), 2.0, regression.Slope, 0.001)
-	assert.InDelta(suite.T(), 5.0, regression.Intercept, 0.001)
-	assert.True(suite.T(), regression.RSquared > 0.99) // Should be nearly perfect for linear data
-	assert.True(suite.T(), regression.Significant)
-	assert.Equal(suite.T(), "y = 2.0000x + 5.0000", regression.Equation)
-}
-
-// TestTrendDirectionDetection tests trend direction detection logic
-func (suite *TrendTestSuite) TestTrendDirectionDetection() {
-	// Create test data with known trend direction
-	runs := suite.createTrendingRuns("direction-test", 10, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "direction-test")
-		run.AvgLatencyMs = 200.0 - float64(i)*5.0 // Strong decreasing trend
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "direction-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test trend direction detection
-	direction, err := suite.analyzer.DetectTrendDirection(suite.ctx, "direction-test", "avg_latency", 10)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), direction)
-	assert.Equal(suite.T(), "improving", direction.Direction) // Decreasing latency is improving
-	assert.Contains(suite.T(), []string{"moderate", "strong", "very_strong"}, direction.Strength)
-	assert.True(suite.T(), direction.Confidence > 0.5)
-	assert.True(suite.T(), direction.Slope < 0) // Negative slope for decreasing values
-	assert.True(suite.T(), direction.TrendScore > 0)
-}
-
-// TestAnomalyDetection tests anomaly detection algorithms
-func (suite *TrendTestSuite) TestAnomalyDetection() {
-	// Create data with clear anomalies
-	runs := suite.createTrendingRuns("anomaly-test", 20, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "anomaly-test")
-		if i == 10 {
-			// Insert anomaly at position 10
-			run.AvgLatencyMs = 1000.0   // Spike
-			run.OverallErrorRate = 0.50 // 50% error rate spike
-		} else {
-			// Normal values
-			run.AvgLatencyMs = 150.0 + float64(i)*1.0
-			run.OverallErrorRate = 0.02 + float64(i)*0.001
-		}
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "anomaly-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test anomaly detection
-	result, err := suite.analyzer.DetectAnomalies(suite.ctx, "anomaly-test", 20)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-	assert.NotEmpty(suite.T(), result.Anomalies)
-
-	// Verify anomalies are detected correctly
-	found := false
-	for _, anomaly := range result.Anomalies {
-		if anomaly.RunID == "run-10" { // The anomalous run
-			found = true
-			assert.Contains(suite.T(), []string{"moderate", "severe"}, anomaly.Severity)
-			assert.True(suite.T(), anomaly.DeviationScore > 2.0) // Significant deviation
-		}
-	}
-	assert.True(suite.T(), found, "Anomaly should be detected in run-10")
-
-	// Verify anomaly rate calculation
-	assert.True(suite.T(), result.AnomalyRate > 0.0)
-	assert.True(suite.T(), result.AnomalyRate < 1.0)
-}
-
-// TestForecastingAccuracy tests trend forecasting capabilities
-func (suite *TrendTestSuite) TestForecastingAccuracy() {
-	// Create predictable linear trend for forecasting
-	runs := suite.createTrendingRuns("forecast-test", 15, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "forecast-test")
-		run.AvgLatencyMs = 100.0 + float64(i)*5.0 // Linear increase: 100, 105, 110, ...
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "forecast-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test forecasting
-	forecast, err := suite.analyzer.ForecastTrend(suite.ctx, "forecast-test", "avg_latency", 15, 5)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), forecast)
-	assert.Equal(suite.T(), "linear_regression", forecast.Method)
-	assert.Len(suite.T(), forecast.Predictions, 5)
-
-	// Verify forecast accuracy for linear data
-	// Next point should be approximately 100 + 15*5 = 175
-	expectedNextValue := 175.0
-	assert.InDelta(suite.T(), expectedNextValue, forecast.Predictions[0].PredictedValue, 5.0)
-
-	// Verify confidence intervals
-	for _, prediction := range forecast.Predictions {
-		assert.True(suite.T(), prediction.UpperBound > prediction.PredictedValue)
-		assert.True(suite.T(), prediction.LowerBound < prediction.PredictedValue)
-		assert.True(suite.T(), prediction.Confidence >= 0.0 && prediction.Confidence <= 1.0)
-	}
-
-	// Verify validation metrics
-	assert.True(suite.T(), forecast.Validation.MAE >= 0.0)
-	assert.True(suite.T(), forecast.Validation.RMSE >= 0.0)
-	assert.True(suite.T(), forecast.Validation.MAPE >= 0.0)
-}
-
-// TestStatisticalCalculations tests various statistical calculations
-func (suite *TrendTestSuite) TestStatisticalCalculations() {
-	// Create test data with known statistical properties
-	runs := suite.createTrendingRuns("stats-test", 100, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "stats-test")
-		// Create normal distribution around 150ms
-		run.AvgLatencyMs = 150.0 + math.Sin(float64(i)*0.1)*10.0
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "stats-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test statistics calculation
-	stats, err := suite.analyzer.CalculateStatistics(suite.ctx, "stats-test", 30)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), stats)
-	assert.Equal(suite.T(), 100, stats.Count)
-
-	// Verify basic statistics
-	assert.True(suite.T(), stats.Mean > 0)
-	assert.True(suite.T(), stats.Median > 0)
-	assert.True(suite.T(), stats.StandardDev >= 0)
-	assert.True(suite.T(), stats.Variance >= 0)
-	assert.True(suite.T(), stats.Min <= stats.Max)
-
-	// Verify percentiles are in order
-	assert.True(suite.T(), stats.Percentiles["p10"] <= stats.Percentiles["p25"])
-	assert.True(suite.T(), stats.Percentiles["p25"] <= stats.Percentiles["p50"])
-	assert.True(suite.T(), stats.Percentiles["p50"] <= stats.Percentiles["p75"])
-	assert.True(suite.T(), stats.Percentiles["p75"] <= stats.Percentiles["p90"])
-	assert.True(suite.T(), stats.Percentiles["p90"] <= stats.Percentiles["p95"])
-	assert.True(suite.T(), stats.Percentiles["p95"] <= stats.Percentiles["p99"])
-
-	// Verify median equals p50
-	assert.InDelta(suite.T(), stats.Median, stats.Percentiles["p50"], 0.001)
-
-	// Verify autocorrelation calculation
-	assert.NotEmpty(suite.T(), stats.Autocorrelation)
-	assert.InDelta(suite.T(), 1.0, stats.Autocorrelation[0], 0.001) // First value should be 1.0
-}
-
-// TestMethodTrends tests method-specific trend analysis
-func (suite *TrendTestSuite) TestMethodTrends() {
-	// Create test data for method analysis
-	runs := suite.createTrendingRuns("method-test", 10, func(i int) *types.HistoricRun {
-		return suite.createBasicHistoricRun(i, "method-test")
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "method-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test method trend analysis
-	result, err := suite.analyzer.GetMethodTrends(suite.ctx, "method-test", "eth_getBalance", 10)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-	assert.Equal(suite.T(), "method-test", result.TestName)
-	assert.Equal(suite.T(), "eth_getBalance", result.Method)
-	assert.NotEmpty(suite.T(), result.Trends)
-
-	// Verify method-specific trends are calculated
-	assert.NotNil(suite.T(), result.Comparison)
-	assert.NotNil(suite.T(), result.Ranking)
-}
-
-// TestClientTrends tests client-specific trend analysis
-func (suite *TrendTestSuite) TestClientTrends() {
-	// Create test data for client analysis
-	runs := suite.createTrendingRuns("client-test", 10, func(i int) *types.HistoricRun {
-		return suite.createBasicHistoricRun(i, "client-test")
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "client-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	// Test client trend analysis
-	result, err := suite.analyzer.GetClientTrends(suite.ctx, "client-test", "geth", 10)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-	assert.Equal(suite.T(), "client-test", result.TestName)
-	assert.Equal(suite.T(), "geth", result.Client)
-	assert.NotEmpty(suite.T(), result.Trends)
-
-	// Verify client-specific trends are calculated
-	assert.NotNil(suite.T(), result.Comparison)
-	assert.NotNil(suite.T(), result.Ranking)
-}
-
-// TestInsightGeneration tests automatic insight generation
-func (suite *TrendTestSuite) TestInsightGeneration() {
-	// Create data with specific patterns for insight generation
-	runs := suite.createTrendingRuns("insight-test", 15, func(i int) *types.HistoricRun {
-		run := suite.createBasicHistoricRun(i, "insight-test")
-		if i >= 10 {
-			// Introduce degradation in last 5 runs
-			run.AvgLatencyMs = 150.0 + float64(i-9)*20.0
-		} else {
-			// Stable performance in first 10 runs
-			run.AvgLatencyMs = 150.0 + float64(i)*1.0
-		}
-		return run
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "insight-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	result, err := suite.analyzer.CalculateTrends(suite.ctx, "insight-test", 15)
-
-	require.NoError(suite.T(), err)
-	assert.NotNil(suite.T(), result)
-	assert.NotEmpty(suite.T(), result.Insights)
-
-	// Verify insights contain relevant information
-	foundDegradationInsight := false
-	for _, insight := range result.Insights {
-		assert.NotEmpty(suite.T(), insight.Title)
-		assert.NotEmpty(suite.T(), insight.Description)
-		assert.Contains(suite.T(), []string{"low", "medium", "high", "critical"}, insight.Severity)
-		assert.True(suite.T(), insight.Confidence >= 0.0 && insight.Confidence <= 1.0)
-
-		if insight.Type == "performance_degradation" {
-			foundDegradationInsight = true
-		}
-	}
-
-	// Should detect degradation in the data
-	assert.True(suite.T(), foundDegradationInsight, "Should detect performance degradation")
-}
-
-// TestVolatilityCalculation tests volatility calculations
-func (suite *TrendTestSuite) TestVolatilityCalculation() {
-	analyzer := suite.analyzer.(*trendAnalyzer)
-
-	// Test with stable data (low volatility)
-	stableData := []TrendDataPoint{
-		{Value: 100.0}, {Value: 101.0}, {Value: 99.0}, {Value: 100.5}, {Value: 99.5},
-	}
-	stableVolatility := analyzer.calculateVolatility(stableData)
-
-	// Test with volatile data (high volatility)
-	volatileData := []TrendDataPoint{
-		{Value: 100.0}, {Value: 150.0}, {Value: 50.0}, {Value: 200.0}, {Value: 25.0},
-	}
-	volatileVolatility := analyzer.calculateVolatility(volatileData)
-
-	// Volatile data should have higher volatility
-	assert.True(suite.T(), volatileVolatility > stableVolatility)
-	assert.True(suite.T(), stableVolatility >= 0.0)
-	assert.True(suite.T(), volatileVolatility >= 0.0)
-}
-
-// TestChangePointDetection tests change point detection
-func (suite *TrendTestSuite) TestChangePointDetection() {
-	analyzer := suite.analyzer.(*trendAnalyzer)
-
-	// Create data with a clear change point
-	dataPoints := make([]TrendDataPoint, 30)
-	for i := 0; i < 30; i++ {
-		if i < 15 {
-			// First half: stable around 100
-			dataPoints[i] = TrendDataPoint{
-				Timestamp: time.Now().Add(time.Duration(i) * time.Hour),
-				Value:     100.0 + float64(i%3), // Small fluctuation
-				RunID:     fmt.Sprintf("run-%d", i),
-			}
-		} else {
-			// Second half: stable around 200 (level shift)
-			dataPoints[i] = TrendDataPoint{
-				Timestamp: time.Now().Add(time.Duration(i) * time.Hour),
-				Value:     200.0 + float64(i%3), // Small fluctuation around new level
-				RunID:     fmt.Sprintf("run-%d", i),
-			}
-		}
-	}
-
-	changePoints := analyzer.detectChangePoints(dataPoints)
-
-	// Should detect a change point around position 15
-	assert.NotEmpty(suite.T(), changePoints)
-
-	found := false
-	for _, cp := range changePoints {
-		if cp.Type == "level" && math.Abs(cp.Magnitude) > 50 {
-			found = true
-			assert.True(suite.T(), cp.Confidence > 0.5)
-			assert.NotEmpty(suite.T(), cp.Description)
-		}
-	}
-	assert.True(suite.T(), found, "Should detect significant level change")
-}
-
-// TestConcurrentAnalysis tests concurrent trend analysis operations
-func (suite *TrendTestSuite) TestConcurrentAnalysis() {
-	// Create test data
-	runs := suite.createTrendingRuns("concurrent-test", 20, func(i int) *types.HistoricRun {
-		return suite.createBasicHistoricRun(i, "concurrent-test")
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "concurrent-test", mock.AnythingOfType("int")).Return(runs, nil).Maybe()
-
-	// Test concurrent analysis operations
-	done := make(chan bool, 5)
-	errors := make(chan error, 5)
-
-	operations := []func(){
-		func() {
-			_, err := suite.analyzer.CalculateTrends(suite.ctx, "concurrent-test", 20)
-			if err != nil {
-				errors <- err
-			}
-		},
-		func() {
-			_, err := suite.analyzer.DetectTrendDirection(suite.ctx, "concurrent-test", "avg_latency", 20)
-			if err != nil {
-				errors <- err
-			}
-		},
-		func() {
-			_, err := suite.analyzer.CalculateMovingAverage(suite.ctx, "concurrent-test", "avg_latency", 5, 20)
-			if err != nil {
-				errors <- err
-			}
-		},
-		func() {
-			_, err := suite.analyzer.DetectAnomalies(suite.ctx, "concurrent-test", 20)
-			if err != nil {
-				errors <- err
-			}
-		},
-		func() {
-			_, err := suite.analyzer.CalculateStatistics(suite.ctx, "concurrent-test", 20)
-			if err != nil {
-				errors <- err
-			}
-		},
-	}
-
-	// Run operations concurrently
-	for _, op := range operations {
-		go func(operation func()) {
-			defer func() { done <- true }()
-			operation()
-		}(op)
-	}
-
-	// Wait for all operations to complete
-	for i := 0; i < len(operations); i++ {
-		<-done
-	}
-
-	close(errors)
-	for err := range errors {
-		suite.T().Errorf("Concurrent operation failed: %v", err)
-	}
-}
-
-// TestEdgeCases tests various edge cases
-func (suite *TrendTestSuite) TestEdgeCases() {
-	// Test with insufficient data points
-	shortRuns := suite.createTrendingRuns("short-test", 3, func(i int) *types.HistoricRun {
-		return suite.createBasicHistoricRun(i, "short-test")
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "short-test", mock.AnythingOfType("int")).Return(shortRuns, nil)
-
-	_, err := suite.analyzer.CalculateTrends(suite.ctx, "short-test", 10)
-	assert.Error(suite.T(), err)
-	assert.Contains(suite.T(), err.Error(), "insufficient data points")
-
-	// Test with empty data
-	emptyRuns := []*types.HistoricRun{}
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "empty-test", mock.AnythingOfType("int")).Return(emptyRuns, nil)
-
-	_, err = suite.analyzer.CalculateTrends(suite.ctx, "empty-test", 10)
-	assert.Error(suite.T(), err)
-
-	// Test with single data point
-	singleRun := suite.createTrendingRuns("single-test", 1, func(i int) *types.HistoricRun {
-		return suite.createBasicHistoricRun(i, "single-test")
-	})
-
-	suite.mockStorage.On("ListHistoricRuns", suite.ctx, "single-test", mock.AnythingOfType("int")).Return(singleRun, nil)
-
-	_, err = suite.analyzer.CalculateTrends(suite.ctx, "single-test", 10)
-	assert.Error(suite.T(), err)
-}
-
-// Helper functions for creating test data
-
-func (suite *TrendTestSuite) createTrendingRuns(testName string, count int, modifier func(int) *types.HistoricRun) []*types.HistoricRun {
-	runs := make([]*types.HistoricRun, count)
-	for i := 0; i < count; i++ {
-		runs[i] = modifier(i)
-	}
-	return runs
-}
-
-func (suite *TrendTestSuite) createBasicHistoricRun(index int, testName string) *types.HistoricRun {
-	return &types.HistoricRun{
-		ID:               fmt.Sprintf("run-%d", index),
-		TestName:         testName,
-		Timestamp:        time.Now().Add(time.Duration(-index) * time.Hour), // Reverse chronological
-		AvgLatencyMs:     150.0,
-		P95LatencyMs:     300.0,
-		P99LatencyMs:     500.0,
-		OverallErrorRate: 0.02,
-		TotalRequests:    1000,
-		TotalErrors:      20,
-		GitCommit:        fmt.Sprintf("commit-%d", index),
-		Duration:         "10m",
-	}
-}
-
-// Benchmark tests for performance validation
-
-func BenchmarkTrendCalculation(b *testing.B) {
-	mockStorage := new(MockHistoricStorage)
-	db, _ := sql.Open("sqlite3", ":memory:")
-	defer db.Close()
-
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel)
-	analyzer := NewTrendAnalyzer(mockStorage, db, logger)
-	analyzer.Start(context.Background())
-
-	// Create large dataset for benchmarking
-	runs := make([]*types.HistoricRun, 1000)
-	for i := 0; i < 1000; i++ {
-		runs[i] = &types.HistoricRun{
-			ID:               fmt.Sprintf("bench-run-%d", i),
-			TestName:         "benchmark-test",
-			Timestamp:        time.Now().Add(time.Duration(-i) * time.Hour),
-			AvgLatencyMs:     150.0 + float64(i)*0.1,
-			P95LatencyMs:     300.0 + float64(i)*0.2,
-			OverallErrorRate: 0.02 + float64(i)*0.0001,
-			TotalRequests:    1000,
-		}
-	}
-
-	mockStorage.On("ListHistoricRuns", mock.Anything, "benchmark-test", mock.AnythingOfType("int")).Return(runs, nil)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_, err := analyzer.CalculateTrends(context.Background(), "benchmark-test", 30)
-		if err != nil {
-			b.Fatal(err)
-		}
-	}
-}
-
-func BenchmarkLinearRegression(b *testing.B) {
-	// Create test data
-	dataPoints := make([]TrendDataPoint, 100)
-	for i := 0; i < 100; i++ {
-		dataPoints[i] = TrendDataPoint{
-			Timestamp: time.Now().Add(time.Duration(i) * time.Hour),
-			Value:     float64(i)*2.5 + 10.0,
-			RunID:     fmt.Sprintf("run-%d", i),
-		}
-	}
-
-	analyzer := &trendAnalyzer{}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = analyzer.calculateLinearRegression(dataPoints)
-	}
-}
-
-func BenchmarkMovingAverage(b *testing.B) {
-	mockStorage := new(MockHistoricStorage)
-	db, _ := sql.Open("sqlite3", ":memory:")
-	defer db.Close()
-
-	logger := logrus.New()
-	logger.SetLevel(logrus.ErrorLevel)
-	analyzer := NewTrendAnalyzer(mockStorage, db, logger)
-	analyzer.Start(context.Background())
-
-	runs := make([]*types.HistoricRun, 200)
-	for i := 0; i < 200; i++ {
-		runs[i] = &types.HistoricRun{
-			ID:           fmt.Sprintf("ma-run-%d", i),
-			TestName:     "ma-benchmark",
-			Timestamp:    time.Now().Add(time.Duration(-i) * time.Hour),
-			AvgLatencyMs: 150.0 + float64(i)*0.5,
-		}
-	}
-
-	mockStorage.On("ListHistoricRuns", mock.Anything, "ma-benchmark", mock.AnythingOfType("int")).Return(runs, nil)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_, err := analyzer.CalculateMovingAverage(context.Background(), "ma-benchmark", "avg_latency", 10, 100)
-		if err != nil {
-			b.Fatal(err)
-		}
-	}
-}
-
-// Run the test suite
-func TestTrendTestSuite(t *testing.T) {
-	suite.Run(t, new(TrendTestSuite))
-}
-
-// Test mathematical accuracy of statistical functions
-func TestStatisticalAccuracy(t *testing.T) {
-	// Test mean calculation
-	values := []float64{1.0, 2.0, 3.0, 4.0, 5.0}
-	analyzer := &trendAnalyzer{}
-
-	mean := analyzer.calculateMean(values)
-	assert.InDelta(t, 3.0, mean, 0.001)
-
-	// Test median calculation
-	median := analyzer.calculateMedian(values)
-	assert.InDelta(t, 3.0, median, 0.001)
-
-	// Test standard deviation
-	stdDev := analyzer.calculateStandardDeviation(values)
-	expectedStdDev := math.Sqrt(2.5) // Known value for this dataset
-	assert.InDelta(t, expectedStdDev, stdDev, 0.001)
-
-	// Test percentile calculation
-	p50 := analyzer.calculatePercentile(values, 50)
-	assert.InDelta(t, 3.0, p50, 0.001)
-
-	p25 := analyzer.calculatePercentile(values, 25)
-	assert.InDelta(t, 2.0, p25, 0.001)
-
-	p75 := analyzer.calculatePercentile(values, 75)
-	assert.InDelta(t, 4.0, p75, 0.001)
-}
-
-func TestAutocorrelation(t *testing.T) {
-	// Test autocorrelation with known pattern
-	analyzer := &trendAnalyzer{}
-
-	// Create a simple pattern
-	values := []float64{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}
-
-	autocorr := analyzer.calculateAutocorrelation(values, 5)
-
-	// First value should be 1.0 (perfect correlation with itself)
-	assert.InDelta(t, 1.0, autocorr[0], 0.001)
-
-	// Values should be between -1 and 1
-	for _, val := range autocorr {
-		assert.True(t, val >= -1.0 && val <= 1.0)
-	}
-}
-
-func TestTrendStrengthClassification(t *testing.T) {
-	analyzer := &trendAnalyzer{}
-
-	// Test trend strength determination
-	testCases := []struct {
-		rSquared   float64
-		volatility float64
-		expected   string
-	}{
-		{0.9, 0.1, "very_strong"},
-		{0.7, 0.2, "strong"},
-		{0.5, 0.3, "moderate"},
-		{0.3, 0.5, "weak"},
-		{0.1, 0.8, "weak"},
-	}
-
-	for _, tc := range testCases {
-		strength := analyzer.determineTrendStrength(tc.rSquared, tc.volatility)
-		assert.Equal(t, tc.expected, strength)
-	}
-}
-
-func TestTrendDirectionLogic(t *testing.T) {
-	analyzer := &trendAnalyzer{}
-
-	// Test direction determination for different metrics
-	testCases := []struct {
-		slope    float64
-		metric   string
-		expected string
-	}{
-		{-5.0, "avg_latency", "improving"}, // Decreasing latency is improving
-		{5.0, "avg_latency", "degrading"},  // Increasing latency is degrading
-		{-0.02, "error_rate", "improving"}, // Decreasing error rate is improving
-		{0.02, "error_rate", "degrading"},  // Increasing error rate is degrading
-		{5.0, "throughput", "improving"},   // Increasing throughput is improving
-		{-5.0, "throughput", "degrading"},  // Decreasing throughput is degrading
-		{0.005, "avg_latency", "stable"},   // Small slope is stable
-	}
-
-	for _, tc := range testCases {
-		direction := analyzer.determineTrendDirection(tc.slope, tc.metric)
-		assert.Equal(t, tc.expected, direction)
-	}
-}
diff --git a/runner/api/grafana_api.go b/runner/api/grafana_api.go
index 7254d40..51784ed 100644
--- a/runner/api/grafana_api.go
+++ b/runner/api/grafana_api.go
@@ -13,6 +13,7 @@ import (
 
 	"github.com/sirupsen/logrus"
 
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/storage"
 )
 
@@ -98,7 +99,7 @@ func (g *grafanaAPI) HandleGrafanaSearch(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	g.log.WithField("target", SanitizeLogValue(req.Target)).Debug("Search target")
+	g.log.WithField("target", sanitize.LogValue(req.Target)).Debug("Search target")
 
 	// Get available metrics based on search target
 	metrics, err := g.getAvailableMetrics(ctx, req.Target)
@@ -124,8 +125,8 @@ func (g *grafanaAPI) HandleGrafanaQuery(w http.ResponseWriter, r *http.Request)
 	}
 
 	g.log.WithFields(logrus.Fields{
-		"from":    SanitizeLogValue(req.Range.From),
-		"to":      SanitizeLogValue(req.Range.To),
+		"from":    sanitize.LogValue(req.Range.From),
+		"to":      sanitize.LogValue(req.Range.To),
 		"targets": len(req.Targets),
 	}).Debug("Query parameters")
 
@@ -134,14 +135,14 @@ func (g *grafanaAPI) HandleGrafanaQuery(w http.ResponseWriter, r *http.Request)
 	// raw error chain.
 	fromTime, err := g.parseGrafanaTime(req.Range.From)
 	if err != nil {
-		g.log.WithField("error", SanitizeLogValue(err.Error())).Error("Failed to parse from time")
+		g.log.WithField("error", sanitize.LogValue(err.Error())).Error("Failed to parse from time")
 		g.writeGrafanaErrorResponse(w, http.StatusBadRequest, "Invalid from time format")
 		return
 	}
 
 	toTime, err := g.parseGrafanaTime(req.Range.To)
 	if err != nil {
-		g.log.WithField("error", SanitizeLogValue(err.Error())).Error("Failed to parse to time")
+		g.log.WithField("error", sanitize.LogValue(err.Error())).Error("Failed to parse to time")
 		g.writeGrafanaErrorResponse(w, http.StatusBadRequest, "Invalid to time format")
 		return
 	}
@@ -154,7 +155,7 @@ func (g *grafanaAPI) HandleGrafanaQuery(w http.ResponseWriter, r *http.Request)
 			continue
 		}
 
-		safeTarget := SanitizeLogValue(target.Target)
+		safeTarget := sanitize.LogValue(target.Target)
 		g.log.WithField("target", safeTarget).Debug("Processing target")
 
 		// Parse target to extract metric information
@@ -169,7 +170,7 @@ func (g *grafanaAPI) HandleGrafanaQuery(w http.ResponseWriter, r *http.Request)
 		case "table":
 			tableData, err := g.queryTableData(ctx, metricInfo, fromTime, toTime)
 			if err != nil {
-				g.log.WithError(err).WithField("target", safeTarget).Error("Failed to query table data")
+				g.log.WithError(sanitize.LogError(err)).WithField("target", safeTarget).Error("Failed to query table data")
 				continue
 			}
 			response = append(response, tableData)
@@ -177,7 +178,7 @@ func (g *grafanaAPI) HandleGrafanaQuery(w http.ResponseWriter, r *http.Request)
 		default: // time series
 			timeSeriesData, err := g.queryTimeSeriesData(ctx, metricInfo, fromTime, toTime)
 			if err != nil {
-				g.log.WithError(err).WithField("target", safeTarget).Error("Failed to query time series data")
+				g.log.WithError(sanitize.LogError(err)).WithField("target", safeTarget).Error("Failed to query time series data")
 				continue
 			}
 			response = append(response, timeSeriesData)
@@ -203,14 +204,14 @@ func (g *grafanaAPI) HandleGrafanaAnnotations(w http.ResponseWriter, r *http.Req
 	// the formatted error string instead of using WithError.
 	fromTime, err := g.parseGrafanaTime(req.Range.From)
 	if err != nil {
-		g.log.WithField("error", SanitizeLogValue(err.Error())).Error("Failed to parse from time")
+		g.log.WithField("error", sanitize.LogValue(err.Error())).Error("Failed to parse from time")
 		g.writeGrafanaErrorResponse(w, http.StatusBadRequest, "Invalid from time format")
 		return
 	}
 
 	toTime, err := g.parseGrafanaTime(req.Range.To)
 	if err != nil {
-		g.log.WithField("error", SanitizeLogValue(err.Error())).Error("Failed to parse to time")
+		g.log.WithField("error", sanitize.LogValue(err.Error())).Error("Failed to parse to time")
 		g.writeGrafanaErrorResponse(w, http.StatusBadRequest, "Invalid to time format")
 		return
 	}
diff --git a/runner/api/grafana_api_test.go b/runner/api/grafana_api_test.go
deleted file mode 100644
index d4da055..0000000
--- a/runner/api/grafana_api_test.go
+++ /dev/null
@@ -1,1093 +0,0 @@
-package api
-
-import (
-	"bytes"
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"testing"
-	"time"
-
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-
-	"github.com/jsonrpc-bench/runner/storage"
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// Helper function to setup Grafana API for testing
-func setupGrafanaAPI() (*grafanaAPI, *MockHistoricStorage, *MockDB) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel) // Reduce noise in tests
-
-	storage := &MockHistoricStorage{}
-	db := &MockDB{}
-
-	api := &grafanaAPI{
-		storage: storage,
-		db:      db,
-		log:     log.WithField("component", "grafana-api"),
-	}
-
-	return api, storage, db
-}
-
-// Test Grafana API creation and lifecycle
-
-func TestNewGrafanaAPI(t *testing.T) {
-	log := logrus.New()
-	storage := &MockHistoricStorage{}
-	db := &MockDB{}
-
-	api := NewGrafanaAPI(storage, db, log)
-
-	assert.NotNil(t, api)
-	assert.Implements(t, (*GrafanaAPI)(nil), api)
-}
-
-func TestGrafanaAPIStartStop(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	ctx := context.Background()
-
-	// Test Start
-	err := api.Start(ctx)
-	assert.NoError(t, err)
-
-	// Test Stop
-	err = api.Stop()
-	assert.NoError(t, err)
-}
-
-// Test connection endpoint
-
-func TestHandleGrafanaTestConnection(t *testing.T) {
-	tests := []struct {
-		name           string
-		dbPingError    error
-		expectedStatus int
-		expectedBody   string
-	}{
-		{
-			name:           "successful connection",
-			dbPingError:    nil,
-			expectedStatus: http.StatusOK,
-			expectedBody:   "success",
-		},
-		{
-			name:           "database connection failed",
-			dbPingError:    fmt.Errorf("connection failed"),
-			expectedStatus: http.StatusServiceUnavailable,
-			expectedBody:   "Database connection failed",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			api, _, mockDB := setupGrafanaAPI()
-
-			mockDB.On("Ping").Return(tt.dbPingError)
-
-			req := httptest.NewRequest("GET", "/grafana/", nil)
-			w := httptest.NewRecorder()
-
-			api.HandleGrafanaTestConnection(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-			assert.Contains(t, w.Body.String(), tt.expectedBody)
-
-			mockDB.AssertExpectations(t)
-		})
-	}
-}
-
-// Test search endpoint
-
-func TestHandleGrafanaSearch(t *testing.T) {
-	tests := []struct {
-		name           string
-		requestBody    interface{}
-		mockRows       []string
-		dbError        error
-		expectedStatus int
-		expectedCount  int
-	}{
-		{
-			name: "successful search",
-			requestBody: GrafanaSearchRequest{
-				Target: "test",
-			},
-			mockRows:       []string{"test-benchmark", "another-test"},
-			dbError:        nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  8, // 2 tests * 4 metric types
-		},
-		{
-			name: "empty search",
-			requestBody: GrafanaSearchRequest{
-				Target: "",
-			},
-			mockRows:       []string{"test-benchmark"},
-			dbError:        nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  4, // 1 test * 4 metric types
-		},
-		{
-			name:           "invalid JSON",
-			requestBody:    "invalid json",
-			mockRows:       nil,
-			dbError:        nil,
-			expectedStatus: http.StatusBadRequest,
-			expectedCount:  0,
-		},
-		{
-			name: "database error",
-			requestBody: GrafanaSearchRequest{
-				Target: "test",
-			},
-			mockRows:       nil,
-			dbError:        fmt.Errorf("database error"),
-			expectedStatus: http.StatusInternalServerError,
-			expectedCount:  0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			api, _, mockDB := setupGrafanaAPI()
-
-			// Setup database mock expectations
-			if tt.dbError == nil && tt.mockRows != nil {
-				// Mock test names query
-				rows := &MockRows{
-					data: make([][]interface{}, len(tt.mockRows)),
-				}
-				for i, testName := range tt.mockRows {
-					rows.data[i] = []interface{}{testName}
-				}
-				mockDB.On("QueryContext", mock.Anything, mock.MatchedBy(func(query string) bool {
-					return query == "SELECT DISTINCT test_name FROM historic_runs ORDER BY test_name"
-				})).Return(rows, nil)
-
-				// Mock client names query
-				clientRows := &MockRows{
-					data: [][]interface{}{
-						{"geth"},
-						{"besu"},
-					},
-				}
-				mockDB.On("QueryContext", mock.Anything, mock.MatchedBy(func(query string) bool {
-					return query != "SELECT DISTINCT test_name FROM historic_runs ORDER BY test_name"
-				}), mock.Anything).Return(clientRows, nil)
-			} else if tt.dbError != nil {
-				mockDB.On("QueryContext", mock.Anything, mock.AnythingOfType("string")).Return(nil, tt.dbError)
-			}
-
-			var body []byte
-			if req, ok := tt.requestBody.(GrafanaSearchRequest); ok {
-				body, _ = json.Marshal(req)
-			} else {
-				body = []byte(tt.requestBody.(string))
-			}
-
-			req := httptest.NewRequest("POST", "/grafana/search", bytes.NewReader(body))
-			req.Header.Set("Content-Type", "application/json")
-			w := httptest.NewRecorder()
-
-			api.HandleGrafanaSearch(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var metrics []string
-				err := json.Unmarshal(w.Body.Bytes(), &metrics)
-				require.NoError(t, err)
-				assert.Len(t, metrics, tt.expectedCount)
-			}
-
-			mockDB.AssertExpectations(t)
-		})
-	}
-}
-
-// Test query endpoint
-
-func TestHandleGrafanaQuery(t *testing.T) {
-	tests := []struct {
-		name           string
-		requestBody    interface{}
-		mockData       [][]interface{}
-		dbError        error
-		expectedStatus int
-		expectedCount  int
-	}{
-		{
-			name: "successful time series query",
-			requestBody: GrafanaQueryRequest{
-				Range: GrafanaRange{
-					From: "2023-01-01T00:00:00Z",
-					To:   "2023-01-02T00:00:00Z",
-				},
-				Targets: []GrafanaTarget{
-					{
-						Target: "test-benchmark.overall.avg_latency",
-						Type:   "timeserie",
-					},
-				},
-			},
-			mockData: [][]interface{}{
-				{time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC), 45.5},
-				{time.Date(2023, 1, 1, 13, 0, 0, 0, time.UTC), 47.2},
-			},
-			dbError:        nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  1, // 1 target
-		},
-		{
-			name: "successful table query",
-			requestBody: GrafanaQueryRequest{
-				Range: GrafanaRange{
-					From: "2023-01-01T00:00:00Z",
-					To:   "2023-01-02T00:00:00Z",
-				},
-				Targets: []GrafanaTarget{
-					{
-						Target: "test-benchmark.overall.avg_latency",
-						Type:   "table",
-					},
-				},
-			},
-			mockData: [][]interface{}{
-				{
-					time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC),
-					"test-benchmark", "abc123", "main",
-					int64(1000), int64(10), 0.01,
-					45.5, 89.2, 125.8, "geth",
-				},
-			},
-			dbError:        nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  1,
-		},
-		{
-			name:           "invalid JSON",
-			requestBody:    "invalid json",
-			mockData:       nil,
-			dbError:        nil,
-			expectedStatus: http.StatusBadRequest,
-			expectedCount:  0,
-		},
-		{
-			name: "invalid time format",
-			requestBody: GrafanaQueryRequest{
-				Range: GrafanaRange{
-					From: "invalid-time",
-					To:   "2023-01-02T00:00:00Z",
-				},
-				Targets: []GrafanaTarget{
-					{Target: "test.overall.avg_latency"},
-				},
-			},
-			mockData:       nil,
-			dbError:        nil,
-			expectedStatus: http.StatusBadRequest,
-			expectedCount:  0,
-		},
-		{
-			name: "database error",
-			requestBody: GrafanaQueryRequest{
-				Range: GrafanaRange{
-					From: "2023-01-01T00:00:00Z",
-					To:   "2023-01-02T00:00:00Z",
-				},
-				Targets: []GrafanaTarget{
-					{Target: "test.overall.avg_latency"},
-				},
-			},
-			mockData:       nil,
-			dbError:        fmt.Errorf("database error"),
-			expectedStatus: http.StatusOK, // Query endpoint handles individual target errors gracefully
-			expectedCount:  0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			api, _, mockDB := setupGrafanaAPI()
-
-			// Setup database mock expectations
-			if tt.dbError == nil && tt.mockData != nil {
-				rows := &MockRows{data: tt.mockData}
-				mockDB.On("QueryContext", mock.Anything, mock.AnythingOfType("string"), mock.Anything).Return(rows, nil)
-			} else if tt.dbError != nil {
-				mockDB.On("QueryContext", mock.Anything, mock.AnythingOfType("string"), mock.Anything).Return(nil, tt.dbError)
-			}
-
-			var body []byte
-			if req, ok := tt.requestBody.(GrafanaQueryRequest); ok {
-				body, _ = json.Marshal(req)
-			} else {
-				body = []byte(tt.requestBody.(string))
-			}
-
-			req := httptest.NewRequest("POST", "/grafana/query", bytes.NewReader(body))
-			req.Header.Set("Content-Type", "application/json")
-			w := httptest.NewRecorder()
-
-			api.HandleGrafanaQuery(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var response []interface{}
-				err := json.Unmarshal(w.Body.Bytes(), &response)
-				require.NoError(t, err)
-				assert.Len(t, response, tt.expectedCount)
-			}
-
-			if tt.dbError != nil || (tt.dbError == nil && tt.mockData != nil) {
-				mockDB.AssertExpectations(t)
-			}
-		})
-	}
-}
-
-// Test annotations endpoint
-
-func TestHandleGrafanaAnnotations(t *testing.T) {
-	tests := []struct {
-		name           string
-		requestBody    interface{}
-		mockData       map[string][][]interface{}
-		dbError        error
-		expectedStatus int
-		minAnnotations int
-	}{
-		{
-			name: "successful annotations query",
-			requestBody: GrafanaAnnotationRequest{
-				Range: GrafanaRange{
-					From: "2023-01-01T00:00:00Z",
-					To:   "2023-01-02T00:00:00Z",
-				},
-				Annotation: GrafanaAnnotationQuery{
-					Name: "regressions",
-				},
-			},
-			mockData: map[string][][]interface{}{
-				"regressions": {
-					{"reg1", "run1", "geth", "p95_latency", "high", time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC), "test-benchmark"},
-				},
-				"baselines": {
-					{"baseline1", "run1", "test-benchmark", time.Date(2023, 1, 1, 10, 0, 0, 0, time.UTC), "abc123"},
-				},
-				"deployments": {
-					{"run1", "test-benchmark", "abc123", "main", time.Date(2023, 1, 1, 9, 0, 0, 0, time.UTC), "geth"},
-				},
-			},
-			dbError:        nil,
-			expectedStatus: http.StatusOK,
-			minAnnotations: 1,
-		},
-		{
-			name: "all annotations types",
-			requestBody: GrafanaAnnotationRequest{
-				Range: GrafanaRange{
-					From: "2023-01-01T00:00:00Z",
-					To:   "2023-01-02T00:00:00Z",
-				},
-				Annotation: GrafanaAnnotationQuery{
-					Name: "", // Empty name should return all types
-				},
-			},
-			mockData: map[string][][]interface{}{
-				"regressions": {
-					{"reg1", "run1", "geth", "p95_latency", "high", time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC), "test-benchmark"},
-				},
-				"baselines": {
-					{"baseline1", "run1", "test-benchmark", time.Date(2023, 1, 1, 10, 0, 0, 0, time.UTC), "abc123"},
-				},
-				"deployments": {
-					{"run1", "test-benchmark", "abc123", "main", time.Date(2023, 1, 1, 9, 0, 0, 0, time.UTC), "geth"},
-				},
-			},
-			dbError:        nil,
-			expectedStatus: http.StatusOK,
-			minAnnotations: 3,
-		},
-		{
-			name:           "invalid JSON",
-			requestBody:    "invalid json",
-			mockData:       nil,
-			dbError:        nil,
-			expectedStatus: http.StatusBadRequest,
-			minAnnotations: 0,
-		},
-		{
-			name: "invalid time format",
-			requestBody: GrafanaAnnotationRequest{
-				Range: GrafanaRange{
-					From: "invalid-time",
-					To:   "2023-01-02T00:00:00Z",
-				},
-				Annotation: GrafanaAnnotationQuery{
-					Name: "regressions",
-				},
-			},
-			mockData:       nil,
-			dbError:        nil,
-			expectedStatus: http.StatusBadRequest,
-			minAnnotations: 0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			api, _, mockDB := setupGrafanaAPI()
-
-			// Setup database mock expectations
-			if tt.dbError == nil && tt.mockData != nil {
-				// Mock regressions query
-				if data, exists := tt.mockData["regressions"]; exists {
-					rows := &MockRows{data: data}
-					mockDB.On("QueryContext", mock.Anything, mock.MatchedBy(func(query string) bool {
-						return contains(query, "regressions")
-					}), mock.Anything).Return(rows, nil)
-				}
-
-				// Mock baselines query
-				if data, exists := tt.mockData["baselines"]; exists {
-					rows := &MockRows{data: data}
-					mockDB.On("QueryContext", mock.Anything, mock.MatchedBy(func(query string) bool {
-						return contains(query, "baselines")
-					}), mock.Anything).Return(rows, nil)
-				}
-
-				// Mock deployments query
-				if data, exists := tt.mockData["deployments"]; exists {
-					rows := &MockRows{data: data}
-					mockDB.On("QueryContext", mock.Anything, mock.MatchedBy(func(query string) bool {
-						return contains(query, "historic_runs") && contains(query, "git_commit")
-					}), mock.Anything).Return(rows, nil)
-				}
-			} else if tt.dbError != nil {
-				mockDB.On("QueryContext", mock.Anything, mock.AnythingOfType("string"), mock.Anything).Return(nil, tt.dbError)
-			}
-
-			var body []byte
-			if req, ok := tt.requestBody.(GrafanaAnnotationRequest); ok {
-				body, _ = json.Marshal(req)
-			} else {
-				body = []byte(tt.requestBody.(string))
-			}
-
-			req := httptest.NewRequest("POST", "/grafana/annotations", bytes.NewReader(body))
-			req.Header.Set("Content-Type", "application/json")
-			w := httptest.NewRecorder()
-
-			api.HandleGrafanaAnnotations(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var annotations []GrafanaAnnotation
-				err := json.Unmarshal(w.Body.Bytes(), &annotations)
-				require.NoError(t, err)
-				assert.GreaterOrEqual(t, len(annotations), tt.minAnnotations)
-
-				// Verify annotation structure
-				for _, annotation := range annotations {
-					assert.NotEmpty(t, annotation.Title)
-					assert.NotZero(t, annotation.Time)
-					assert.NotNil(t, annotation.Annotation)
-				}
-			}
-
-			if tt.mockData != nil || tt.dbError != nil {
-				mockDB.AssertExpectations(t)
-			}
-		})
-	}
-}
-
-// Test metrics metadata endpoint
-
-func TestHandleGrafanaMetrics(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	req := httptest.NewRequest("GET", "/grafana/metrics", nil)
-	w := httptest.NewRecorder()
-
-	api.HandleGrafanaMetrics(w, req)
-
-	assert.Equal(t, http.StatusOK, w.Code)
-
-	var metadata []MetricMetadata
-	err := json.Unmarshal(w.Body.Bytes(), &metadata)
-	require.NoError(t, err)
-
-	assert.NotEmpty(t, metadata)
-
-	// Verify base metrics are present
-	baseMetrics := []string{"avg_latency", "p95_latency", "p99_latency", "error_rate", "throughput"}
-	foundMetrics := make(map[string]bool)
-	for _, meta := range metadata {
-		for _, base := range baseMetrics {
-			if meta.Name == base {
-				foundMetrics[base] = true
-				assert.NotEmpty(t, meta.Type)
-				assert.NotEmpty(t, meta.Help)
-				assert.NotEmpty(t, meta.Unit)
-				assert.NotEmpty(t, meta.Labels)
-			}
-		}
-	}
-
-	for _, base := range baseMetrics {
-		assert.True(t, foundMetrics[base], "Base metric %s should be present", base)
-	}
-}
-
-// Test data formatting methods
-
-func TestFormatGrafanaTimeSeries(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	data := []TimeSeriesDataPoint{
-		{
-			Timestamp: time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC),
-			Value:     45.5,
-		},
-		{
-			Timestamp: time.Date(2023, 1, 1, 13, 0, 0, 0, time.UTC),
-			Value:     47.2,
-		},
-	}
-
-	result := api.FormatGrafanaTimeSeries(data, "test.metric")
-
-	assert.Equal(t, "test.metric", result.Target)
-	assert.Len(t, result.DataPoints, 2)
-
-	// Verify data point format [value, timestamp_ms]
-	assert.Equal(t, 45.5, result.DataPoints[0][0])
-	assert.Equal(t, int64(1672574400000), result.DataPoints[0][1]) // Unix timestamp in ms
-
-	assert.Equal(t, 47.2, result.DataPoints[1][0])
-	assert.Equal(t, int64(1672578000000), result.DataPoints[1][1])
-}
-
-func TestFormatGrafanaTable(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	columns := []TableColumn{
-		{Text: "Time", Type: "time"},
-		{Text: "Value", Type: "number"},
-		{Text: "Client", Type: "string"},
-	}
-
-	data := []TableRow{
-		{Values: []interface{}{1672574400000, 45.5, "geth"}},
-		{Values: []interface{}{1672578000000, 47.2, "besu"}},
-	}
-
-	result := api.FormatGrafanaTable(data, columns)
-
-	assert.Equal(t, "table", result.Type)
-	assert.Len(t, result.Columns, 3)
-	assert.Len(t, result.Rows, 2)
-
-	assert.Equal(t, "Time", result.Columns[0].Text)
-	assert.Equal(t, "time", result.Columns[0].Type)
-
-	assert.Equal(t, []interface{}{1672574400000, 45.5, "geth"}, result.Rows[0])
-	assert.Equal(t, []interface{}{1672578000000, 47.2, "besu"}, result.Rows[1])
-}
-
-// Test helper methods
-
-func TestParseMetricTarget(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	tests := []struct {
-		name     string
-		target   string
-		expected *MetricInfo
-	}{
-		{
-			name:   "basic metric",
-			target: "test-benchmark.geth.avg_latency",
-			expected: &MetricInfo{
-				OriginalTarget: "test-benchmark.geth.avg_latency",
-				TestName:       "test-benchmark",
-				Client:         "geth",
-				MetricType:     "avg_latency",
-			},
-		},
-		{
-			name:   "metric with aggregation",
-			target: "rate(test-benchmark.overall.error_rate)",
-			expected: &MetricInfo{
-				OriginalTarget: "test-benchmark.overall.error_rate",
-				TestName:       "test-benchmark",
-				Client:         "overall",
-				MetricType:     "error_rate",
-				Aggregation:    "rate",
-			},
-		},
-		{
-			name:     "invalid format",
-			target:   "invalid.format",
-			expected: nil,
-		},
-		{
-			name:     "empty target",
-			target:   "",
-			expected: nil,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := api.parseMetricTarget(tt.target)
-
-			if tt.expected == nil {
-				assert.Nil(t, result)
-			} else {
-				require.NotNil(t, result)
-				assert.Equal(t, tt.expected.OriginalTarget, result.OriginalTarget)
-				assert.Equal(t, tt.expected.TestName, result.TestName)
-				assert.Equal(t, tt.expected.Client, result.Client)
-				assert.Equal(t, tt.expected.MetricType, result.MetricType)
-				assert.Equal(t, tt.expected.Aggregation, result.Aggregation)
-			}
-		})
-	}
-}
-
-func TestParseGrafanaTime(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	tests := []struct {
-		name        string
-		timeStr     string
-		expectError bool
-		expected    time.Time
-	}{
-		{
-			name:        "RFC3339 format",
-			timeStr:     "2023-01-01T12:00:00Z",
-			expectError: false,
-			expected:    time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC),
-		},
-		{
-			name:        "Unix timestamp seconds",
-			timeStr:     "1672574400",
-			expectError: false,
-			expected:    time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC),
-		},
-		{
-			name:        "Unix timestamp milliseconds",
-			timeStr:     "1672574400000",
-			expectError: false,
-			expected:    time.Date(2023, 1, 1, 12, 0, 0, 0, time.UTC),
-		},
-		{
-			name:        "invalid format",
-			timeStr:     "invalid-time",
-			expectError: true,
-		},
-		{
-			name:        "empty string",
-			timeStr:     "",
-			expectError: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result, err := api.parseGrafanaTime(tt.timeStr)
-
-			if tt.expectError {
-				assert.Error(t, err)
-			} else {
-				assert.NoError(t, err)
-				assert.Equal(t, tt.expected.Unix(), result.Unix())
-			}
-		})
-	}
-}
-
-func TestMatchesSearch(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	tests := []struct {
-		name     string
-		metric   string
-		search   string
-		expected bool
-	}{
-		{
-			name:     "exact match",
-			metric:   "test.geth.avg_latency",
-			search:   "test.geth.avg_latency",
-			expected: true,
-		},
-		{
-			name:     "partial match",
-			metric:   "test.geth.avg_latency",
-			search:   "geth",
-			expected: true,
-		},
-		{
-			name:     "case insensitive",
-			metric:   "test.geth.avg_latency",
-			search:   "GETH",
-			expected: true,
-		},
-		{
-			name:     "wildcard match",
-			metric:   "test.geth.avg_latency",
-			search:   "test.*latency",
-			expected: true,
-		},
-		{
-			name:     "no match",
-			metric:   "test.geth.avg_latency",
-			search:   "besu",
-			expected: false,
-		},
-		{
-			name:     "empty search matches all",
-			metric:   "test.geth.avg_latency",
-			search:   "",
-			expected: true,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := api.matchesSearch(tt.metric, tt.search)
-			assert.Equal(t, tt.expected, result)
-		})
-	}
-}
-
-func TestApplyAggregation(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	tests := []struct {
-		name           string
-		aggregation    string
-		value          float64
-		existingPoints [][]interface{}
-		expected       float64
-	}{
-		{
-			name:           "rate calculation",
-			aggregation:    "rate",
-			value:          100.0,
-			existingPoints: [][]interface{}{{50.0, int64(1672574400000)}},
-			expected:       50.0,
-		},
-		{
-			name:           "delta calculation",
-			aggregation:    "delta",
-			value:          75.0,
-			existingPoints: [][]interface{}{{50.0, int64(1672574400000)}},
-			expected:       25.0,
-		},
-		{
-			name:           "count aggregation",
-			aggregation:    "count",
-			value:          123.45,
-			existingPoints: [][]interface{}{},
-			expected:       1.0,
-		},
-		{
-			name:           "no aggregation",
-			aggregation:    "",
-			value:          42.5,
-			existingPoints: [][]interface{}{},
-			expected:       42.5,
-		},
-		{
-			name:           "unknown aggregation",
-			aggregation:    "unknown",
-			value:          42.5,
-			existingPoints: [][]interface{}{},
-			expected:       42.5,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := api.applyAggregation(tt.aggregation, tt.value, tt.existingPoints)
-			assert.Equal(t, tt.expected, result)
-		})
-	}
-}
-
-func TestGetSeverityColor(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	tests := []struct {
-		severity string
-		expected string
-	}{
-		{"critical", "red"},
-		{"major", "orange"},
-		{"minor", "yellow"},
-		{"unknown", "blue"},
-		{"", "blue"},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.severity, func(t *testing.T) {
-			result := api.getSeverityColor(tt.severity)
-			assert.Equal(t, tt.expected, result)
-		})
-	}
-}
-
-// Test response writing methods
-
-func TestWriteGrafanaResponse(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	testData := map[string]interface{}{
-		"test": "data",
-		"num":  42,
-	}
-
-	w := httptest.NewRecorder()
-	api.writeGrafanaResponse(w, http.StatusOK, testData)
-
-	assert.Equal(t, http.StatusOK, w.Code)
-	assert.Equal(t, "application/json", w.Header().Get("Content-Type"))
-	assert.Equal(t, "*", w.Header().Get("Access-Control-Allow-Origin"))
-	assert.Equal(t, "GET, POST, OPTIONS", w.Header().Get("Access-Control-Allow-Methods"))
-	assert.Equal(t, "Content-Type, Authorization", w.Header().Get("Access-Control-Allow-Headers"))
-
-	var response map[string]interface{}
-	err := json.Unmarshal(w.Body.Bytes(), &response)
-	require.NoError(t, err)
-	assert.Equal(t, "data", response["test"])
-	assert.Equal(t, float64(42), response["num"])
-}
-
-func TestWriteGrafanaErrorResponse(t *testing.T) {
-	api, _, _ := setupGrafanaAPI()
-
-	w := httptest.NewRecorder()
-	api.writeGrafanaErrorResponse(w, http.StatusBadRequest, "Test error message")
-
-	assert.Equal(t, http.StatusBadRequest, w.Code)
-	assert.Equal(t, "application/json", w.Header().Get("Content-Type"))
-
-	var response map[string]interface{}
-	err := json.Unmarshal(w.Body.Bytes(), &response)
-	require.NoError(t, err)
-	assert.Equal(t, "Test error message", response["error"])
-	assert.Equal(t, "Test error message", response["message"])
-	assert.Equal(t, float64(400), response["status"])
-}
-
-// Mock implementation for sql.Rows
-type MockRows struct {
-	data    [][]interface{}
-	index   int
-	columns []string
-	err     error
-}
-
-func (m *MockRows) Close() error {
-	return nil
-}
-
-func (m *MockRows) Columns() ([]string, error) {
-	if m.columns != nil {
-		return m.columns, nil
-	}
-	return []string{"col1", "col2", "col3", "col4", "col5", "col6", "col7"}, nil
-}
-
-func (m *MockRows) Err() error {
-	return m.err
-}
-
-func (m *MockRows) Next() bool {
-	return m.index < len(m.data)
-}
-
-func (m *MockRows) Scan(dest ...interface{}) error {
-	if m.index >= len(m.data) {
-		return fmt.Errorf("no more rows")
-	}
-
-	row := m.data[m.index]
-	m.index++
-
-	for i, val := range dest {
-		if i < len(row) {
-			switch v := dest[i].(type) {
-			case *string:
-				if str, ok := row[i].(string); ok {
-					*v = str
-				} else {
-					*v = fmt.Sprintf("%v", row[i])
-				}
-			case *time.Time:
-				if t, ok := row[i].(time.Time); ok {
-					*v = t
-				}
-			case *float64:
-				if f, ok := row[i].(float64); ok {
-					*v = f
-				} else if i, ok := row[i].(int); ok {
-					*v = float64(i)
-				}
-			case *int64:
-				if i, ok := row[i].(int64); ok {
-					*v = i
-				} else if i, ok := row[i].(int); ok {
-					*v = int64(i)
-				}
-			case *int:
-				if i, ok := row[i].(int); ok {
-					*v = i
-				} else if i, ok := row[i].(int64); ok {
-					*v = int(i)
-				}
-			}
-		}
-	}
-
-	return nil
-}
-
-// Helper function for string contains check
-func contains(s, substr string) bool {
-	return len(s) >= len(substr) && (s == substr || len(substr) == 0 ||
-		(len(substr) > 0 && len(s) > 0 &&
-			(s[:len(substr)] == substr || s[len(s)-len(substr):] == substr ||
-				(len(s) > len(substr) && findInString(s, substr)))))
-}
-
-func findInString(s, substr string) bool {
-	for i := 0; i <= len(s)-len(substr); i++ {
-		if s[i:i+len(substr)] == substr {
-			return true
-		}
-	}
-	return false
-}
-
-// Benchmark tests
-
-func BenchmarkHandleGrafanaQuery(b *testing.B) {
-	api, _, mockDB := setupGrafanaAPI()
-
-	mockData := [][]interface{}{
-		{time.Now(), 45.5},
-		{time.Now().Add(time.Minute), 47.2},
-	}
-	rows := &MockRows{data: mockData}
-	mockDB.On("QueryContext", mock.Anything, mock.AnythingOfType("string"), mock.Anything).Return(rows, nil)
-
-	requestBody := GrafanaQueryRequest{
-		Range: GrafanaRange{
-			From: "2023-01-01T00:00:00Z",
-			To:   "2023-01-02T00:00:00Z",
-		},
-		Targets: []GrafanaTarget{
-			{Target: "test.overall.avg_latency"},
-		},
-	}
-	body, _ := json.Marshal(requestBody)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		req := httptest.NewRequest("POST", "/grafana/query", bytes.NewReader(body))
-		req.Header.Set("Content-Type", "application/json")
-		w := httptest.NewRecorder()
-
-		api.HandleGrafanaQuery(w, req)
-	}
-}
-
-func BenchmarkParseMetricTarget(b *testing.B) {
-	api, _, _ := setupGrafanaAPI()
-	target := "test-benchmark.geth.avg_latency"
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = api.parseMetricTarget(target)
-	}
-}
-
-func BenchmarkFormatGrafanaTimeSeries(b *testing.B) {
-	api, _, _ := setupGrafanaAPI()
-
-	data := []TimeSeriesDataPoint{
-		{Timestamp: time.Now(), Value: 45.5},
-		{Timestamp: time.Now().Add(time.Minute), Value: 47.2},
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = api.FormatGrafanaTimeSeries(data, "test.metric")
-	}
-}
-
-// Integration tests
-
-func TestGrafanaAPIIntegration(b *testing.T) {
-	api, mockStorage, mockDB := setupGrafanaAPI()
-
-	ctx := context.Background()
-
-	// Test lifecycle
-	err := api.Start(ctx)
-	require.NoError(b, err)
-
-	// Test connection endpoint
-	mockDB.On("Ping").Return(nil)
-
-	req := httptest.NewRequest("GET", "/grafana/", nil)
-	w := httptest.NewRecorder()
-
-	api.HandleGrafanaTestConnection(w, req)
-	assert.Equal(b, http.StatusOK, w.Code)
-
-	// Test search endpoint
-	rows := &MockRows{
-		data: [][]interface{}{
-			{"test-benchmark"},
-			{"another-test"},
-		},
-	}
-	mockDB.On("QueryContext", mock.Anything, mock.AnythingOfType("string")).Return(rows, nil)
-	mockDB.On("QueryContext", mock.Anything, mock.AnythingOfType("string"), mock.Anything).Return(&MockRows{data: [][]interface{}{{"geth"}}}, nil)
-
-	searchReq := GrafanaSearchRequest{Target: "test"}
-	body, _ := json.Marshal(searchReq)
-
-	req = httptest.NewRequest("POST", "/grafana/search", bytes.NewReader(body))
-	req.Header.Set("Content-Type", "application/json")
-	w = httptest.NewRecorder()
-
-	api.HandleGrafanaSearch(w, req)
-	assert.Equal(b, http.StatusOK, w.Code)
-
-	// Test cleanup
-	err = api.Stop()
-	require.NoError(b, err)
-
-	mockDB.AssertExpectations(b)
-}
diff --git a/runner/api/handlers.go b/runner/api/handlers.go
index ec33b8f..ff35603 100644
--- a/runner/api/handlers.go
+++ b/runner/api/handlers.go
@@ -14,6 +14,7 @@ import (
 	"github.com/sirupsen/logrus"
 
 	"github.com/jsonrpc-bench/runner/analysis"
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/storage"
 	"github.com/jsonrpc-bench/runner/types"
 )
@@ -274,7 +275,7 @@ func (h *apiHandlers) HandleGetRun(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	h.log.WithField("run_id", runID).Debug("Handling get run request")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Debug("Handling get run request")
 
 	if runID == "" {
 		h.writeErrorResponse(w, http.StatusBadRequest, "Run ID is required")
@@ -286,7 +287,7 @@ func (h *apiHandlers) HandleGetRun(w http.ResponseWriter, r *http.Request) {
 		if strings.Contains(err.Error(), "not found") {
 			h.writeErrorResponse(w, http.StatusNotFound, "Run not found")
 		} else {
-			h.log.WithError(err).Error("Failed to get historic run")
+			h.log.WithError(sanitize.LogError(err)).Error("Failed to get historic run")
 			h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to retrieve run")
 		}
 		return
@@ -322,7 +323,7 @@ func (h *apiHandlers) HandleGetRunMethods(w http.ResponseWriter, r *http.Request
 		return
 	}
 
-	h.log.WithField("run_id", runID).Debug("Getting method metrics for run")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Debug("Getting method metrics for run")
 
 	// Query method metrics directly from database
 	// Note: Using MAX aggregation to handle cases where there might be multiple metric entries
@@ -395,7 +396,7 @@ func (h *apiHandlers) HandleGetRunMethods(w http.ResponseWriter, r *http.Request
 
 	// Debug logging for total methods found
 	h.log.WithFields(logrus.Fields{
-		"run_id":       runID,
+		"run_id":       sanitize.LogValue(runID),
 		"method_count": methodCount,
 	}).Debug("Completed processing method metrics")
 
@@ -418,7 +419,7 @@ func (h *apiHandlers) HandleGetReport(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	h.log.WithField("run_id", runID).Debug("Handling get report request")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Debug("Handling get report request")
 
 	// Get the run
 	run, err := h.storage.GetHistoricRun(ctx, runID)
@@ -426,7 +427,7 @@ func (h *apiHandlers) HandleGetReport(w http.ResponseWriter, r *http.Request) {
 		if strings.Contains(err.Error(), "not found") {
 			h.writeErrorResponse(w, http.StatusNotFound, "Run not found")
 		} else {
-			h.log.WithError(err).Error("Failed to get historic run")
+			h.log.WithError(sanitize.LogError(err)).Error("Failed to get historic run")
 			h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to retrieve run")
 		}
 		return
@@ -466,7 +467,7 @@ func (h *apiHandlers) HandleDeleteRun(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	h.log.WithField("run_id", runID).Info("Handling delete run request")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Info("Handling delete run request")
 
 	err := h.storage.DeleteHistoricRun(ctx, runID)
 	if err != nil {
@@ -503,8 +504,8 @@ func (h *apiHandlers) HandleCompareRuns(w http.ResponseWriter, r *http.Request)
 	}
 
 	h.log.WithFields(logrus.Fields{
-		"run_id_1": runID1,
-		"run_id_2": runID2,
+		"run_id_1": sanitize.LogValue(runID1),
+		"run_id_2": sanitize.LogValue(runID2),
 	}).Debug("Handling compare runs request")
 
 	comparison, err := h.storage.CompareRuns(ctx, runID1, runID2)
@@ -536,7 +537,7 @@ func (h *apiHandlers) HandleListBaselines(w http.ResponseWriter, r *http.Request
 		}
 	}
 
-	h.log.WithField("test_name", testName).Debug("Handling list baselines request")
+	h.log.WithField("test_name", sanitize.LogValue(testName)).Debug("Handling list baselines request")
 
 	baselines, err := h.baselineManager.ListBaselines(ctx, testName)
 	if err != nil {
@@ -577,7 +578,7 @@ func (h *apiHandlers) HandleCreateBaseline(w http.ResponseWriter, r *http.Reques
 
 	baseline, err := h.baselineManager.SetBaseline(ctx, req.RunID, req.Name, req.Description)
 	if err != nil {
-		h.log.WithError(err).Error("Failed to create baseline")
+		h.log.WithError(sanitize.LogError(err)).Error("Failed to create baseline")
 		h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to create baseline")
 		return
 	}
@@ -596,14 +597,14 @@ func (h *apiHandlers) HandleGetBaseline(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	h.log.WithField("baseline_name", baselineName).Debug("Handling get baseline request")
+	h.log.WithField("baseline_name", sanitize.LogValue(baselineName)).Debug("Handling get baseline request")
 
 	baseline, err := h.baselineManager.GetBaseline(ctx, baselineName)
 	if err != nil {
 		if strings.Contains(err.Error(), "not found") {
 			h.writeErrorResponse(w, http.StatusNotFound, "Baseline not found")
 		} else {
-			h.log.WithError(err).Error("Failed to get baseline")
+			h.log.WithError(sanitize.LogError(err)).Error("Failed to get baseline")
 			h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to retrieve baseline")
 		}
 		return
@@ -623,14 +624,14 @@ func (h *apiHandlers) HandleDeleteBaseline(w http.ResponseWriter, r *http.Reques
 		return
 	}
 
-	h.log.WithField("baseline_name", baselineName).Info("Handling delete baseline request")
+	h.log.WithField("baseline_name", sanitize.LogValue(baselineName)).Info("Handling delete baseline request")
 
 	err := h.baselineManager.DeleteBaseline(ctx, baselineName)
 	if err != nil {
 		if strings.Contains(err.Error(), "not found") {
 			h.writeErrorResponse(w, http.StatusNotFound, "Baseline not found")
 		} else {
-			h.log.WithError(err).Error("Failed to delete baseline")
+			h.log.WithError(sanitize.LogError(err)).Error("Failed to delete baseline")
 			h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to delete baseline")
 		}
 		return
@@ -654,7 +655,7 @@ func (h *apiHandlers) HandleSetBaseline(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	h.log.WithField("run_id", runID).Info("Handling set baseline request")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Info("Handling set baseline request")
 
 	var req SetBaselineRequest
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
@@ -669,7 +670,7 @@ func (h *apiHandlers) HandleSetBaseline(w http.ResponseWriter, r *http.Request)
 
 	baseline, err := h.baselineManager.SetBaseline(ctx, runID, req.Name, req.Description)
 	if err != nil {
-		h.log.WithError(err).Error("Failed to set baseline")
+		h.log.WithError(sanitize.LogError(err)).Error("Failed to set baseline")
 		h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to set baseline")
 		return
 	}
@@ -697,8 +698,8 @@ func (h *apiHandlers) HandleGetTrends(w http.ResponseWriter, r *http.Request) {
 	}
 
 	h.log.WithFields(logrus.Fields{
-		"test_name": testName,
-		"days":      SanitizeLogValue(daysStr),
+		"test_name": sanitize.LogValue(testName),
+		"days":      sanitize.LogValue(daysStr),
 	}).Debug("Handling get trends request")
 
 	days := 30 // Default
@@ -742,9 +743,9 @@ func (h *apiHandlers) HandleMethodTrends(w http.ResponseWriter, r *http.Request)
 	}
 
 	h.log.WithFields(logrus.Fields{
-		"test_name": testName,
-		"method":    method,
-		"days":      SanitizeLogValue(daysStr),
+		"test_name": sanitize.LogValue(testName),
+		"method":    sanitize.LogValue(method),
+		"days":      sanitize.LogValue(daysStr),
 	}).Debug("Handling method trends request")
 
 	days := 30 // Default
@@ -788,9 +789,9 @@ func (h *apiHandlers) HandleClientTrends(w http.ResponseWriter, r *http.Request)
 	}
 
 	h.log.WithFields(logrus.Fields{
-		"test_name": testName,
-		"client":    client,
-		"days":      SanitizeLogValue(daysStr),
+		"test_name": sanitize.LogValue(testName),
+		"client":    sanitize.LogValue(client),
+		"days":      sanitize.LogValue(daysStr),
 	}).Debug("Handling client trends request")
 
 	days := 30 // Default
@@ -828,7 +829,7 @@ func (h *apiHandlers) HandleGetRegressions(w http.ResponseWriter, r *http.Reques
 		return
 	}
 
-	h.log.WithField("run_id", runID).Debug("Handling get regressions request")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Debug("Handling get regressions request")
 
 	regressions, err := h.regressionDetector.GetRegressions(ctx, runID)
 	if err != nil {
@@ -867,7 +868,7 @@ func (h *apiHandlers) HandleDetectRegressions(w http.ResponseWriter, r *http.Req
 		return
 	}
 
-	h.log.WithField("run_id", runID).Info("Handling detect regressions request")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Info("Handling detect regressions request")
 
 	var req RegressionDetectionRequest
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
@@ -938,7 +939,7 @@ func (h *apiHandlers) HandleDetectRegressions(w http.ResponseWriter, r *http.Req
 
 	report, err := h.regressionDetector.DetectRegressions(ctx, runID, options)
 	if err != nil {
-		h.log.WithError(err).Error("Failed to detect regressions")
+		h.log.WithError(sanitize.LogError(err)).Error("Failed to detect regressions")
 		h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to detect regressions")
 		return
 	}
@@ -957,7 +958,7 @@ func (h *apiHandlers) HandleAcknowledgeRegression(w http.ResponseWriter, r *http
 		return
 	}
 
-	h.log.WithField("regression_id", regressionID).Info("Handling acknowledge regression request")
+	h.log.WithField("regression_id", sanitize.LogValue(regressionID)).Info("Handling acknowledge regression request")
 
 	var req AcknowledgeRegressionRequest
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
@@ -971,14 +972,14 @@ func (h *apiHandlers) HandleAcknowledgeRegression(w http.ResponseWriter, r *http
 	}
 	// AcknowledgedBy is a free-form name; scrub control characters before
 	// it propagates into downstream log fields.
-	acknowledgedBy := SanitizeLogValue(req.AcknowledgedBy)
+	acknowledgedBy := sanitize.LogValue(req.AcknowledgedBy)
 
 	err := h.regressionDetector.AcknowledgeRegression(ctx, regressionID, acknowledgedBy)
 	if err != nil {
 		if strings.Contains(err.Error(), "not found") {
 			h.writeErrorResponse(w, http.StatusNotFound, "Regression not found")
 		} else {
-			h.log.WithError(err).Error("Failed to acknowledge regression")
+			h.log.WithError(sanitize.LogError(err)).Error("Failed to acknowledge regression")
 			h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to acknowledge regression")
 		}
 		return
@@ -1004,11 +1005,11 @@ func (h *apiHandlers) HandleAnalyzeRun(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	h.log.WithField("run_id", runID).Debug("Handling analyze run request")
+	h.log.WithField("run_id", sanitize.LogValue(runID)).Debug("Handling analyze run request")
 
 	analysis, err := h.regressionDetector.AnalyzeRun(ctx, runID)
 	if err != nil {
-		h.log.WithError(err).Error("Failed to analyze run")
+		h.log.WithError(sanitize.LogError(err)).Error("Failed to analyze run")
 		h.writeErrorResponse(w, http.StatusInternalServerError, "Failed to analyze run")
 		return
 	}
@@ -1045,8 +1046,8 @@ func (h *apiHandlers) HandleGetMetricTrends(w http.ResponseWriter, r *http.Reque
 	}
 
 	h.log.WithFields(logrus.Fields{
-		"test_name": testName,
-		"metric":    metric,
+		"test_name": sanitize.LogValue(testName),
+		"metric":    sanitize.LogValue(metric),
 	}).Debug("Handling get metric trends request")
 
 	days := 30 // Default
@@ -1063,8 +1064,10 @@ func (h *apiHandlers) HandleGetMetricTrends(w http.ResponseWriter, r *http.Reque
 	// Create trend filter
 	since := time.Now().AddDate(0, 0, -days)
 	filter := types.TrendFilter{
-		Client: client,
-		Since:  since,
+		TestName: testName,
+		Client:   client,
+		Method:   metric,
+		Since:    since,
 	}
 
 	// Get basic trend
diff --git a/runner/api/handlers_test.go b/runner/api/handlers_test.go
deleted file mode 100644
index 008184b..0000000
--- a/runner/api/handlers_test.go
+++ /dev/null
@@ -1,1211 +0,0 @@
-package api
-
-import (
-	"bytes"
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/gorilla/mux"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-
-	"github.com/jsonrpc-bench/runner/analysis"
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// Helper function to setup test handlers
-func setupTestHandlers() (*apiHandlers, *MockHistoricStorage, *MockBaselineManager, *MockTrendAnalyzer, *MockRegressionDetector, *MockDB) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel) // Reduce noise in tests
-
-	storage := &MockHistoricStorage{}
-	baselineManager := &MockBaselineManager{}
-	trendAnalyzer := &MockTrendAnalyzer{}
-	regressionDetector := &MockRegressionDetector{}
-	db := &MockDB{}
-
-	handlers := &apiHandlers{
-		storage:            storage,
-		baselineManager:    baselineManager,
-		trendAnalyzer:      trendAnalyzer,
-		regressionDetector: regressionDetector,
-		db:                 db,
-		log:                log.WithField("component", "api-handlers"),
-	}
-
-	return handlers, storage, baselineManager, trendAnalyzer, regressionDetector, db
-}
-
-// Helper function to create mock historic run
-func createMockHistoricRun(id string) *types.HistoricRun {
-	return &types.HistoricRun{
-		ID:               id,
-		TestName:         "test-benchmark",
-		Description:      "Test benchmark description",
-		GitCommit:        "abc123def456",
-		GitBranch:        "main",
-		Timestamp:        time.Now(),
-		StartTime:        time.Now().Add(-5 * time.Minute),
-		EndTime:          time.Now(),
-		Duration:         "5m0s",
-		ClientsCount:     3,
-		EndpointsCount:   10,
-		TargetRPS:        100,
-		TotalRequests:    30000,
-		TotalErrors:      150,
-		OverallErrorRate: 0.005,
-		AvgLatencyMs:     45.5,
-		P95LatencyMs:     89.2,
-		P99LatencyMs:     125.8,
-		MaxLatencyMs:     250.0,
-		BestClient:       "geth",
-		PerformanceScores: map[string]float64{
-			"geth":       92.5,
-			"besu":       88.3,
-			"nethermind": 85.1,
-		},
-		FullResults: json.RawMessage(`{"test": "data"}`),
-		Notes:       "Test run notes",
-		CreatedAt:   time.Now(),
-		UpdatedAt:   time.Now(),
-	}
-}
-
-// Test APIHandlers creation and lifecycle
-
-func TestNewAPIHandlers(t *testing.T) {
-	handlers, storage, baselineManager, trendAnalyzer, regressionDetector, db := setupTestHandlers()
-
-	assert.NotNil(t, handlers)
-	assert.Equal(t, storage, handlers.storage)
-	assert.Equal(t, baselineManager, handlers.baselineManager)
-	assert.Equal(t, trendAnalyzer, handlers.trendAnalyzer)
-	assert.Equal(t, regressionDetector, handlers.regressionDetector)
-	assert.Equal(t, db, handlers.db)
-	assert.NotNil(t, handlers.log)
-}
-
-func TestAPIHandlersStartStop(t *testing.T) {
-	handlers, _, _, _, _, _ := setupTestHandlers()
-
-	ctx := context.Background()
-
-	// Test Start
-	err := handlers.Start(ctx)
-	assert.NoError(t, err)
-
-	// Test Stop
-	err = handlers.Stop()
-	assert.NoError(t, err)
-}
-
-// Test Historic Runs Handlers
-
-func TestHandleListRuns(t *testing.T) {
-	tests := []struct {
-		name           string
-		queryParams    string
-		mockRuns       []*types.HistoricRun
-		mockError      error
-		expectedStatus int
-		expectedCount  int
-	}{
-		{
-			name:        "successful list with no filters",
-			queryParams: "",
-			mockRuns: []*types.HistoricRun{
-				createMockHistoricRun("run1"),
-				createMockHistoricRun("run2"),
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  2,
-		},
-		{
-			name:        "successful list with test filter",
-			queryParams: "?test=specific-test&limit=10",
-			mockRuns: []*types.HistoricRun{
-				createMockHistoricRun("run1"),
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  1,
-		},
-		{
-			name:           "storage error",
-			queryParams:    "",
-			mockRuns:       nil,
-			mockError:      fmt.Errorf("storage error"),
-			expectedStatus: http.StatusInternalServerError,
-			expectedCount:  0,
-		},
-		{
-			name:           "empty result",
-			queryParams:    "",
-			mockRuns:       []*types.HistoricRun{},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, mockStorage, _, _, _, mockDB := setupTestHandlers()
-
-			// Setup expectations
-			mockStorage.On("ListHistoricRuns", mock.Anything, mock.AnythingOfType("string"), mock.AnythingOfType("int")).Return(tt.mockRuns, tt.mockError)
-			if tt.mockError == nil {
-				mockDB.On("QueryRowContext", mock.Anything, mock.AnythingOfType("string"), mock.Anything).Return(&sql.Row{})
-			}
-
-			req := httptest.NewRequest("GET", "/api/runs"+tt.queryParams, nil)
-			w := httptest.NewRecorder()
-
-			handlers.HandleListRuns(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var response map[string]interface{}
-				err := json.Unmarshal(w.Body.Bytes(), &response)
-				require.NoError(t, err)
-
-				runs, ok := response["runs"].([]interface{})
-				require.True(t, ok)
-				assert.Len(t, runs, tt.expectedCount)
-
-				assert.Contains(t, response, "count")
-				assert.Contains(t, response, "limit")
-				assert.Contains(t, response, "offset")
-			}
-
-			mockStorage.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleGetRun(t *testing.T) {
-	tests := []struct {
-		name           string
-		runID          string
-		mockRun        *types.HistoricRun
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:           "successful get",
-			runID:          "test-run-1",
-			mockRun:        createMockHistoricRun("test-run-1"),
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "run not found",
-			runID:          "nonexistent",
-			mockRun:        nil,
-			mockError:      fmt.Errorf("run not found"),
-			expectedStatus: http.StatusNotFound,
-		},
-		{
-			name:           "storage error",
-			runID:          "test-run-1",
-			mockRun:        nil,
-			mockError:      fmt.Errorf("database connection failed"),
-			expectedStatus: http.StatusInternalServerError,
-		},
-		{
-			name:           "empty run ID",
-			runID:          "",
-			mockRun:        nil,
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, mockStorage, _, _, _, _ := setupTestHandlers()
-
-			if tt.runID != "" {
-				mockStorage.On("GetHistoricRun", mock.Anything, tt.runID).Return(tt.mockRun, tt.mockError)
-			}
-
-			req := httptest.NewRequest("GET", "/api/runs/"+tt.runID, nil)
-			req = mux.SetURLVars(req, map[string]string{"runId": tt.runID})
-			w := httptest.NewRecorder()
-
-			handlers.HandleGetRun(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var run types.HistoricRun
-				err := json.Unmarshal(w.Body.Bytes(), &run)
-				require.NoError(t, err)
-				assert.Equal(t, tt.mockRun.ID, run.ID)
-				assert.Equal(t, tt.mockRun.TestName, run.TestName)
-			}
-
-			mockStorage.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleGetRunWithClientFilter(t *testing.T) {
-	handlers, mockStorage, _, _, _, _ := setupTestHandlers()
-
-	mockRun := createMockHistoricRun("test-run-1")
-	mockStorage.On("GetHistoricRun", mock.Anything, "test-run-1").Return(mockRun, nil)
-
-	req := httptest.NewRequest("GET", "/api/runs/test-run-1?client=geth", nil)
-	req = mux.SetURLVars(req, map[string]string{"runId": "test-run-1"})
-	w := httptest.NewRecorder()
-
-	handlers.HandleGetRun(w, req)
-
-	assert.Equal(t, http.StatusOK, w.Code)
-
-	var run types.HistoricRun
-	err := json.Unmarshal(w.Body.Bytes(), &run)
-	require.NoError(t, err)
-	assert.Equal(t, "test-run-1", run.ID)
-
-	mockStorage.AssertExpectations(t)
-}
-
-func TestHandleDeleteRun(t *testing.T) {
-	tests := []struct {
-		name           string
-		runID          string
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:           "successful delete",
-			runID:          "test-run-1",
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "run not found",
-			runID:          "nonexistent",
-			mockError:      fmt.Errorf("run not found"),
-			expectedStatus: http.StatusNotFound,
-		},
-		{
-			name:           "storage error",
-			runID:          "test-run-1",
-			mockError:      fmt.Errorf("database error"),
-			expectedStatus: http.StatusInternalServerError,
-		},
-		{
-			name:           "empty run ID",
-			runID:          "",
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, mockStorage, _, _, _, _ := setupTestHandlers()
-
-			if tt.runID != "" {
-				mockStorage.On("DeleteHistoricRun", mock.Anything, tt.runID).Return(tt.mockError)
-			}
-
-			req := httptest.NewRequest("DELETE", "/api/runs/"+tt.runID, nil)
-			req = mux.SetURLVars(req, map[string]string{"runId": tt.runID})
-			w := httptest.NewRecorder()
-
-			handlers.HandleDeleteRun(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var response map[string]interface{}
-				err := json.Unmarshal(w.Body.Bytes(), &response)
-				require.NoError(t, err)
-				assert.Equal(t, "success", response["status"])
-				assert.Equal(t, tt.runID, response["run_id"])
-			}
-
-			mockStorage.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleCompareRuns(t *testing.T) {
-	tests := []struct {
-		name           string
-		runID1         string
-		runID2         string
-		mockComparison *types.HistoricComparison
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:   "successful comparison",
-			runID1: "run1",
-			runID2: "run2",
-			mockComparison: &types.HistoricComparison{
-				RunID1:     "run1",
-				RunID2:     "run2",
-				Summary:    "Run 2 performed 5% better than run 1",
-				Timestamp1: time.Now().Add(-1 * time.Hour),
-				Timestamp2: time.Now(),
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "storage error",
-			runID1:         "run1",
-			runID2:         "run2",
-			mockComparison: nil,
-			mockError:      fmt.Errorf("comparison failed"),
-			expectedStatus: http.StatusInternalServerError,
-		},
-		{
-			name:           "missing run ID",
-			runID1:         "run1",
-			runID2:         "",
-			mockComparison: nil,
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, mockStorage, _, _, _, _ := setupTestHandlers()
-
-			if tt.runID1 != "" && tt.runID2 != "" {
-				mockStorage.On("CompareRuns", mock.Anything, tt.runID1, tt.runID2).Return(tt.mockComparison, tt.mockError)
-			}
-
-			req := httptest.NewRequest("GET", "/api/runs/"+tt.runID1+"/compare/"+tt.runID2, nil)
-			req = mux.SetURLVars(req, map[string]string{"runId1": tt.runID1, "runId2": tt.runID2})
-			w := httptest.NewRecorder()
-
-			handlers.HandleCompareRuns(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var comparison types.HistoricComparison
-				err := json.Unmarshal(w.Body.Bytes(), &comparison)
-				require.NoError(t, err)
-				assert.Equal(t, tt.mockComparison.RunID1, comparison.RunID1)
-				assert.Equal(t, tt.mockComparison.RunID2, comparison.RunID2)
-			}
-
-			mockStorage.AssertExpectations(t)
-		})
-	}
-}
-
-// Test Baseline Management Handlers
-
-func TestHandleListBaselines(t *testing.T) {
-	tests := []struct {
-		name           string
-		testName       string
-		mockBaselines  []*types.Baseline
-		mockError      error
-		expectedStatus int
-		expectedCount  int
-	}{
-		{
-			name:     "successful list",
-			testName: "test-benchmark",
-			mockBaselines: []*types.Baseline{
-				{Name: "baseline1", TestName: "test-benchmark"},
-				{Name: "baseline2", TestName: "test-benchmark"},
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  2,
-		},
-		{
-			name:           "baseline manager error",
-			testName:       "test-benchmark",
-			mockBaselines:  nil,
-			mockError:      fmt.Errorf("baseline error"),
-			expectedStatus: http.StatusInternalServerError,
-			expectedCount:  0,
-		},
-		{
-			name:           "empty result",
-			testName:       "test-benchmark",
-			mockBaselines:  []*types.Baseline{},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, mockBaselineManager, _, _, _ := setupTestHandlers()
-
-			mockBaselineManager.On("ListBaselines", mock.Anything, tt.testName).Return(tt.mockBaselines, tt.mockError)
-
-			req := httptest.NewRequest("GET", "/api/baselines?test="+tt.testName, nil)
-			w := httptest.NewRecorder()
-
-			handlers.HandleListBaselines(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var response map[string]interface{}
-				err := json.Unmarshal(w.Body.Bytes(), &response)
-				require.NoError(t, err)
-
-				baselines, ok := response["baselines"].([]interface{})
-				require.True(t, ok)
-				assert.Len(t, baselines, tt.expectedCount)
-			}
-
-			mockBaselineManager.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleCreateBaseline(t *testing.T) {
-	tests := []struct {
-		name           string
-		requestBody    interface{}
-		mockBaseline   *types.Baseline
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name: "successful creation",
-			requestBody: CreateBaselineRequest{
-				RunID:       "test-run-1",
-				Name:        "test-baseline",
-				Description: "Test baseline description",
-			},
-			mockBaseline: &types.Baseline{
-				Name:        "test-baseline",
-				RunID:       "test-run-1",
-				Description: "Test baseline description",
-				TestName:    "test-benchmark",
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusCreated,
-		},
-		{
-			name: "missing required fields",
-			requestBody: CreateBaselineRequest{
-				RunID: "test-run-1",
-				// Missing Name
-			},
-			mockBaseline:   nil,
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-		{
-			name: "baseline manager error",
-			requestBody: CreateBaselineRequest{
-				RunID:       "test-run-1",
-				Name:        "test-baseline",
-				Description: "Test baseline description",
-			},
-			mockBaseline:   nil,
-			mockError:      fmt.Errorf("baseline creation failed"),
-			expectedStatus: http.StatusInternalServerError,
-		},
-		{
-			name:           "invalid JSON",
-			requestBody:    "invalid json",
-			mockBaseline:   nil,
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, mockBaselineManager, _, _, _ := setupTestHandlers()
-
-			var body []byte
-			if req, ok := tt.requestBody.(CreateBaselineRequest); ok {
-				body, _ = json.Marshal(req)
-				if req.RunID != "" && req.Name != "" {
-					mockBaselineManager.On("SetBaseline", mock.Anything, req.RunID, req.Name, req.Description).Return(tt.mockBaseline, tt.mockError)
-				}
-			} else {
-				body = []byte(tt.requestBody.(string))
-			}
-
-			req := httptest.NewRequest("POST", "/api/baselines", bytes.NewReader(body))
-			req.Header.Set("Content-Type", "application/json")
-			w := httptest.NewRecorder()
-
-			handlers.HandleCreateBaseline(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusCreated {
-				var baseline types.Baseline
-				err := json.Unmarshal(w.Body.Bytes(), &baseline)
-				require.NoError(t, err)
-				assert.Equal(t, tt.mockBaseline.Name, baseline.Name)
-			}
-
-			mockBaselineManager.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleGetBaseline(t *testing.T) {
-	tests := []struct {
-		name           string
-		baselineName   string
-		mockBaseline   *types.Baseline
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:         "successful get",
-			baselineName: "test-baseline",
-			mockBaseline: &types.Baseline{
-				Name:        "test-baseline",
-				RunID:       "test-run-1",
-				Description: "Test baseline",
-				TestName:    "test-benchmark",
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "baseline not found",
-			baselineName:   "nonexistent",
-			mockBaseline:   nil,
-			mockError:      fmt.Errorf("baseline not found"),
-			expectedStatus: http.StatusNotFound,
-		},
-		{
-			name:           "empty baseline name",
-			baselineName:   "",
-			mockBaseline:   nil,
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, mockBaselineManager, _, _, _ := setupTestHandlers()
-
-			if tt.baselineName != "" {
-				mockBaselineManager.On("GetBaseline", mock.Anything, tt.baselineName).Return(tt.mockBaseline, tt.mockError)
-			}
-
-			req := httptest.NewRequest("GET", "/api/baselines/"+tt.baselineName, nil)
-			req = mux.SetURLVars(req, map[string]string{"baselineName": tt.baselineName})
-			w := httptest.NewRecorder()
-
-			handlers.HandleGetBaseline(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var baseline types.Baseline
-				err := json.Unmarshal(w.Body.Bytes(), &baseline)
-				require.NoError(t, err)
-				assert.Equal(t, tt.mockBaseline.Name, baseline.Name)
-			}
-
-			mockBaselineManager.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleDeleteBaseline(t *testing.T) {
-	tests := []struct {
-		name           string
-		baselineName   string
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:           "successful delete",
-			baselineName:   "test-baseline",
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "baseline not found",
-			baselineName:   "nonexistent",
-			mockError:      fmt.Errorf("baseline not found"),
-			expectedStatus: http.StatusNotFound,
-		},
-		{
-			name:           "empty baseline name",
-			baselineName:   "",
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, mockBaselineManager, _, _, _ := setupTestHandlers()
-
-			if tt.baselineName != "" {
-				mockBaselineManager.On("DeleteBaseline", mock.Anything, tt.baselineName).Return(tt.mockError)
-			}
-
-			req := httptest.NewRequest("DELETE", "/api/baselines/"+tt.baselineName, nil)
-			req = mux.SetURLVars(req, map[string]string{"baselineName": tt.baselineName})
-			w := httptest.NewRecorder()
-
-			handlers.HandleDeleteBaseline(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var response map[string]interface{}
-				err := json.Unmarshal(w.Body.Bytes(), &response)
-				require.NoError(t, err)
-				assert.Equal(t, "success", response["status"])
-			}
-
-			mockBaselineManager.AssertExpectations(t)
-		})
-	}
-}
-
-// Test Trend Analysis Handlers
-
-func TestHandleGetTrends(t *testing.T) {
-	tests := []struct {
-		name           string
-		testName       string
-		days           string
-		mockTrends     *types.TrendAnalysis
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:     "successful trends",
-			testName: "test-benchmark",
-			days:     "30",
-			mockTrends: &types.TrendAnalysis{
-				TestName: "test-benchmark",
-				Days:     30,
-				Trends:   map[string]*types.HistoricTrend{},
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "missing test name",
-			testName:       "",
-			days:           "30",
-			mockTrends:     nil,
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-		{
-			name:           "trend analyzer error",
-			testName:       "test-benchmark",
-			days:           "30",
-			mockTrends:     nil,
-			mockError:      fmt.Errorf("trend calculation failed"),
-			expectedStatus: http.StatusInternalServerError,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, _, mockTrendAnalyzer, _, _ := setupTestHandlers()
-
-			if tt.testName != "" {
-				expectedDays := 30
-				if tt.days != "" {
-					expectedDays = 30 // Default value used in handler
-				}
-				mockTrendAnalyzer.On("CalculateTrends", mock.Anything, tt.testName, expectedDays).Return(tt.mockTrends, tt.mockError)
-			}
-
-			url := "/api/trends?test=" + tt.testName
-			if tt.days != "" {
-				url += "&days=" + tt.days
-			}
-
-			req := httptest.NewRequest("GET", url, nil)
-			w := httptest.NewRecorder()
-
-			handlers.HandleGetTrends(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var trends types.TrendAnalysis
-				err := json.Unmarshal(w.Body.Bytes(), &trends)
-				require.NoError(t, err)
-				assert.Equal(t, tt.mockTrends.TestName, trends.TestName)
-			}
-
-			mockTrendAnalyzer.AssertExpectations(t)
-		})
-	}
-}
-
-// Test Regression Detection Handlers
-
-func TestHandleDetectRegressions(t *testing.T) {
-	tests := []struct {
-		name           string
-		runID          string
-		requestBody    interface{}
-		mockReport     *types.RegressionReport
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:  "successful detection",
-			runID: "test-run-1",
-			requestBody: RegressionDetectionRequest{
-				ComparisonMode: "sequential",
-				LookbackCount:  1,
-				WindowSize:     5,
-			},
-			mockReport: &types.RegressionReport{
-				RunID:       "test-run-1",
-				Regressions: []*types.Regression{},
-				Summary:     "No regressions detected",
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "missing run ID",
-			runID:          "",
-			requestBody:    RegressionDetectionRequest{},
-			mockReport:     nil,
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-		{
-			name:  "detection error",
-			runID: "test-run-1",
-			requestBody: RegressionDetectionRequest{
-				ComparisonMode: "sequential",
-			},
-			mockReport:     nil,
-			mockError:      fmt.Errorf("detection failed"),
-			expectedStatus: http.StatusInternalServerError,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, _, _, mockRegressionDetector, _ := setupTestHandlers()
-
-			var body []byte
-			if req, ok := tt.requestBody.(RegressionDetectionRequest); ok {
-				body, _ = json.Marshal(req)
-				if tt.runID != "" {
-					mockRegressionDetector.On("DetectRegressions", mock.Anything, tt.runID, mock.AnythingOfType("analysis.DetectionOptions")).Return(tt.mockReport, tt.mockError)
-				}
-			}
-
-			req := httptest.NewRequest("POST", "/api/runs/"+tt.runID+"/regressions", bytes.NewReader(body))
-			req = mux.SetURLVars(req, map[string]string{"runId": tt.runID})
-			req.Header.Set("Content-Type", "application/json")
-			w := httptest.NewRecorder()
-
-			handlers.HandleDetectRegressions(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var report types.RegressionReport
-				err := json.Unmarshal(w.Body.Bytes(), &report)
-				require.NoError(t, err)
-				assert.Equal(t, tt.mockReport.RunID, report.RunID)
-			}
-
-			mockRegressionDetector.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleGetRegressions(t *testing.T) {
-	tests := []struct {
-		name            string
-		runID           string
-		severityFilter  string
-		mockRegressions []*types.Regression
-		mockError       error
-		expectedStatus  int
-		expectedCount   int
-	}{
-		{
-			name:  "successful get with no filter",
-			runID: "test-run-1",
-			mockRegressions: []*types.Regression{
-				{ID: "reg1", RunID: "test-run-1", Severity: "high"},
-				{ID: "reg2", RunID: "test-run-1", Severity: "low"},
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  2,
-		},
-		{
-			name:           "successful get with severity filter",
-			runID:          "test-run-1",
-			severityFilter: "high",
-			mockRegressions: []*types.Regression{
-				{ID: "reg1", RunID: "test-run-1", Severity: "high"},
-				{ID: "reg2", RunID: "test-run-1", Severity: "low"},
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-			expectedCount:  1, // Only high severity should be returned
-		},
-		{
-			name:            "missing run ID",
-			runID:           "",
-			mockRegressions: nil,
-			mockError:       nil,
-			expectedStatus:  http.StatusBadRequest,
-			expectedCount:   0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, _, _, mockRegressionDetector, _ := setupTestHandlers()
-
-			if tt.runID != "" {
-				mockRegressionDetector.On("GetRegressions", mock.Anything, tt.runID).Return(tt.mockRegressions, tt.mockError)
-			}
-
-			url := "/api/runs/" + tt.runID + "/regressions"
-			if tt.severityFilter != "" {
-				url += "?severity=" + tt.severityFilter
-			}
-
-			req := httptest.NewRequest("GET", url, nil)
-			req = mux.SetURLVars(req, map[string]string{"runId": tt.runID})
-			w := httptest.NewRecorder()
-
-			handlers.HandleGetRegressions(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var response map[string]interface{}
-				err := json.Unmarshal(w.Body.Bytes(), &response)
-				require.NoError(t, err)
-
-				regressions, ok := response["regressions"].([]interface{})
-				require.True(t, ok)
-				assert.Len(t, regressions, tt.expectedCount)
-			}
-
-			mockRegressionDetector.AssertExpectations(t)
-		})
-	}
-}
-
-func TestHandleAcknowledgeRegression(t *testing.T) {
-	tests := []struct {
-		name           string
-		regressionID   string
-		requestBody    interface{}
-		mockError      error
-		expectedStatus int
-	}{
-		{
-			name:         "successful acknowledgment",
-			regressionID: "regression-1",
-			requestBody: AcknowledgeRegressionRequest{
-				AcknowledgedBy: "user@example.com",
-				Notes:          "Acknowledged - investigating fix",
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusOK,
-		},
-		{
-			name:           "missing regression ID",
-			regressionID:   "",
-			requestBody:    AcknowledgeRegressionRequest{AcknowledgedBy: "user@example.com"},
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-		{
-			name:         "missing acknowledged by",
-			regressionID: "regression-1",
-			requestBody: AcknowledgeRegressionRequest{
-				Notes: "Investigating",
-			},
-			mockError:      nil,
-			expectedStatus: http.StatusBadRequest,
-		},
-		{
-			name:         "regression not found",
-			regressionID: "nonexistent",
-			requestBody: AcknowledgeRegressionRequest{
-				AcknowledgedBy: "user@example.com",
-			},
-			mockError:      fmt.Errorf("regression not found"),
-			expectedStatus: http.StatusNotFound,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, _, _, _, mockRegressionDetector, _ := setupTestHandlers()
-
-			var body []byte
-			if req, ok := tt.requestBody.(AcknowledgeRegressionRequest); ok {
-				body, _ = json.Marshal(req)
-				if tt.regressionID != "" && req.AcknowledgedBy != "" {
-					mockRegressionDetector.On("AcknowledgeRegression", mock.Anything, tt.regressionID, req.AcknowledgedBy).Return(tt.mockError)
-				}
-			}
-
-			req := httptest.NewRequest("POST", "/api/regressions/"+tt.regressionID+"/acknowledge", bytes.NewReader(body))
-			req = mux.SetURLVars(req, map[string]string{"regressionId": tt.regressionID})
-			req.Header.Set("Content-Type", "application/json")
-			w := httptest.NewRecorder()
-
-			handlers.HandleAcknowledgeRegression(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			if tt.expectedStatus == http.StatusOK {
-				var response map[string]interface{}
-				err := json.Unmarshal(w.Body.Bytes(), &response)
-				require.NoError(t, err)
-				assert.Equal(t, "success", response["status"])
-			}
-
-			mockRegressionDetector.AssertExpectations(t)
-		})
-	}
-}
-
-// Test Health and Status Handlers
-
-func TestHandleHealth(t *testing.T) {
-	tests := []struct {
-		name           string
-		dbPingError    error
-		storageError   error
-		expectedStatus int
-		expectedHealth string
-	}{
-		{
-			name:           "healthy system",
-			dbPingError:    nil,
-			storageError:   nil,
-			expectedStatus: http.StatusOK,
-			expectedHealth: "healthy",
-		},
-		{
-			name:           "unhealthy database",
-			dbPingError:    fmt.Errorf("db connection failed"),
-			storageError:   nil,
-			expectedStatus: http.StatusServiceUnavailable,
-			expectedHealth: "unhealthy",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			handlers, mockStorage, _, _, _, mockDB := setupTestHandlers()
-
-			mockDB.On("Ping").Return(tt.dbPingError)
-			if tt.dbPingError == nil {
-				mockStorage.On("ListHistoricRuns", mock.Anything, "", 1).Return([]*types.HistoricRun{}, tt.storageError)
-			}
-
-			req := httptest.NewRequest("GET", "/health", nil)
-			w := httptest.NewRecorder()
-
-			handlers.HandleHealth(w, req)
-
-			assert.Equal(t, tt.expectedStatus, w.Code)
-
-			var response map[string]interface{}
-			err := json.Unmarshal(w.Body.Bytes(), &response)
-			require.NoError(t, err)
-			assert.Contains(t, response["status"], tt.expectedHealth)
-
-			mockDB.AssertExpectations(t)
-			if tt.dbPingError == nil {
-				mockStorage.AssertExpectations(t)
-			}
-		})
-	}
-}
-
-// Test JSON utility methods
-
-func TestWriteJSONResponse(t *testing.T) {
-	handlers, _, _, _, _, _ := setupTestHandlers()
-
-	testData := map[string]interface{}{
-		"test": "data",
-		"num":  42,
-	}
-
-	w := httptest.NewRecorder()
-	handlers.writeJSONResponse(w, http.StatusOK, testData)
-
-	assert.Equal(t, http.StatusOK, w.Code)
-	assert.Equal(t, "application/json", w.Header().Get("Content-Type"))
-
-	var response map[string]interface{}
-	err := json.Unmarshal(w.Body.Bytes(), &response)
-	require.NoError(t, err)
-	assert.Equal(t, "data", response["test"])
-	assert.Equal(t, float64(42), response["num"])
-}
-
-func TestWriteErrorResponse(t *testing.T) {
-	handlers, _, _, _, _, _ := setupTestHandlers()
-
-	w := httptest.NewRecorder()
-	handlers.writeErrorResponse(w, http.StatusBadRequest, "Test error message")
-
-	assert.Equal(t, http.StatusBadRequest, w.Code)
-	assert.Equal(t, "application/json", w.Header().Get("Content-Type"))
-
-	var response map[string]interface{}
-	err := json.Unmarshal(w.Body.Bytes(), &response)
-	require.NoError(t, err)
-	assert.Equal(t, true, response["error"])
-	assert.Equal(t, "Test error message", response["message"])
-	assert.Equal(t, float64(400), response["status"])
-	assert.Contains(t, response, "timestamp")
-}
-
-// Benchmark tests
-
-func BenchmarkHandleListRuns(b *testing.B) {
-	handlers, mockStorage, _, _, _, _ := setupTestHandlers()
-
-	mockRuns := []*types.HistoricRun{
-		createMockHistoricRun("run1"),
-		createMockHistoricRun("run2"),
-	}
-	mockStorage.On("ListHistoricRuns", mock.Anything, mock.AnythingOfType("string"), mock.AnythingOfType("int")).Return(mockRuns, nil)
-
-	req := httptest.NewRequest("GET", "/api/runs", nil)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		w := httptest.NewRecorder()
-		handlers.HandleListRuns(w, req)
-	}
-}
-
-func BenchmarkHandleGetRun(b *testing.B) {
-	handlers, mockStorage, _, _, _, _ := setupTestHandlers()
-
-	mockRun := createMockHistoricRun("test-run-1")
-	mockStorage.On("GetHistoricRun", mock.Anything, "test-run-1").Return(mockRun, nil)
-
-	req := httptest.NewRequest("GET", "/api/runs/test-run-1", nil)
-	req = mux.SetURLVars(req, map[string]string{"runId": "test-run-1"})
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		w := httptest.NewRecorder()
-		handlers.HandleGetRun(w, req)
-	}
-}
-
-// Integration tests
-
-func TestHandlerIntegration(t *testing.T) {
-	handlers, mockStorage, mockBaselineManager, mockTrendAnalyzer, mockRegressionDetector, mockDB := setupTestHandlers()
-
-	// Test lifecycle
-	ctx := context.Background()
-	err := handlers.Start(ctx)
-	require.NoError(t, err)
-
-	// Test typical workflow: list runs, get specific run, create baseline
-	mockRuns := []*types.HistoricRun{createMockHistoricRun("run1")}
-	mockStorage.On("ListHistoricRuns", mock.Anything, "", 50).Return(mockRuns, nil)
-
-	mockRun := createMockHistoricRun("run1")
-	mockStorage.On("GetHistoricRun", mock.Anything, "run1").Return(mockRun, nil)
-
-	mockBaseline := &types.Baseline{Name: "test-baseline", RunID: "run1"}
-	mockBaselineManager.On("SetBaseline", mock.Anything, "run1", "test-baseline", "Test baseline").Return(mockBaseline, nil)
-
-	// Test sequence
-	// 1. List runs
-	req1 := httptest.NewRequest("GET", "/api/runs", nil)
-	w1 := httptest.NewRecorder()
-	handlers.HandleListRuns(w1, req1)
-	assert.Equal(t, http.StatusOK, w1.Code)
-
-	// 2. Get specific run
-	req2 := httptest.NewRequest("GET", "/api/runs/run1", nil)
-	req2 = mux.SetURLVars(req2, map[string]string{"runId": "run1"})
-	w2 := httptest.NewRecorder()
-	handlers.HandleGetRun(w2, req2)
-	assert.Equal(t, http.StatusOK, w2.Code)
-
-	// 3. Create baseline from run
-	baselineReq := CreateBaselineRequest{
-		RunID:       "run1",
-		Name:        "test-baseline",
-		Description: "Test baseline",
-	}
-	body, _ := json.Marshal(baselineReq)
-	req3 := httptest.NewRequest("POST", "/api/baselines", bytes.NewReader(body))
-	req3.Header.Set("Content-Type", "application/json")
-	w3 := httptest.NewRecorder()
-	handlers.HandleCreateBaseline(w3, req3)
-	assert.Equal(t, http.StatusCreated, w3.Code)
-
-	// Test cleanup
-	err = handlers.Stop()
-	require.NoError(t, err)
-
-	mockStorage.AssertExpectations(t)
-	mockBaselineManager.AssertExpectations(t)
-}
-
-func TestErrorHandling(t *testing.T) {
-	handlers, _, _, _, _, _ := setupTestHandlers()
-
-	// Test handling of malformed JSON
-	req := httptest.NewRequest("POST", "/api/baselines", strings.NewReader("invalid json"))
-	req.Header.Set("Content-Type", "application/json")
-	w := httptest.NewRecorder()
-
-	handlers.HandleCreateBaseline(w, req)
-	assert.Equal(t, http.StatusBadRequest, w.Code)
-
-	// Test handling of missing content type
-	req = httptest.NewRequest("POST", "/api/baselines", strings.NewReader("{}"))
-	w = httptest.NewRecorder()
-
-	handlers.HandleCreateBaseline(w, req)
-	assert.Equal(t, http.StatusBadRequest, w.Code)
-}
diff --git a/runner/api/inputvalidation.go b/runner/api/inputvalidation.go
index a7062b7..083a3d7 100644
--- a/runner/api/inputvalidation.go
+++ b/runner/api/inputvalidation.go
@@ -3,14 +3,10 @@ package api
 import (
 	"fmt"
 	"regexp"
-	"strings"
 )
 
-const maxLogValueLen = 512
-
 var (
-	idRegex       = regexp.MustCompile(`^[A-Za-z0-9._-]{1,128}$`)
-	ctrlCharRegex = regexp.MustCompile(`[\x00-\x1f\x7f]`)
+	idRegex = regexp.MustCompile(`^[A-Za-z0-9._-]{1,128}$`)
 
 	validComparisonModes = map[string]struct{}{
 		"sequential":      {},
@@ -62,14 +58,3 @@ func ValidateSeverity(s string) error {
 	return nil
 }
 
-// SanitizeLogValue scrubs ASCII control characters (CR, LF, NUL, etc.) and
-// truncates to maxLogValueLen. Use it for fields that legitimately accept
-// free-form user input but still get logged (request paths, user agents,
-// Grafana query targets, acknowledger names, etc.).
-func SanitizeLogValue(s string) string {
-	s = ctrlCharRegex.ReplaceAllString(s, "")
-	if len(s) > maxLogValueLen {
-		s = s[:maxLogValueLen] + "..."
-	}
-	return strings.TrimSpace(s)
-}
diff --git a/runner/api/inputvalidation_test.go b/runner/api/inputvalidation_test.go
index 66b8831..db170e9 100644
--- a/runner/api/inputvalidation_test.go
+++ b/runner/api/inputvalidation_test.go
@@ -79,37 +79,3 @@ func TestValidateSeverity(t *testing.T) {
 	}
 }
 
-func TestSanitizeLogValue(t *testing.T) {
-	t.Run("strips CR LF NUL TAB", func(t *testing.T) {
-		got := SanitizeLogValue("hello\r\nworld\x00x\ty")
-		want := "helloworldxy"
-		if got != want {
-			t.Errorf("SanitizeLogValue = %q, want %q", got, want)
-		}
-	})
-
-	t.Run("truncates long input", func(t *testing.T) {
-		input := strings.Repeat("a", maxLogValueLen+50)
-		got := SanitizeLogValue(input)
-		if len(got) != maxLogValueLen+3 { // truncated body + "..."
-			t.Errorf("unexpected length: %d", len(got))
-		}
-		if !strings.HasSuffix(got, "...") {
-			t.Errorf("expected truncation suffix, got %q", got[len(got)-5:])
-		}
-	})
-
-	t.Run("trims surrounding whitespace", func(t *testing.T) {
-		got := SanitizeLogValue("  padded  ")
-		if got != "padded" {
-			t.Errorf("SanitizeLogValue = %q, want %q", got, "padded")
-		}
-	})
-
-	t.Run("passes safe input through", func(t *testing.T) {
-		got := SanitizeLogValue("legit-id_v2.0")
-		if got != "legit-id_v2.0" {
-			t.Errorf("SanitizeLogValue = %q, want passthrough", got)
-		}
-	})
-}
diff --git a/runner/api/integration_test.go b/runner/api/integration_test.go
deleted file mode 100644
index 3b176cb..0000000
--- a/runner/api/integration_test.go
+++ /dev/null
@@ -1,945 +0,0 @@
-package api
-
-import (
-	"bytes"
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"sync"
-	"testing"
-	"time"
-
-	"github.com/gorilla/websocket"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-	"github.com/testcontainers/testcontainers-go"
-	"github.com/testcontainers/testcontainers-go/modules/postgres"
-	"github.com/testcontainers/testcontainers-go/wait"
-
-	"github.com/jsonrpc-bench/runner/analysis"
-	"github.com/jsonrpc-bench/runner/storage"
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// APIIntegrationTestSuite provides comprehensive API integration tests
-type APIIntegrationTestSuite struct {
-	suite.Suite
-	container          *postgres.PostgresContainer
-	db                 *sql.DB
-	server             Server
-	testServer         *httptest.Server
-	ctx                context.Context
-	logger             logrus.FieldLogger
-	historicStorage    storage.HistoricStorage
-	baselineManager    analysis.BaselineManager
-	trendAnalyzer      analysis.TrendAnalyzer
-	regressionDetector analysis.RegressionDetector
-}
-
-// SetupSuite initializes the integration test environment
-func (suite *APIIntegrationTestSuite) SetupSuite() {
-	suite.ctx = context.Background()
-	suite.logger = logrus.New().WithField("test", "api_integration")
-
-	// Start PostgreSQL container
-	pgContainer, err := postgres.RunContainer(suite.ctx,
-		testcontainers.WithImage("postgres:15-alpine"),
-		postgres.WithDatabase("api_test_db"),
-		postgres.WithUsername("api_test_user"),
-		postgres.WithPassword("api_test_pass"),
-		testcontainers.WithWaitStrategy(
-			wait.ForLog("database system is ready to accept connections").
-				WithOccurrence(2).
-				WithStartupTimeout(30*time.Second)),
-	)
-	require.NoError(suite.T(), err)
-	suite.container = pgContainer
-
-	// Setup database connection
-	mappedPort, err := pgContainer.MappedPort(suite.ctx, "5432")
-	require.NoError(suite.T(), err)
-
-	connStr := fmt.Sprintf("host=localhost port=%d user=api_test_user password=api_test_pass dbname=api_test_db sslmode=disable",
-		mappedPort.Int())
-	db, err := sql.Open("postgres", connStr)
-	require.NoError(suite.T(), err)
-	suite.db = db
-
-	// Initialize storage components
-	migration := storage.NewMigrationService(db, suite.logger)
-	err = migration.Up()
-	require.NoError(suite.T(), err)
-
-	// Create storage instances
-	suite.historicStorage = storage.NewHistoricStorage(db, "results/test", suite.logger)
-	suite.baselineManager = analysis.NewBaselineManager(suite.historicStorage, db, suite.logger)
-	suite.trendAnalyzer = analysis.NewTrendAnalyzer(suite.historicStorage, db, suite.logger)
-	suite.regressionDetector = analysis.NewRegressionDetector(suite.historicStorage, suite.baselineManager, db, suite.logger)
-
-	// Start services
-	err = suite.historicStorage.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-	err = suite.baselineManager.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-	err = suite.trendAnalyzer.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-	err = suite.regressionDetector.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-}
-
-// TearDownSuite cleans up test resources
-func (suite *APIIntegrationTestSuite) TearDownSuite() {
-	if suite.testServer != nil {
-		suite.testServer.Close()
-	}
-	if suite.server != nil {
-		suite.server.Stop()
-	}
-	if suite.regressionDetector != nil {
-		suite.regressionDetector.Stop()
-	}
-	if suite.trendAnalyzer != nil {
-		suite.trendAnalyzer.Stop()
-	}
-	if suite.baselineManager != nil {
-		suite.baselineManager.Stop()
-	}
-	if suite.historicStorage != nil {
-		suite.historicStorage.Stop()
-	}
-	if suite.db != nil {
-		suite.db.Close()
-	}
-	if suite.container != nil {
-		suite.container.Terminate(suite.ctx)
-	}
-}
-
-// SetupTest prepares clean state for each test
-func (suite *APIIntegrationTestSuite) SetupTest() {
-	// Create and start API server
-	suite.server = NewServer(
-		suite.historicStorage,
-		suite.baselineManager,
-		suite.trendAnalyzer,
-		suite.regressionDetector,
-		suite.db,
-		suite.logger,
-	)
-
-	// Create test server
-	router := suite.server.(*server).setupRoutes()
-	suite.testServer = httptest.NewServer(router)
-}
-
-// TearDownTest cleans up after each test
-func (suite *APIIntegrationTestSuite) TearDownTest() {
-	if suite.testServer != nil {
-		suite.testServer.Close()
-		suite.testServer = nil
-	}
-
-	// Clean up test data
-	_, err := suite.db.Exec("DELETE FROM benchmark_runs WHERE test_name LIKE 'integration_%'")
-	if err != nil {
-		suite.logger.WithError(err).Warn("Failed to clean up test data")
-	}
-}
-
-// TestAPIHealthEndpoint tests the health check endpoint
-func (suite *APIIntegrationTestSuite) TestAPIHealthEndpoint() {
-	t := suite.T()
-
-	resp, err := http.Get(suite.testServer.URL + "/health")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-	assert.Equal(t, "application/json", resp.Header.Get("Content-Type"))
-
-	var health map[string]interface{}
-	err = json.NewDecoder(resp.Body).Decode(&health)
-	assert.NoError(t, err)
-	assert.Equal(t, "ok", health["status"])
-	assert.NotNil(t, health["timestamp"])
-}
-
-// TestAPIRunLifecycle tests the complete run lifecycle through API
-func (suite *APIIntegrationTestSuite) TestAPIRunLifecycle() {
-	t := suite.T()
-
-	// Create test benchmark result
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  "integration_lifecycle_test",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   5,
-				ErrorRate:     0.005,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P50:        120.0,
-					P95:        300.0,
-					P99:        500.0,
-					Max:        1000.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	// 1. Save the run through storage (simulating benchmark completion)
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-	assert.NotEmpty(t, savedRun.ID)
-
-	// 2. Get the run via API
-	resp, err := http.Get(suite.testServer.URL + "/api/v1/runs/" + savedRun.ID)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var apiRun types.HistoricRun
-	err = json.NewDecoder(resp.Body).Decode(&apiRun)
-	assert.NoError(t, err)
-	assert.Equal(t, savedRun.ID, apiRun.ID)
-	assert.Equal(t, savedRun.TestName, apiRun.TestName)
-
-	// 3. List runs via API
-	resp, err = http.Get(suite.testServer.URL + "/api/v1/runs?test_name=" + benchmarkResult.TestName)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var runs []*types.HistoricRun
-	err = json.NewDecoder(resp.Body).Decode(&runs)
-	assert.NoError(t, err)
-	assert.Len(t, runs, 1)
-	assert.Equal(t, savedRun.ID, runs[0].ID)
-
-	// 4. Delete the run via API
-	req, err := http.NewRequest("DELETE", suite.testServer.URL+"/api/v1/runs/"+savedRun.ID, nil)
-	require.NoError(t, err)
-
-	client := &http.Client{}
-	resp, err = client.Do(req)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	// 5. Verify run is deleted
-	resp, err = http.Get(suite.testServer.URL + "/api/v1/runs/" + savedRun.ID)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusNotFound, resp.StatusCode)
-}
-
-// TestAPIBaselineWorkflow tests baseline management through API
-func (suite *APIIntegrationTestSuite) TestAPIBaselineWorkflow() {
-	t := suite.T()
-
-	// Create and save a test run
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  "integration_baseline_test",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   10,
-				ErrorRate:     0.01,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	// 1. Create baseline via API
-	baselineData := map[string]interface{}{
-		"run_id":      savedRun.ID,
-		"name":        "integration_test_baseline",
-		"description": "Baseline for integration testing",
-	}
-
-	jsonData, err := json.Marshal(baselineData)
-	require.NoError(t, err)
-
-	resp, err := http.Post(
-		suite.testServer.URL+"/api/v1/baselines",
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusCreated, resp.StatusCode)
-
-	var baseline analysis.Baseline
-	err = json.NewDecoder(resp.Body).Decode(&baseline)
-	assert.NoError(t, err)
-	assert.Equal(t, "integration_test_baseline", baseline.Name)
-	assert.Equal(t, savedRun.ID, baseline.RunID)
-
-	// 2. Get baseline via API
-	resp, err = http.Get(suite.testServer.URL + "/api/v1/baselines/" + baseline.Name)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var retrievedBaseline analysis.Baseline
-	err = json.NewDecoder(resp.Body).Decode(&retrievedBaseline)
-	assert.NoError(t, err)
-	assert.Equal(t, baseline.Name, retrievedBaseline.Name)
-
-	// 3. List baselines via API
-	resp, err = http.Get(suite.testServer.URL + "/api/v1/baselines?test_name=" + benchmarkResult.TestName)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var baselines []*analysis.Baseline
-	err = json.NewDecoder(resp.Body).Decode(&baselines)
-	assert.NoError(t, err)
-	assert.Len(t, baselines, 1)
-	assert.Equal(t, baseline.Name, baselines[0].Name)
-
-	// 4. Compare to baseline via API
-	secondRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	resp, err = http.Get(fmt.Sprintf("%s/api/v1/baselines/%s/compare/%s",
-		suite.testServer.URL, baseline.Name, secondRun.ID))
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var comparison analysis.BaselineComparison
-	err = json.NewDecoder(resp.Body).Decode(&comparison)
-	assert.NoError(t, err)
-	assert.Equal(t, secondRun.ID, comparison.RunID)
-	assert.Equal(t, baseline.Name, comparison.BaselineName)
-
-	// 5. Delete baseline via API
-	req, err := http.NewRequest("DELETE", suite.testServer.URL+"/api/v1/baselines/"+baseline.Name, nil)
-	require.NoError(t, err)
-
-	client := &http.Client{}
-	resp, err = client.Do(req)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-}
-
-// TestAPITrendAnalysis tests trend analysis through API
-func (suite *APIIntegrationTestSuite) TestAPITrendAnalysis() {
-	t := suite.T()
-
-	testName := "integration_trend_test"
-
-	// Create multiple runs with trending data
-	for i := 0; i < 5; i++ {
-		benchmarkResult := &types.BenchmarkResult{
-			TestName:  testName,
-			StartTime: time.Now().Add(time.Duration(-i*60) * time.Minute),
-			EndTime:   time.Now().Add(time.Duration(-i*60+10) * time.Minute),
-			Duration:  10 * time.Minute,
-			ClientMetrics: map[string]*types.ClientMetrics{
-				"geth": {
-					Name:          "geth",
-					TotalRequests: 1000,
-					TotalErrors:   10 + i, // Slight increase over time
-					ErrorRate:     float64(10+i) / 1000.0,
-					Latency: types.LatencyMetrics{
-						Avg:        150.0 + float64(i)*5.0, // Slight degradation
-						P95:        300.0 + float64(i)*10.0,
-						P99:        500.0 + float64(i)*15.0,
-						Throughput: 100.0 - float64(i)*1.0, // Slight decrease
-					},
-				},
-			},
-		}
-
-		_, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-		require.NoError(t, err)
-	}
-
-	// Wait for data to be processed
-	time.Sleep(100 * time.Millisecond)
-
-	// 1. Get trends via API
-	resp, err := http.Get(fmt.Sprintf("%s/api/v1/trends?test_name=%s&days=1",
-		suite.testServer.URL, testName))
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var trends types.TrendAnalysis
-	err = json.NewDecoder(resp.Body).Decode(&trends)
-	assert.NoError(t, err)
-	assert.Equal(t, testName, trends.TestName)
-	assert.NotEmpty(t, trends.Trends)
-
-	// 2. Get method trends via API
-	resp, err = http.Get(fmt.Sprintf("%s/api/v1/trends/methods?test_name=%s&method=eth_getBalance&days=1",
-		suite.testServer.URL, testName))
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	// 3. Get client trends via API
-	resp, err = http.Get(fmt.Sprintf("%s/api/v1/trends/clients?test_name=%s&client=geth&days=1",
-		suite.testServer.URL, testName))
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-}
-
-// TestAPIRegressionDetection tests regression detection through API
-func (suite *APIIntegrationTestSuite) TestAPIRegressionDetection() {
-	t := suite.T()
-
-	testName := "integration_regression_test"
-
-	// Create baseline run
-	baselineResult := &types.BenchmarkResult{
-		TestName:  testName,
-		StartTime: time.Now().Add(-20 * time.Minute),
-		EndTime:   time.Now().Add(-10 * time.Minute),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   5,
-				ErrorRate:     0.005,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	baselineRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, baselineResult)
-	require.NoError(t, err)
-
-	// Create regressed run
-	regressedResult := &types.BenchmarkResult{
-		TestName:  testName,
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   50, // 10x increase
-				ErrorRate:     0.05,
-				Latency: types.LatencyMetrics{
-					Avg:        250.0, // Significant increase
-					P95:        500.0,
-					P99:        800.0,
-					Throughput: 80.0, // Decrease
-				},
-			},
-		},
-	}
-
-	regressedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, regressedResult)
-	require.NoError(t, err)
-
-	// 1. Detect regressions via API
-	detectionOptions := map[string]interface{}{
-		"comparison_mode": "sequential",
-		"lookback_count":  1,
-	}
-
-	jsonData, err := json.Marshal(detectionOptions)
-	require.NoError(t, err)
-
-	resp, err := http.Post(
-		suite.testServer.URL+"/api/v1/regressions/detect/"+regressedRun.ID,
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var regressionReport types.RegressionReport
-	err = json.NewDecoder(resp.Body).Decode(&regressionReport)
-	assert.NoError(t, err)
-	assert.Equal(t, regressedRun.ID, regressionReport.RunID)
-	assert.NotEmpty(t, regressionReport.Regressions)
-
-	// 2. Get regressions for run via API
-	resp, err = http.Get(suite.testServer.URL + "/api/v1/regressions/" + regressedRun.ID)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var regressions []*types.Regression
-	err = json.NewDecoder(resp.Body).Decode(&regressions)
-	assert.NoError(t, err)
-	assert.NotEmpty(t, regressions)
-
-	// 3. Analyze run via API
-	resp, err = http.Get(suite.testServer.URL + "/api/v1/runs/" + regressedRun.ID + "/analyze")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var analysis types.RunAnalysis
-	err = json.NewDecoder(resp.Body).Decode(&analysis)
-	assert.NoError(t, err)
-	assert.Equal(t, regressedRun.ID, analysis.RunID)
-}
-
-// TestAPIErrorHandling tests API error handling scenarios
-func (suite *APIIntegrationTestSuite) TestAPIErrorHandling() {
-	t := suite.T()
-
-	// Test 404 for non-existent run
-	resp, err := http.Get(suite.testServer.URL + "/api/v1/runs/non-existent-run")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusNotFound, resp.StatusCode)
-
-	// Test 400 for invalid JSON in POST request
-	invalidJSON := bytes.NewBuffer([]byte("{invalid json"))
-	resp, err = http.Post(
-		suite.testServer.URL+"/api/v1/baselines",
-		"application/json",
-		invalidJSON,
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusBadRequest, resp.StatusCode)
-
-	// Test 405 for method not allowed
-	req, err := http.NewRequest("POST", suite.testServer.URL+"/api/v1/runs/some-id", nil)
-	require.NoError(t, err)
-
-	client := &http.Client{}
-	resp, err = client.Do(req)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusMethodNotAllowed, resp.StatusCode)
-}
-
-// TestAPIConcurrentRequests tests concurrent API requests
-func (suite *APIIntegrationTestSuite) TestAPIConcurrentRequests() {
-	t := suite.T()
-
-	// Create test data
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  "integration_concurrent_test",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   10,
-				ErrorRate:     0.01,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	// Make concurrent requests
-	const concurrency = 10
-	var wg sync.WaitGroup
-	results := make(chan int, concurrency)
-
-	for i := 0; i < concurrency; i++ {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-
-			resp, err := http.Get(suite.testServer.URL + "/api/v1/runs/" + savedRun.ID)
-			if err != nil {
-				results <- 0
-				return
-			}
-			defer resp.Body.Close()
-
-			results <- resp.StatusCode
-		}()
-	}
-
-	wg.Wait()
-	close(results)
-
-	// Verify all requests succeeded
-	successCount := 0
-	for statusCode := range results {
-		if statusCode == http.StatusOK {
-			successCount++
-		}
-	}
-
-	assert.Equal(t, concurrency, successCount, "All concurrent requests should succeed")
-}
-
-// TestAPIWebSocketIntegration tests WebSocket functionality
-func (suite *APIIntegrationTestSuite) TestAPIWebSocketIntegration() {
-	t := suite.T()
-
-	// Convert HTTP URL to WebSocket URL
-	wsURL := "ws" + suite.testServer.URL[4:] + "/ws"
-
-	// Connect to WebSocket
-	conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
-	require.NoError(t, err)
-	defer conn.Close()
-
-	// Set up message reception
-	messages := make(chan map[string]interface{}, 10)
-	go func() {
-		for {
-			var msg map[string]interface{}
-			err := conn.ReadJSON(&msg)
-			if err != nil {
-				return
-			}
-			messages <- msg
-		}
-	}()
-
-	// Trigger an event that should send WebSocket message
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  "integration_websocket_test",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   10,
-				ErrorRate:     0.01,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	_, err = suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	// Wait for WebSocket message
-	select {
-	case msg := <-messages:
-		assert.NotNil(t, msg)
-		assert.Contains(t, []string{"run_saved", "update", "notification"}, msg["type"])
-	case <-time.After(2 * time.Second):
-		t.Log("No WebSocket message received within timeout - this might be expected if WebSocket broadcasting is not implemented")
-	}
-}
-
-// TestAPIGrafanaIntegration tests Grafana API endpoints
-func (suite *APIIntegrationTestSuite) TestAPIGrafanaIntegration() {
-	t := suite.T()
-
-	// Create test data
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  "integration_grafana_test",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   10,
-				ErrorRate:     0.01,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	_, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	// Test Grafana search endpoint
-	resp, err := http.Get(suite.testServer.URL + "/grafana/search")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var searchResults []map[string]interface{}
-	err = json.NewDecoder(resp.Body).Decode(&searchResults)
-	assert.NoError(t, err)
-	assert.NotEmpty(t, searchResults)
-
-	// Test Grafana query endpoint
-	queryData := map[string]interface{}{
-		"targets": []map[string]interface{}{
-			{
-				"target": "latency_avg",
-				"type":   "timeserie",
-			},
-		},
-		"range": map[string]interface{}{
-			"from": time.Now().Add(-1 * time.Hour).Format(time.RFC3339),
-			"to":   time.Now().Format(time.RFC3339),
-		},
-		"interval": "1m",
-	}
-
-	jsonData, err := json.Marshal(queryData)
-	require.NoError(t, err)
-
-	resp, err = http.Post(
-		suite.testServer.URL+"/grafana/query",
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var queryResults []map[string]interface{}
-	err = json.NewDecoder(resp.Body).Decode(&queryResults)
-	assert.NoError(t, err)
-	// Query results might be empty if no data matches the time range
-}
-
-// TestAPIPerformance tests API performance characteristics
-func (suite *APIIntegrationTestSuite) TestAPIPerformance() {
-	t := suite.T()
-
-	// Create test data
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  "integration_performance_test",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   10,
-				ErrorRate:     0.01,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	// Test response times
-	endpoints := []string{
-		"/health",
-		"/api/v1/runs/" + savedRun.ID,
-		"/api/v1/runs?test_name=" + benchmarkResult.TestName,
-	}
-
-	for _, endpoint := range endpoints {
-		start := time.Now()
-		resp, err := http.Get(suite.testServer.URL + endpoint)
-		duration := time.Since(start)
-
-		require.NoError(t, err)
-		resp.Body.Close()
-
-		assert.Equal(t, http.StatusOK, resp.StatusCode)
-		assert.Less(t, duration, 1*time.Second, "Endpoint %s should respond within 1 second", endpoint)
-	}
-}
-
-// TestAPIDataConsistency tests data consistency across API endpoints
-func (suite *APIIntegrationTestSuite) TestAPIDataConsistency() {
-	t := suite.T()
-
-	testName := "integration_consistency_test"
-
-	// Create multiple runs
-	var savedRunIDs []string
-	for i := 0; i < 3; i++ {
-		benchmarkResult := &types.BenchmarkResult{
-			TestName:  testName,
-			StartTime: time.Now().Add(time.Duration(-i*60) * time.Minute),
-			EndTime:   time.Now().Add(time.Duration(-i*60+10) * time.Minute),
-			Duration:  10 * time.Minute,
-			ClientMetrics: map[string]*types.ClientMetrics{
-				"geth": {
-					Name:          "geth",
-					TotalRequests: 1000,
-					TotalErrors:   10 + i,
-					ErrorRate:     float64(10+i) / 1000.0,
-					Latency: types.LatencyMetrics{
-						Avg:        150.0 + float64(i)*5.0,
-						P95:        300.0 + float64(i)*10.0,
-						P99:        500.0 + float64(i)*15.0,
-						Throughput: 100.0 - float64(i)*1.0,
-					},
-				},
-			},
-		}
-
-		savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-		require.NoError(t, err)
-		savedRunIDs = append(savedRunIDs, savedRun.ID)
-	}
-
-	// Get runs through different endpoints and verify consistency
-	// 1. Get individual runs
-	var individualRuns []*types.HistoricRun
-	for _, runID := range savedRunIDs {
-		resp, err := http.Get(suite.testServer.URL + "/api/v1/runs/" + runID)
-		require.NoError(t, err)
-		defer resp.Body.Close()
-
-		var run types.HistoricRun
-		err = json.NewDecoder(resp.Body).Decode(&run)
-		require.NoError(t, err)
-		individualRuns = append(individualRuns, &run)
-	}
-
-	// 2. Get runs through list endpoint
-	resp, err := http.Get(suite.testServer.URL + "/api/v1/runs?test_name=" + testName)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	var listRuns []*types.HistoricRun
-	err = json.NewDecoder(resp.Body).Decode(&listRuns)
-	require.NoError(t, err)
-
-	// 3. Verify consistency
-	assert.Len(t, listRuns, len(individualRuns))
-
-	// Create maps for easy lookup
-	individualMap := make(map[string]*types.HistoricRun)
-	listMap := make(map[string]*types.HistoricRun)
-
-	for _, run := range individualRuns {
-		individualMap[run.ID] = run
-	}
-	for _, run := range listRuns {
-		listMap[run.ID] = run
-	}
-
-	// Verify all runs are present in both results
-	for runID := range individualMap {
-		assert.Contains(t, listMap, runID, "Run %s should be present in list results", runID)
-
-		if listRun, exists := listMap[runID]; exists {
-			individualRun := individualMap[runID]
-			assert.Equal(t, individualRun.TestName, listRun.TestName)
-			assert.Equal(t, individualRun.AvgLatencyMs, listRun.AvgLatencyMs)
-			assert.Equal(t, individualRun.OverallErrorRate, listRun.OverallErrorRate)
-		}
-	}
-}
-
-// Run the test suite
-func TestAPIIntegrationTestSuite(t *testing.T) {
-	suite.Run(t, new(APIIntegrationTestSuite))
-}
-
-// Benchmark tests for API performance
-
-func BenchmarkAPIHealthEndpoint(b *testing.B) {
-	// Setup test server (simplified for benchmarking)
-	handlers, _, _, _, _, _ := setupTestHandlers()
-	router := mux.NewRouter()
-	router.HandleFunc("/health", handlers.handleHealth).Methods("GET")
-	server := httptest.NewServer(router)
-	defer server.Close()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		resp, err := http.Get(server.URL + "/health")
-		if err != nil {
-			b.Fatal(err)
-		}
-		resp.Body.Close()
-	}
-}
-
-func BenchmarkAPIGetRun(b *testing.B) {
-	// Setup test server with mock data
-	handlers, mockStorage, _, _, _, _ := setupTestHandlers()
-	router := mux.NewRouter()
-	router.HandleFunc("/api/v1/runs/{id}", handlers.handleGetRun).Methods("GET")
-	server := httptest.NewServer(router)
-	defer server.Close()
-
-	// Setup mock
-	mockRun := createMockHistoricRun("bench-run-id")
-	mockStorage.On("GetHistoricRun", mock.Anything, "bench-run-id").Return(mockRun, nil)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		resp, err := http.Get(server.URL + "/api/v1/runs/bench-run-id")
-		if err != nil {
-			b.Fatal(err)
-		}
-		resp.Body.Close()
-	}
-}
diff --git a/runner/api/server.go b/runner/api/server.go
index 5b3eddf..486d45e 100644
--- a/runner/api/server.go
+++ b/runner/api/server.go
@@ -16,6 +16,7 @@ import (
 	"github.com/sirupsen/logrus"
 
 	"github.com/jsonrpc-bench/runner/analysis"
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/storage"
 	"github.com/jsonrpc-bench/runner/types"
 )
@@ -223,12 +224,12 @@ func (s *server) loggingMiddleware(next http.Handler) http.Handler {
 
 		duration := time.Since(start)
 		s.log.WithFields(logrus.Fields{
-			"method":      r.Method,
-			"path":        SanitizeLogValue(r.URL.Path),
+			"method":      sanitize.LogValue(r.Method),
+			"path":        sanitize.LogValue(r.URL.Path),
 			"status":      wrapper.statusCode,
 			"duration_ms": duration.Milliseconds(),
-			"user_agent":  SanitizeLogValue(r.UserAgent()),
-			"remote_addr": SanitizeLogValue(r.RemoteAddr),
+			"user_agent":  sanitize.LogValue(r.UserAgent()),
+			"remote_addr": sanitize.LogValue(r.RemoteAddr),
 		}).Info("HTTP request processed")
 	})
 }
@@ -319,7 +320,7 @@ func (s *server) handleGetRun(w http.ResponseWriter, r *http.Request) {
 		if strings.Contains(err.Error(), "not found") {
 			s.writeErrorResponse(w, http.StatusNotFound, "Run not found")
 		} else {
-			s.log.WithError(err).Error("Failed to get historic run")
+			s.log.WithError(sanitize.LogError(err)).Error("Failed to get historic run")
 			s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to retrieve run")
 		}
 		return
@@ -336,7 +337,7 @@ func (s *server) handleGetRun(w http.ResponseWriter, r *http.Request) {
 		s.log.WithError(err).Warn("Failed to get client metrics from database")
 	} else if len(clientMetrics) > 0 {
 		response["client_metrics"] = clientMetrics
-		s.log.WithField("run_id", runID).WithField("client_count", len(clientMetrics)).Debug("Added client metrics to response")
+		s.log.WithField("run_id", sanitize.LogValue(runID)).WithField("client_count", len(clientMetrics)).Debug("Added client metrics to response")
 	}
 
 	s.writeJSONResponse(w, http.StatusOK, response)
@@ -705,7 +706,7 @@ func (s *server) handleCreateBaseline(w http.ResponseWriter, r *http.Request) {
 
 	baseline, err := s.baselineManager.SetBaseline(ctx, req.RunID, req.Name, req.Description)
 	if err != nil {
-		s.log.WithError(err).Error("Failed to create baseline")
+		s.log.WithError(sanitize.LogError(err)).Error("Failed to create baseline")
 		s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to create baseline")
 		return
 	}
@@ -729,7 +730,7 @@ func (s *server) handleGetBaseline(w http.ResponseWriter, r *http.Request) {
 		if strings.Contains(err.Error(), "not found") {
 			s.writeErrorResponse(w, http.StatusNotFound, "Baseline not found")
 		} else {
-			s.log.WithError(err).Error("Failed to get baseline")
+			s.log.WithError(sanitize.LogError(err)).Error("Failed to get baseline")
 			s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to retrieve baseline")
 		}
 		return
@@ -754,7 +755,7 @@ func (s *server) handleDeleteBaseline(w http.ResponseWriter, r *http.Request) {
 		if strings.Contains(err.Error(), "not found") {
 			s.writeErrorResponse(w, http.StatusNotFound, "Baseline not found")
 		} else {
-			s.log.WithError(err).Error("Failed to delete baseline")
+			s.log.WithError(sanitize.LogError(err)).Error("Failed to delete baseline")
 			s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to delete baseline")
 		}
 		return
@@ -784,7 +785,7 @@ func (s *server) handleCompareToBaseline(w http.ResponseWriter, r *http.Request)
 
 	comparison, err := s.baselineManager.CompareToBaseline(ctx, runID, baselineName)
 	if err != nil {
-		s.log.WithError(err).Error("Failed to compare to baseline")
+		s.log.WithError(sanitize.LogError(err)).Error("Failed to compare to baseline")
 		s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to compare to baseline")
 		return
 	}
@@ -845,7 +846,7 @@ func (s *server) handleDetectRegressions(w http.ResponseWriter, r *http.Request)
 
 	report, err := s.regressionDetector.DetectRegressions(ctx, runID, options)
 	if err != nil {
-		s.log.WithError(err).Error("Failed to detect regressions")
+		s.log.WithError(sanitize.LogError(err)).Error("Failed to detect regressions")
 		s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to detect regressions")
 		return
 	}
@@ -900,14 +901,14 @@ func (s *server) handleAcknowledgeRegression(w http.ResponseWriter, r *http.Requ
 	}
 	// AcknowledgedBy is a free-form name; scrub control characters before
 	// it propagates into downstream log fields.
-	acknowledgedBy := SanitizeLogValue(req.AcknowledgedBy)
+	acknowledgedBy := sanitize.LogValue(req.AcknowledgedBy)
 
 	err := s.regressionDetector.AcknowledgeRegression(ctx, regressionID, acknowledgedBy)
 	if err != nil {
 		if strings.Contains(err.Error(), "not found") {
 			s.writeErrorResponse(w, http.StatusNotFound, "Regression not found")
 		} else {
-			s.log.WithError(err).Error("Failed to acknowledge regression")
+			s.log.WithError(sanitize.LogError(err)).Error("Failed to acknowledge regression")
 			s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to acknowledge regression")
 		}
 		return
@@ -934,7 +935,7 @@ func (s *server) handleAnalyzeRun(w http.ResponseWriter, r *http.Request) {
 
 	analysis, err := s.regressionDetector.AnalyzeRun(ctx, runID)
 	if err != nil {
-		s.log.WithError(err).Error("Failed to analyze run")
+		s.log.WithError(sanitize.LogError(err)).Error("Failed to analyze run")
 		s.writeErrorResponse(w, http.StatusInternalServerError, "Failed to analyze run")
 		return
 	}
@@ -983,8 +984,10 @@ func (s *server) handleGetMetricTrends(w http.ResponseWriter, r *http.Request) {
 	// Create trend filter
 	since := time.Now().AddDate(0, 0, -days)
 	filter := types.TrendFilter{
-		Client: client,
-		Since:  since,
+		TestName: testName,
+		Client:   client,
+		Method:   metric,
+		Since:    since,
 	}
 
 	trend, err := s.storage.GetHistoricTrends(ctx, filter)
@@ -1081,7 +1084,7 @@ func (s *server) handleWebSocket(w http.ResponseWriter, r *http.Request) {
 	// Register client
 	s.wsClients[conn] = true
 
-	s.log.WithField("remote_addr", SanitizeLogValue(r.RemoteAddr)).Info("WebSocket client connected")
+	s.log.WithField("remote_addr", sanitize.LogValue(r.RemoteAddr)).Info("WebSocket client connected")
 
 	// Send initial connection message
 	message := map[string]interface{}{
@@ -1121,7 +1124,7 @@ func (s *server) handleWebSocket(w http.ResponseWriter, r *http.Request) {
 
 	// Unregister client
 	delete(s.wsClients, conn)
-	s.log.WithField("remote_addr", SanitizeLogValue(r.RemoteAddr)).Info("WebSocket client disconnected")
+	s.log.WithField("remote_addr", sanitize.LogValue(r.RemoteAddr)).Info("WebSocket client disconnected")
 }
 
 // handleWebSocketHub manages WebSocket message broadcasting
diff --git a/runner/api/server_test.go b/runner/api/server_test.go
deleted file mode 100644
index 8d78cc6..0000000
--- a/runner/api/server_test.go
+++ /dev/null
@@ -1,730 +0,0 @@
-package api
-
-import (
-	"context"
-	"database/sql"
-	"net/http"
-	"net/http/httptest"
-	"testing"
-	"time"
-
-	"github.com/gorilla/mux"
-	"github.com/gorilla/websocket"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-
-	"github.com/jsonrpc-bench/runner/analysis"
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// Mock implementations for testing
-
-type MockHistoricStorage struct {
-	mock.Mock
-}
-
-func (m *MockHistoricStorage) Start(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) Stop() error {
-	args := m.Called()
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) SaveHistoricRun(ctx context.Context, result *types.BenchmarkResult) (*types.HistoricRun, error) {
-	args := m.Called(ctx, result)
-	return args.Get(0).(*types.HistoricRun), args.Error(1)
-}
-
-func (m *MockHistoricStorage) GetHistoricRun(ctx context.Context, runID string) (*types.HistoricRun, error) {
-	args := m.Called(ctx, runID)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricRun), args.Error(1)
-}
-
-func (m *MockHistoricStorage) ListHistoricRuns(ctx context.Context, testName string, limit int) ([]*types.HistoricRun, error) {
-	args := m.Called(ctx, testName, limit)
-	return args.Get(0).([]*types.HistoricRun), args.Error(1)
-}
-
-func (m *MockHistoricStorage) DeleteHistoricRun(ctx context.Context, runID string) error {
-	args := m.Called(ctx, runID)
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) GetHistoricTrends(ctx context.Context, testName, client, metric string, days int) (*types.HistoricTrend, error) {
-	args := m.Called(ctx, testName, client, metric, days)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricTrend), args.Error(1)
-}
-
-func (m *MockHistoricStorage) CompareRuns(ctx context.Context, runID1, runID2 string) (*types.HistoricComparison, error) {
-	args := m.Called(ctx, runID1, runID2)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricComparison), args.Error(1)
-}
-
-func (m *MockHistoricStorage) GetHistoricSummary(ctx context.Context, testName string) (*types.HistoricSummary, error) {
-	args := m.Called(ctx, testName)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.HistoricSummary), args.Error(1)
-}
-
-func (m *MockHistoricStorage) SaveResultFiles(ctx context.Context, runID string, result *types.BenchmarkResult) error {
-	args := m.Called(ctx, runID, result)
-	return args.Error(0)
-}
-
-func (m *MockHistoricStorage) GetResultFiles(ctx context.Context, runID string) (string, error) {
-	args := m.Called(ctx, runID)
-	return args.String(0), args.Error(1)
-}
-
-func (m *MockHistoricStorage) CleanupOldFiles(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-type MockBaselineManager struct {
-	mock.Mock
-}
-
-func (m *MockBaselineManager) Start(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-func (m *MockBaselineManager) Stop() error {
-	args := m.Called()
-	return args.Error(0)
-}
-
-func (m *MockBaselineManager) SetBaseline(ctx context.Context, runID, name, description string) (*types.Baseline, error) {
-	args := m.Called(ctx, runID, name, description)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.Baseline), args.Error(1)
-}
-
-func (m *MockBaselineManager) GetBaseline(ctx context.Context, name string) (*types.Baseline, error) {
-	args := m.Called(ctx, name)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.Baseline), args.Error(1)
-}
-
-func (m *MockBaselineManager) ListBaselines(ctx context.Context, testName string) ([]*types.Baseline, error) {
-	args := m.Called(ctx, testName)
-	return args.Get(0).([]*types.Baseline), args.Error(1)
-}
-
-func (m *MockBaselineManager) DeleteBaseline(ctx context.Context, name string) error {
-	args := m.Called(ctx, name)
-	return args.Error(0)
-}
-
-func (m *MockBaselineManager) CompareToBaseline(ctx context.Context, runID, baselineName string) (*types.BaselineComparison, error) {
-	args := m.Called(ctx, runID, baselineName)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.BaselineComparison), args.Error(1)
-}
-
-type MockTrendAnalyzer struct {
-	mock.Mock
-}
-
-func (m *MockTrendAnalyzer) Start(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-func (m *MockTrendAnalyzer) Stop() error {
-	args := m.Called()
-	return args.Error(0)
-}
-
-func (m *MockTrendAnalyzer) CalculateTrends(ctx context.Context, testName string, days int) (*types.TrendAnalysis, error) {
-	args := m.Called(ctx, testName, days)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.TrendAnalysis), args.Error(1)
-}
-
-func (m *MockTrendAnalyzer) GetMethodTrends(ctx context.Context, testName, method string, days int) (*types.MethodTrends, error) {
-	args := m.Called(ctx, testName, method, days)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.MethodTrends), args.Error(1)
-}
-
-func (m *MockTrendAnalyzer) GetClientTrends(ctx context.Context, testName, client string, days int) (*types.ClientTrends, error) {
-	args := m.Called(ctx, testName, client, days)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.ClientTrends), args.Error(1)
-}
-
-func (m *MockTrendAnalyzer) CalculateMovingAverage(ctx context.Context, testName, metric string, windowSize, days int) (*types.MovingAverage, error) {
-	args := m.Called(ctx, testName, metric, windowSize, days)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.MovingAverage), args.Error(1)
-}
-
-func (m *MockTrendAnalyzer) ForecastTrend(ctx context.Context, testName, metric string, historyDays, forecastDays int) (*types.TrendForecast, error) {
-	args := m.Called(ctx, testName, metric, historyDays, forecastDays)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.TrendForecast), args.Error(1)
-}
-
-type MockRegressionDetector struct {
-	mock.Mock
-}
-
-func (m *MockRegressionDetector) Start(ctx context.Context) error {
-	args := m.Called(ctx)
-	return args.Error(0)
-}
-
-func (m *MockRegressionDetector) Stop() error {
-	args := m.Called()
-	return args.Error(0)
-}
-
-func (m *MockRegressionDetector) DetectRegressions(ctx context.Context, runID string, options analysis.DetectionOptions) (*types.RegressionReport, error) {
-	args := m.Called(ctx, runID, options)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.RegressionReport), args.Error(1)
-}
-
-func (m *MockRegressionDetector) GetRegressions(ctx context.Context, runID string) ([]*types.Regression, error) {
-	args := m.Called(ctx, runID)
-	return args.Get(0).([]*types.Regression), args.Error(1)
-}
-
-func (m *MockRegressionDetector) AcknowledgeRegression(ctx context.Context, regressionID, acknowledgedBy string) error {
-	args := m.Called(ctx, regressionID, acknowledgedBy)
-	return args.Error(0)
-}
-
-func (m *MockRegressionDetector) AnalyzeRun(ctx context.Context, runID string) (*types.RunAnalysis, error) {
-	args := m.Called(ctx, runID)
-	if args.Get(0) == nil {
-		return nil, args.Error(1)
-	}
-	return args.Get(0).(*types.RunAnalysis), args.Error(1)
-}
-
-type MockDB struct {
-	mock.Mock
-}
-
-func (m *MockDB) Ping() error {
-	args := m.Called()
-	return args.Error(0)
-}
-
-func (m *MockDB) QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) {
-	mockArgs := m.Called(ctx, query, args)
-	if mockArgs.Get(0) == nil {
-		return nil, mockArgs.Error(1)
-	}
-	return mockArgs.Get(0).(*sql.Rows), mockArgs.Error(1)
-}
-
-func (m *MockDB) QueryRowContext(ctx context.Context, query string, args ...interface{}) *sql.Row {
-	mockArgs := m.Called(ctx, query, args)
-	return mockArgs.Get(0).(*sql.Row)
-}
-
-func (m *MockDB) ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
-	mockArgs := m.Called(ctx, query, args)
-	if mockArgs.Get(0) == nil {
-		return nil, mockArgs.Error(1)
-	}
-	return mockArgs.Get(0).(sql.Result), mockArgs.Error(1)
-}
-
-// Helper functions for test setup
-
-func setupTestServer() (*server, *MockHistoricStorage, *MockBaselineManager, *MockTrendAnalyzer, *MockRegressionDetector, *MockDB) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel) // Reduce noise in tests
-
-	storage := &MockHistoricStorage{}
-	baselineManager := &MockBaselineManager{}
-	trendAnalyzer := &MockTrendAnalyzer{}
-	regressionDetector := &MockRegressionDetector{}
-	db := &MockDB{}
-
-	srv := &server{
-		storage:            storage,
-		baselineManager:    baselineManager,
-		trendAnalyzer:      trendAnalyzer,
-		regressionDetector: regressionDetector,
-		db:                 db,
-		log:                log.WithField("component", "api-server"),
-		upgrader: websocket.Upgrader{
-			CheckOrigin: func(r *http.Request) bool {
-				return true
-			},
-		},
-		wsClients:   make(map[*websocket.Conn]bool),
-		wsBroadcast: make(chan []byte, 100),
-	}
-
-	return srv, storage, baselineManager, trendAnalyzer, regressionDetector, db
-}
-
-// Test server creation and configuration
-
-func TestNewServer(t *testing.T) {
-	log := logrus.New()
-	storage := &MockHistoricStorage{}
-	baselineManager := &MockBaselineManager{}
-	trendAnalyzer := &MockTrendAnalyzer{}
-	regressionDetector := &MockRegressionDetector{}
-	db := &MockDB{}
-
-	srv := NewServer(storage, baselineManager, trendAnalyzer, regressionDetector, db, log)
-
-	assert.NotNil(t, srv)
-	assert.Implements(t, (*Server)(nil), srv)
-}
-
-func TestServerStart(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
-	defer cancel()
-
-	err := srv.Start(ctx)
-	assert.NoError(t, err)
-
-	// Give server time to start
-	time.Sleep(50 * time.Millisecond)
-
-	// Check that server is running
-	assert.NotNil(t, srv.httpServer)
-
-	// Stop the server
-	err = srv.Stop()
-	assert.NoError(t, err)
-}
-
-func TestServerStartWithPortInUse(t *testing.T) {
-	// Create first server and start it
-	srv1, _, _, _, _, _ := setupTestServer()
-
-	ctx1, cancel1 := context.WithTimeout(context.Background(), 100*time.Millisecond)
-	defer cancel1()
-
-	err := srv1.Start(ctx1)
-	assert.NoError(t, err)
-	defer srv1.Stop()
-
-	// Create second server with same port (should fail to bind)
-	srv2, _, _, _, _, _ := setupTestServer()
-
-	ctx2, cancel2 := context.WithTimeout(context.Background(), 100*time.Millisecond)
-	defer cancel2()
-
-	err = srv2.Start(ctx2)
-	assert.NoError(t, err) // Start method doesn't return error for port conflicts
-	defer srv2.Stop()
-
-	// Give time for potential port conflict
-	time.Sleep(50 * time.Millisecond)
-}
-
-func TestServerStop(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
-	defer cancel()
-
-	// Start server
-	err := srv.Start(ctx)
-	assert.NoError(t, err)
-
-	// Give server time to start
-	time.Sleep(50 * time.Millisecond)
-
-	// Stop server
-	err = srv.Stop()
-	assert.NoError(t, err)
-
-	// Verify server is stopped
-	assert.NotNil(t, srv.httpServer) // Server object still exists but is shut down
-}
-
-func TestServerStopWithoutStart(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	// Stop server without starting it first
-	err := srv.Stop()
-	assert.NoError(t, err) // Should handle gracefully
-}
-
-// Test route setup and middleware
-
-func TestSetupRoutes(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	router := srv.setupRoutes()
-	assert.NotNil(t, router)
-	assert.IsType(t, &mux.Router{}, router)
-
-	// Test that router is properly configured by making a test request
-	req := httptest.NewRequest("GET", "/health", nil)
-	w := httptest.NewRecorder()
-
-	router.ServeHTTP(w, req)
-
-	// Should get a response (even if it's an error response)
-	assert.NotEqual(t, 0, w.Code)
-}
-
-func TestCORSMiddleware(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	// Create a test handler
-	testHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.WriteHeader(http.StatusOK)
-		w.Write([]byte("test"))
-	})
-
-	// Wrap with CORS middleware
-	corsHandler := srv.enableCORS(testHandler)
-
-	// Test OPTIONS request
-	req := httptest.NewRequest("OPTIONS", "/test", nil)
-	w := httptest.NewRecorder()
-
-	corsHandler.ServeHTTP(w, req)
-
-	assert.Equal(t, http.StatusOK, w.Code)
-	assert.Equal(t, "*", w.Header().Get("Access-Control-Allow-Origin"))
-	assert.Equal(t, "GET, POST, PUT, DELETE, OPTIONS", w.Header().Get("Access-Control-Allow-Methods"))
-	assert.Equal(t, "Content-Type, Authorization, X-Requested-With", w.Header().Get("Access-Control-Allow-Headers"))
-	assert.Equal(t, "86400", w.Header().Get("Access-Control-Max-Age"))
-
-	// Test regular request
-	req = httptest.NewRequest("GET", "/test", nil)
-	w = httptest.NewRecorder()
-
-	corsHandler.ServeHTTP(w, req)
-
-	assert.Equal(t, http.StatusOK, w.Code)
-	assert.Equal(t, "*", w.Header().Get("Access-Control-Allow-Origin"))
-	assert.Equal(t, "test", w.Body.String())
-}
-
-func TestLoggingMiddleware(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	// Create a test handler
-	testHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.WriteHeader(http.StatusOK)
-		w.Write([]byte("test"))
-	})
-
-	// Wrap with logging middleware
-	logHandler := srv.loggingMiddleware(testHandler)
-
-	req := httptest.NewRequest("GET", "/test", nil)
-	req.Header.Set("User-Agent", "test-agent")
-	req.RemoteAddr = "192.168.1.1:12345"
-	w := httptest.NewRecorder()
-
-	logHandler.ServeHTTP(w, req)
-
-	assert.Equal(t, http.StatusOK, w.Code)
-	assert.Equal(t, "test", w.Body.String())
-}
-
-func TestErrorHandlingMiddleware(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	// Create a handler that panics
-	panicHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		panic("test panic")
-	})
-
-	// Wrap with error handling middleware
-	errorHandler := srv.errorHandlingMiddleware(panicHandler)
-
-	req := httptest.NewRequest("GET", "/test", nil)
-	w := httptest.NewRecorder()
-
-	// Should not panic
-	assert.NotPanics(t, func() {
-		errorHandler.ServeHTTP(w, req)
-	})
-
-	assert.Equal(t, http.StatusInternalServerError, w.Code)
-}
-
-func TestResponseWriterWrapper(t *testing.T) {
-	recorder := httptest.NewRecorder()
-	wrapper := &responseWriterWrapper{
-		ResponseWriter: recorder,
-		statusCode:     http.StatusOK,
-	}
-
-	// Test WriteHeader
-	wrapper.WriteHeader(http.StatusNotFound)
-	assert.Equal(t, http.StatusNotFound, wrapper.statusCode)
-	assert.Equal(t, http.StatusNotFound, recorder.Code)
-
-	// Test Write
-	data := []byte("test data")
-	n, err := wrapper.Write(data)
-	assert.NoError(t, err)
-	assert.Equal(t, len(data), n)
-	assert.Equal(t, string(data), recorder.Body.String())
-}
-
-// Test health endpoint (implementation in handlers_test.go)
-
-// Test WebSocket functionality
-
-func TestWebSocketHub(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	// Test that WebSocket channels are initialized
-	assert.NotNil(t, srv.wsClients)
-	assert.NotNil(t, srv.wsBroadcast)
-
-	// Test broadcasting to empty clients (should not panic)
-	assert.NotPanics(t, func() {
-		srv.BroadcastUpdate("test", map[string]string{"key": "value"})
-	})
-}
-
-func TestHandleWebSocketHub(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	// Start the WebSocket hub in a goroutine
-	go srv.handleWebSocketHub()
-
-	// Give it time to start
-	time.Sleep(10 * time.Millisecond)
-
-	// Send a test message
-	testMessage := map[string]interface{}{
-		"type": "test",
-		"data": "test_data",
-	}
-
-	srv.BroadcastUpdate("test", testMessage)
-
-	// Give time for processing
-	time.Sleep(10 * time.Millisecond)
-
-	// Close the broadcast channel to stop the hub
-	close(srv.wsBroadcast)
-
-	// Give time for cleanup
-	time.Sleep(10 * time.Millisecond)
-}
-
-func TestBroadcastUpdate(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	testData := map[string]interface{}{
-		"key1": "value1",
-		"key2": 42,
-	}
-
-	// Should not panic even with no clients
-	assert.NotPanics(t, func() {
-		srv.BroadcastUpdate("test_update", testData)
-	})
-
-	// Test with full channel (non-blocking)
-	for i := 0; i < cap(srv.wsBroadcast)+10; i++ {
-		srv.BroadcastUpdate("test", map[string]int{"count": i})
-	}
-}
-
-// Test utility methods (implementation in handlers_test.go)
-
-// TestWriteErrorResponse implementation in handlers_test.go
-
-// Benchmark tests
-
-func BenchmarkServerSetupRoutes(b *testing.B) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		router := srv.setupRoutes()
-		_ = router
-	}
-}
-
-func BenchmarkCORSMiddleware(b *testing.B) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	testHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		w.WriteHeader(http.StatusOK)
-	})
-
-	corsHandler := srv.enableCORS(testHandler)
-	req := httptest.NewRequest("GET", "/test", nil)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		w := httptest.NewRecorder()
-		corsHandler.ServeHTTP(w, req)
-	}
-}
-
-func BenchmarkBroadcastUpdate(b *testing.B) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	testData := map[string]interface{}{
-		"key": "value",
-		"num": 42,
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		srv.BroadcastUpdate("test", testData)
-	}
-}
-
-// Integration tests
-
-func TestServerIntegration(t *testing.T) {
-	srv, mockStorage, _, _, _, mockDB := setupTestServer()
-
-	// Setup mock expectations
-	mockDB.On("Ping").Return(nil)
-	mockStorage.On("ListHistoricRuns", mock.Anything, "", 1).Return([]*types.HistoricRun{}, nil)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
-	defer cancel()
-
-	// Start server
-	err := srv.Start(ctx)
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	// Give server time to start
-	time.Sleep(100 * time.Millisecond)
-
-	// Test health endpoint
-	client := &http.Client{Timeout: 1 * time.Second}
-	resp, err := client.Get("http://localhost:8080/health")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-	assert.Equal(t, "application/json", resp.Header.Get("Content-Type"))
-
-	mockDB.AssertExpectations(t)
-}
-
-func TestConcurrentRequests(t *testing.T) {
-	srv, _, _, _, _, mockDB := setupTestServer()
-
-	mockDB.On("Ping").Return(nil)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
-	defer cancel()
-
-	err := srv.Start(ctx)
-	require.NoError(t, err)
-	defer srv.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Make concurrent requests
-	const numRequests = 10
-	results := make(chan int, numRequests)
-
-	for i := 0; i < numRequests; i++ {
-		go func() {
-			client := &http.Client{Timeout: 1 * time.Second}
-			resp, err := client.Get("http://localhost:8080/health")
-			if err != nil {
-				results <- 0
-				return
-			}
-			defer resp.Body.Close()
-			results <- resp.StatusCode
-		}()
-	}
-
-	// Collect results
-	for i := 0; i < numRequests; i++ {
-		statusCode := <-results
-		assert.Equal(t, http.StatusOK, statusCode)
-	}
-
-	mockDB.AssertExpectations(t)
-}
-
-func TestServerSecurityHeaders(t *testing.T) {
-	srv, _, _, _, _, _ := setupTestServer()
-
-	router := srv.setupRoutes()
-	req := httptest.NewRequest("GET", "/health", nil)
-	w := httptest.NewRecorder()
-
-	router.ServeHTTP(w, req)
-
-	// Verify CORS headers are set
-	assert.Equal(t, "*", w.Header().Get("Access-Control-Allow-Origin"))
-	assert.Equal(t, "GET, POST, PUT, DELETE, OPTIONS", w.Header().Get("Access-Control-Allow-Methods"))
-	assert.Equal(t, "Content-Type, Authorization, X-Requested-With", w.Header().Get("Access-Control-Allow-Headers"))
-}
-
-func TestServerRateLimiting(t *testing.T) {
-	// This would be implemented if rate limiting was added to the server
-	// For now, just verify that rapid requests don't cause issues
-	srv, _, _, _, _, mockDB := setupTestServer()
-
-	mockDB.On("Ping").Return(nil)
-
-	router := srv.setupRoutes()
-
-	// Make rapid requests
-	for i := 0; i < 100; i++ {
-		req := httptest.NewRequest("GET", "/health", nil)
-		w := httptest.NewRecorder()
-
-		router.ServeHTTP(w, req)
-
-		// Should handle all requests without issues
-		assert.Equal(t, http.StatusOK, w.Code)
-	}
-
-	mockDB.AssertExpectations(t)
-}
diff --git a/runner/api/websocket.go b/runner/api/websocket.go
index be68100..7634d84 100644
--- a/runner/api/websocket.go
+++ b/runner/api/websocket.go
@@ -12,6 +12,7 @@ import (
 	"github.com/gorilla/websocket"
 	"github.com/sirupsen/logrus"
 
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/types"
 )
 
@@ -232,9 +233,9 @@ func (h *WSHub) RegisterClient(conn *websocket.Conn, clientID, remoteAddr, userA
 	select {
 	case h.register <- client:
 		h.log.WithFields(logrus.Fields{
-			"client_id":   clientID,
-			"remote_addr": SanitizeLogValue(remoteAddr),
-			"user_agent":  SanitizeLogValue(userAgent),
+			"client_id":   sanitize.LogValue(clientID),
+			"remote_addr": sanitize.LogValue(remoteAddr),
+			"user_agent":  sanitize.LogValue(userAgent),
 		}).Info("WebSocket client registered")
 	case <-h.ctx.Done():
 		h.log.Warn("Cannot register client, hub is shutting down")
@@ -634,8 +635,8 @@ func (c *WSClient) readPump() {
 		default:
 			// Handle other message types as needed
 			c.Hub.log.WithFields(logrus.Fields{
-				"client_id":    c.ID,
-				"message_type": SanitizeLogValue(string(msg.Type)),
+				"client_id":    sanitize.LogValue(c.ID),
+				"message_type": sanitize.LogValue(string(msg.Type)),
 			}).Debug("Received WebSocket message")
 		}
 	}
@@ -704,7 +705,7 @@ func (h *WSHub) HandleWebSocketConnection(upgrader *websocket.Upgrader) func(w h
 
 		h.log.WithFields(logrus.Fields{
 			"client_id":   clientID,
-			"remote_addr": SanitizeLogValue(remoteAddr),
+			"remote_addr": sanitize.LogValue(remoteAddr),
 		}).Info("WebSocket connection established")
 	}
 }
diff --git a/runner/api/websocket_test.go b/runner/api/websocket_test.go
deleted file mode 100644
index be30205..0000000
--- a/runner/api/websocket_test.go
+++ /dev/null
@@ -1,730 +0,0 @@
-package api
-
-import (
-	"context"
-	"testing"
-	"time"
-
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-func TestNewWSHub(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel) // Reduce noise in tests
-
-	hub := NewWSHub(log)
-
-	assert.NotNil(t, hub)
-	assert.NotNil(t, hub.clients)
-	assert.NotNil(t, hub.register)
-	assert.NotNil(t, hub.unregister)
-	assert.NotNil(t, hub.broadcast)
-	assert.NotNil(t, hub.subscriptions)
-	assert.Equal(t, 100, hub.config.MaxClients)
-	assert.Equal(t, 54*time.Second, hub.config.PingInterval)
-	assert.True(t, hub.config.EnablePingPong)
-}
-
-func TestWSHubRunAndStop(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	// Start the hub
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-
-	// Give it a moment to start
-	time.Sleep(100 * time.Millisecond)
-
-	// Check that it's running
-	assert.Equal(t, 0, hub.GetConnectedClientsCount())
-
-	// Stop the hub
-	err = hub.Stop()
-	require.NoError(t, err)
-}
-
-func TestWSHubBroadcastToAll(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-
-	// Give hub time to start
-	time.Sleep(50 * time.Millisecond)
-
-	// Test broadcasting without any clients (should not panic)
-	testData := map[string]interface{}{
-		"test_key": "test_value",
-	}
-
-	hub.BroadcastToAll(WSMessageTypeNewRun, testData)
-
-	// Give time for processing
-	time.Sleep(50 * time.Millisecond)
-
-	// Stop the hub
-	err = hub.Stop()
-	require.NoError(t, err)
-}
-
-func TestWSHubNotifyMethods(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-
-	// Give hub time to start
-	time.Sleep(50 * time.Millisecond)
-
-	// Test NotifyNewRun
-	run := &types.HistoricRun{
-		ID:               "test-run-1",
-		TestName:         "test-benchmark",
-		TotalRequests:    1000,
-		TotalErrors:      10,
-		OverallErrorRate: 0.01,
-		AvgLatencyMs:     50.5,
-		P95LatencyMs:     95.5,
-		BestClient:       "client1",
-		PerformanceScores: map[string]float64{
-			"client1": 95.5,
-			"client2": 92.3,
-		},
-	}
-
-	// These should not panic
-	hub.NotifyNewRun(run)
-
-	// Test NotifyRegression
-	regression := &types.Regression{
-		ID:             "regression-1",
-		RunID:          "test-run-1",
-		BaselineRunID:  "baseline-run-1",
-		Client:         "client1",
-		Metric:         "p95_latency",
-		Method:         "eth_getBalance",
-		Severity:       "high",
-		PercentChange:  25.5,
-		AbsoluteChange: 12.75,
-		BaselineValue:  50.0,
-		CurrentValue:   62.75,
-		IsSignificant:  true,
-		PValue:         0.01,
-		DetectedAt:     time.Now(),
-	}
-
-	hub.NotifyRegression(regression, run)
-
-	// Test NotifyBaselineUpdated
-	hub.NotifyBaselineUpdated("test-baseline", "test-run-1", "test-benchmark")
-
-	// Test NotifyAnalysisComplete
-	analysisResults := map[string]interface{}{
-		"performance_score":  95.5,
-		"regressions_found":  2,
-		"improvements_found": 1,
-	}
-	hub.NotifyAnalysisComplete("test-run-1", "test-benchmark", analysisResults)
-
-	// Stop the hub
-	err = hub.Stop()
-	require.NoError(t, err)
-}
-
-func TestGenerateClientID(t *testing.T) {
-	id1 := generateClientID()
-	id2 := generateClientID()
-
-	assert.NotEmpty(t, id1)
-	assert.NotEmpty(t, id2)
-	assert.NotEqual(t, id1, id2)  // IDs should be unique
-	assert.Equal(t, 16, len(id1)) // Should be 8 bytes -> 16 hex chars
-}
-
-func TestWSMessageTypes(t *testing.T) {
-	// Test that all message types are properly defined
-	assert.Equal(t, WSMessageType("connection"), WSMessageTypeConnection)
-	assert.Equal(t, WSMessageType("ping"), WSMessageTypePing)
-	assert.Equal(t, WSMessageType("pong"), WSMessageTypePong)
-	assert.Equal(t, WSMessageType("new_run"), WSMessageTypeNewRun)
-	assert.Equal(t, WSMessageType("regression_detected"), WSMessageTypeRegressionDetected)
-	assert.Equal(t, WSMessageType("baseline_updated"), WSMessageTypeBaselineUpdated)
-	assert.Equal(t, WSMessageType("analysis_complete"), WSMessageTypeAnalysisComplete)
-	assert.Equal(t, WSMessageType("run_started"), WSMessageTypeRunStarted)
-	assert.Equal(t, WSMessageType("run_progress"), WSMessageTypeRunProgress)
-	assert.Equal(t, WSMessageType("run_complete"), WSMessageTypeRunComplete)
-	assert.Equal(t, WSMessageType("run_failed"), WSMessageTypeRunFailed)
-}
-
-// Enhanced WebSocket real-time functionality tests
-
-// TestWSHubClientSubscriptions tests client subscription management
-func TestWSHubClientSubscriptions(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	// Give hub time to start
-	time.Sleep(50 * time.Millisecond)
-
-	// Test subscription management
-	clientID := generateClientID()
-
-	// Subscribe to test name
-	hub.SubscribeToTestName(clientID, "test-benchmark-1")
-	assert.True(t, hub.IsSubscribedToTestName(clientID, "test-benchmark-1"))
-	assert.False(t, hub.IsSubscribedToTestName(clientID, "test-benchmark-2"))
-
-	// Subscribe to run ID
-	hub.SubscribeToRunID(clientID, "run-123")
-	assert.True(t, hub.IsSubscribedToRunID(clientID, "run-123"))
-	assert.False(t, hub.IsSubscribedToRunID(clientID, "run-456"))
-
-	// Unsubscribe
-	hub.UnsubscribeFromTestName(clientID, "test-benchmark-1")
-	assert.False(t, hub.IsSubscribedToTestName(clientID, "test-benchmark-1"))
-
-	hub.UnsubscribeFromRunID(clientID, "run-123")
-	assert.False(t, hub.IsSubscribedToRunID(clientID, "run-123"))
-}
-
-// TestWSHubTargetedBroadcasting tests targeted message broadcasting
-func TestWSHubTargetedBroadcasting(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test broadcasting to test name subscribers
-	testData := map[string]interface{}{
-		"test_key": "test_value",
-	}
-
-	// Should not panic even without subscribers
-	hub.BroadcastToTestName("test-benchmark", WSMessageTypeNewRun, testData)
-	hub.BroadcastToRunID("run-123", WSMessageTypeRunProgress, testData)
-
-	time.Sleep(50 * time.Millisecond)
-}
-
-// TestWSHubConnectionLimit tests connection limit enforcement
-func TestWSHubConnectionLimit(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	// Create hub with low connection limit for testing
-	hub := NewWSHub(log)
-	hub.config.MaxClients = 2
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test that GetConnectedClientsCount works
-	assert.Equal(t, 0, hub.GetConnectedClientsCount())
-
-	// Test GetMaxClients
-	assert.Equal(t, 2, hub.GetMaxClients())
-}
-
-// TestWSHubMessageQueuing tests message queuing and delivery
-func TestWSHubMessageQueuing(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Send multiple messages rapidly
-	for i := 0; i < 10; i++ {
-		testData := map[string]interface{}{
-			"message_id": i,
-			"timestamp":  time.Now().UnixNano(),
-		}
-		hub.BroadcastToAll(WSMessageTypeNewRun, testData)
-	}
-
-	// Give time for message processing
-	time.Sleep(100 * time.Millisecond)
-}
-
-// TestWSHubErrorHandling tests error handling scenarios
-func TestWSHubErrorHandling(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	// Test operations before hub is started
-	testData := map[string]interface{}{
-		"test": "data",
-	}
-
-	// These should not panic
-	hub.BroadcastToAll(WSMessageTypeNewRun, testData)
-	hub.BroadcastToTestName("test", WSMessageTypeNewRun, testData)
-	hub.BroadcastToRunID("run-123", WSMessageTypeRunProgress, testData)
-
-	clientID := generateClientID()
-	hub.SubscribeToTestName(clientID, "test")
-	hub.UnsubscribeFromTestName(clientID, "test")
-
-	// Test double stop
-	err := hub.Stop()
-	assert.NoError(t, err)
-
-	err = hub.Stop()
-	assert.NoError(t, err) // Should not error on double stop
-}
-
-// TestWSHubConcurrentOperations tests concurrent WebSocket operations
-func TestWSHubConcurrentOperations(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test concurrent subscription operations
-	const numGoroutines = 10
-	const operationsPerGoroutine = 20
-
-	done := make(chan bool, numGoroutines)
-
-	for i := 0; i < numGoroutines; i++ {
-		go func(id int) {
-			defer func() { done <- true }()
-
-			clientID := generateClientID()
-			testName := fmt.Sprintf("test-%d", id)
-			runID := fmt.Sprintf("run-%d", id)
-
-			for j := 0; j < operationsPerGoroutine; j++ {
-				// Subscribe
-				hub.SubscribeToTestName(clientID, testName)
-				hub.SubscribeToRunID(clientID, runID)
-
-				// Check subscriptions
-				hub.IsSubscribedToTestName(clientID, testName)
-				hub.IsSubscribedToRunID(clientID, runID)
-
-				// Broadcast
-				testData := map[string]interface{}{
-					"goroutine": id,
-					"operation": j,
-				}
-				hub.BroadcastToTestName(testName, WSMessageTypeNewRun, testData)
-				hub.BroadcastToRunID(runID, WSMessageTypeRunProgress, testData)
-
-				// Unsubscribe
-				hub.UnsubscribeFromTestName(clientID, testName)
-				hub.UnsubscribeFromRunID(clientID, runID)
-			}
-		}(i)
-	}
-
-	// Wait for all goroutines to complete
-	for i := 0; i < numGoroutines; i++ {
-		select {
-		case <-done:
-		case <-time.After(2 * time.Second):
-			t.Fatal("Timeout waiting for concurrent operations to complete")
-		}
-	}
-}
-
-// TestWSHubNotificationWorkflows tests complete notification workflows
-func TestWSHubNotificationWorkflows(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test complete benchmark run workflow
-	run := &types.HistoricRun{
-		ID:               "workflow-run-1",
-		TestName:         "workflow-test",
-		TotalRequests:    1000,
-		TotalErrors:      10,
-		OverallErrorRate: 0.01,
-		AvgLatencyMs:     50.5,
-		P95LatencyMs:     95.5,
-		BestClient:       "geth",
-		PerformanceScores: map[string]float64{
-			"geth":       95.5,
-			"nethermind": 92.3,
-		},
-	}
-
-	// 1. Notify new run
-	hub.NotifyNewRun(run)
-
-	// 2. Notify regression detected
-	regression := &types.Regression{
-		ID:             "workflow-regression-1",
-		RunID:          run.ID,
-		BaselineRunID:  "baseline-run-1",
-		Client:         "geth",
-		Metric:         "p95_latency",
-		Method:         "eth_getBalance",
-		Severity:       "high",
-		PercentChange:  25.5,
-		AbsoluteChange: 12.75,
-		BaselineValue:  50.0,
-		CurrentValue:   62.75,
-		IsSignificant:  true,
-		PValue:         0.01,
-		DetectedAt:     time.Now(),
-	}
-
-	hub.NotifyRegression(regression, run)
-
-	// 3. Notify baseline updated
-	hub.NotifyBaselineUpdated("workflow-baseline", run.ID, run.TestName)
-
-	// 4. Notify analysis complete
-	analysisResults := map[string]interface{}{
-		"performance_score":    95.5,
-		"regressions_found":    1,
-		"improvements_found":   0,
-		"overall_health_score": 88.2,
-		"risk_level":           "medium",
-	}
-	hub.NotifyAnalysisComplete(run.ID, run.TestName, analysisResults)
-
-	time.Sleep(100 * time.Millisecond)
-}
-
-// TestWSHubMemoryLeaks tests for potential memory leaks
-func TestWSHubMemoryLeaks(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Create and remove many subscriptions
-	const numClients = 100
-	const subscriptionsPerClient = 10
-
-	clientIDs := make([]string, numClients)
-	for i := 0; i < numClients; i++ {
-		clientIDs[i] = generateClientID()
-	}
-
-	// Create subscriptions
-	for _, clientID := range clientIDs {
-		for j := 0; j < subscriptionsPerClient; j++ {
-			testName := fmt.Sprintf("memory-test-%d", j)
-			runID := fmt.Sprintf("memory-run-%d", j)
-
-			hub.SubscribeToTestName(clientID, testName)
-			hub.SubscribeToRunID(clientID, runID)
-		}
-	}
-
-	// Broadcast some messages
-	for i := 0; i < 10; i++ {
-		testData := map[string]interface{}{
-			"iteration": i,
-		}
-		hub.BroadcastToAll(WSMessageTypeNewRun, testData)
-	}
-
-	// Remove all subscriptions
-	for _, clientID := range clientIDs {
-		for j := 0; j < subscriptionsPerClient; j++ {
-			testName := fmt.Sprintf("memory-test-%d", j)
-			runID := fmt.Sprintf("memory-run-%d", j)
-
-			hub.UnsubscribeFromTestName(clientID, testName)
-			hub.UnsubscribeFromRunID(clientID, runID)
-		}
-	}
-
-	time.Sleep(100 * time.Millisecond)
-
-	// Hub should handle cleanup gracefully
-	assert.Equal(t, 0, hub.GetConnectedClientsCount())
-}
-
-// TestWSHubReconnectionScenarios tests reconnection handling
-func TestWSHubReconnectionScenarios(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Simulate client reconnection scenarios
-	clientID1 := generateClientID()
-	clientID2 := generateClientID()
-
-	// Client 1 subscribes
-	hub.SubscribeToTestName(clientID1, "reconnection-test")
-	assert.True(t, hub.IsSubscribedToTestName(clientID1, "reconnection-test"))
-
-	// Client 2 subscribes to same test
-	hub.SubscribeToTestName(clientID2, "reconnection-test")
-	assert.True(t, hub.IsSubscribedToTestName(clientID2, "reconnection-test"))
-
-	// Client 1 disconnects (removes all subscriptions)
-	hub.UnsubscribeFromTestName(clientID1, "reconnection-test")
-	assert.False(t, hub.IsSubscribedToTestName(clientID1, "reconnection-test"))
-	assert.True(t, hub.IsSubscribedToTestName(clientID2, "reconnection-test"))
-
-	// Client 1 reconnects with new ID
-	clientID1New := generateClientID()
-	hub.SubscribeToTestName(clientID1New, "reconnection-test")
-	assert.True(t, hub.IsSubscribedToTestName(clientID1New, "reconnection-test"))
-
-	// Broadcast should reach both active clients
-	testData := map[string]interface{}{
-		"message": "reconnection test",
-	}
-	hub.BroadcastToTestName("reconnection-test", WSMessageTypeNewRun, testData)
-
-	time.Sleep(50 * time.Millisecond)
-}
-
-// TestWSHubMessageFiltering tests message filtering capabilities
-func TestWSHubMessageFiltering(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test filtering by subscription
-	clientID := generateClientID()
-
-	// Subscribe only to specific test
-	hub.SubscribeToTestName(clientID, "filter-test-1")
-
-	// Broadcast to subscribed test
-	testData1 := map[string]interface{}{
-		"should_receive": true,
-	}
-	hub.BroadcastToTestName("filter-test-1", WSMessageTypeNewRun, testData1)
-
-	// Broadcast to non-subscribed test
-	testData2 := map[string]interface{}{
-		"should_not_receive": true,
-	}
-	hub.BroadcastToTestName("filter-test-2", WSMessageTypeNewRun, testData2)
-
-	time.Sleep(50 * time.Millisecond)
-}
-
-// TestWSHubPerformance tests WebSocket hub performance
-func TestWSHubPerformance(t *testing.T) {
-	if testing.Short() {
-		t.Skip("Skipping performance test in short mode")
-	}
-
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test performance with many subscriptions
-	const numClients = 100
-	const messagesPerClient = 50
-
-	// Create many subscriptions
-	clientIDs := make([]string, numClients)
-	for i := 0; i < numClients; i++ {
-		clientID := generateClientID()
-		clientIDs[i] = clientID
-		hub.SubscribeToTestName(clientID, "performance-test")
-	}
-
-	// Measure broadcast performance
-	start := time.Now()
-
-	for i := 0; i < messagesPerClient; i++ {
-		testData := map[string]interface{}{
-			"message_id": i,
-			"timestamp":  time.Now().UnixNano(),
-		}
-		hub.BroadcastToTestName("performance-test", WSMessageTypeNewRun, testData)
-	}
-
-	duration := time.Since(start)
-	totalMessages := numClients * messagesPerClient
-
-	t.Logf("Broadcast performance: %d messages in %v (%.2f msg/sec)",
-		totalMessages, duration, float64(totalMessages)/duration.Seconds())
-
-	// Performance should be reasonable
-	assert.Less(t, duration, 2*time.Second, "Broadcasting should complete within 2 seconds")
-
-	time.Sleep(100 * time.Millisecond)
-}
-
-// Benchmark tests for WebSocket operations
-
-func BenchmarkWSHubBroadcastToAll(b *testing.B) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	if err != nil {
-		b.Fatal(err)
-	}
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	testData := map[string]interface{}{
-		"benchmark": true,
-		"timestamp": time.Now().UnixNano(),
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		hub.BroadcastToAll(WSMessageTypeNewRun, testData)
-	}
-}
-
-func BenchmarkWSHubSubscriptionOperations(b *testing.B) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	if err != nil {
-		b.Fatal(err)
-	}
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	clientID := generateClientID()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		testName := fmt.Sprintf("bench-test-%d", i%100) // Cycle through 100 test names
-		hub.SubscribeToTestName(clientID, testName)
-		hub.IsSubscribedToTestName(clientID, testName)
-		hub.UnsubscribeFromTestName(clientID, testName)
-	}
-}
-
-func BenchmarkGenerateClientID(b *testing.B) {
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = generateClientID()
-	}
-}
diff --git a/runner/api/websocket_test_enhanced.go.disabled b/runner/api/websocket_test_enhanced.go.disabled
deleted file mode 100644
index 510b3d6..0000000
--- a/runner/api/websocket_test_enhanced.go.disabled
+++ /dev/null
@@ -1,1010 +0,0 @@
-package api
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"strings"
-	"sync"
-	"testing"
-	"time"
-
-	"github.com/gorilla/websocket"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// Mock WebSocket connection for testing
-type MockWebSocketConn struct {
-	messages [][]byte
-	closed   bool
-	mu       sync.RWMutex
-}
-
-func (m *MockWebSocketConn) WriteMessage(messageType int, data []byte) error {
-	m.mu.Lock()
-	defer m.mu.Unlock()
-	if m.closed {
-		return fmt.Errorf("connection closed")
-	}
-	m.messages = append(m.messages, data)
-	return nil
-}
-
-func (m *MockWebSocketConn) ReadMessage() (messageType int, p []byte, err error) {
-	m.mu.RLock()
-	defer m.mu.RUnlock()
-	if m.closed {
-		return 0, nil, fmt.Errorf("connection closed")
-	}
-	// Return ping message to simulate client activity
-	return websocket.TextMessage, []byte(`{"type":"ping"}`), nil
-}
-
-func (m *MockWebSocketConn) Close() error {
-	m.mu.Lock()
-	defer m.mu.Unlock()
-	m.closed = true
-	return nil
-}
-
-func (m *MockWebSocketConn) SetReadLimit(limit int64) {}
-
-func (m *MockWebSocketConn) SetReadDeadline(t time.Time) error {
-	return nil
-}
-
-func (m *MockWebSocketConn) SetWriteDeadline(t time.Time) error {
-	return nil
-}
-
-func (m *MockWebSocketConn) SetPongHandler(h func(appData string) error) {}
-
-func (m *MockWebSocketConn) ReadJSON(v interface{}) error {
-	m.mu.RLock()
-	defer m.mu.RUnlock()
-	if m.closed {
-		return fmt.Errorf("connection closed")
-	}
-
-	// Simulate ping message
-	msg := WSMessage{
-		Type:      WSMessageTypePing,
-		Timestamp: time.Now(),
-	}
-
-	data, err := json.Marshal(msg)
-	if err != nil {
-		return err
-	}
-
-	return json.Unmarshal(data, v)
-}
-
-func (m *MockWebSocketConn) WriteJSON(v interface{}) error {
-	data, err := json.Marshal(v)
-	if err != nil {
-		return err
-	}
-	return m.WriteMessage(websocket.TextMessage, data)
-}
-
-func (m *MockWebSocketConn) GetMessages() [][]byte {
-	m.mu.RLock()
-	defer m.mu.RUnlock()
-	return append([][]byte{}, m.messages...)
-}
-
-func (m *MockWebSocketConn) IsClosed() bool {
-	m.mu.RLock()
-	defer m.mu.RUnlock()
-	return m.closed
-}
-
-// Test WebSocket Hub comprehensive functionality
-
-func TestWSHubConfiguration(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	// Test default configuration
-	assert.Equal(t, 100, hub.config.MaxClients)
-	assert.Equal(t, 10*time.Second, hub.config.WriteTimeout)
-	assert.Equal(t, 60*time.Second, hub.config.ReadTimeout)
-	assert.Equal(t, 54*time.Second, hub.config.PingInterval)
-	assert.Equal(t, 60*time.Second, hub.config.PongTimeout)
-	assert.Equal(t, int64(512*1024), hub.config.MaxMessageSize)
-	assert.Equal(t, 256, hub.config.ClientBufferSize)
-	assert.Equal(t, 1000, hub.config.BroadcastBufferSize)
-	assert.True(t, hub.config.EnablePingPong)
-	assert.True(t, hub.config.DisconnectOnError)
-	assert.True(t, hub.config.LogConnectionEvents)
-
-	// Test hub internal structure
-	assert.NotNil(t, hub.clients)
-	assert.NotNil(t, hub.register)
-	assert.NotNil(t, hub.unregister)
-	assert.NotNil(t, hub.broadcast)
-	assert.NotNil(t, hub.subscriptions)
-	assert.NotNil(t, hub.log)
-	assert.NotNil(t, hub.ctx)
-	assert.NotNil(t, hub.cancel)
-	assert.NotNil(t, hub.done)
-}
-
-func TestWSHubLifecycle(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	// Test Run
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-
-	// Give hub time to start
-	time.Sleep(100 * time.Millisecond)
-
-	// Verify hub is running
-	assert.Equal(t, 0, hub.GetConnectedClientsCount())
-
-	// Test Stop
-	err = hub.Stop()
-	require.NoError(t, err)
-
-	// Verify hub is stopped
-	select {
-	case <-hub.done:
-		// Hub properly closed
-	case <-time.After(1 * time.Second):
-		t.Error("Hub did not stop within timeout")
-	}
-}
-
-func TestWSClientRegistration(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	// Give hub time to start
-	time.Sleep(50 * time.Millisecond)
-
-	// Test client registration
-	mockConn := &MockWebSocketConn{}
-	clientID := "test-client-1"
-	remoteAddr := "192.168.1.1:12345"
-	userAgent := "test-agent"
-
-	client := hub.RegisterClient(mockConn, clientID, remoteAddr, userAgent)
-
-	// Give time for registration
-	time.Sleep(50 * time.Millisecond)
-
-	assert.NotNil(t, client)
-	assert.Equal(t, clientID, client.ID)
-	assert.Equal(t, remoteAddr, client.RemoteAddr)
-	assert.Equal(t, userAgent, client.UserAgent)
-	assert.Equal(t, 1, hub.GetConnectedClientsCount())
-
-	// Verify welcome message was sent
-	messages := mockConn.GetMessages()
-	assert.GreaterOrEqual(t, len(messages), 1)
-
-	var welcomeMsg WSMessage
-	err = json.Unmarshal(messages[0], &welcomeMsg)
-	require.NoError(t, err)
-	assert.Equal(t, WSMessageTypeConnection, welcomeMsg.Type)
-}
-
-func TestWSClientUnregistration(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register client
-	mockConn := &MockWebSocketConn{}
-	client := hub.RegisterClient(mockConn, "test-client", "127.0.0.1:12345", "test-agent")
-	time.Sleep(50 * time.Millisecond)
-
-	assert.Equal(t, 1, hub.GetConnectedClientsCount())
-
-	// Unregister client
-	hub.unregister <- client
-	time.Sleep(50 * time.Millisecond)
-
-	assert.Equal(t, 0, hub.GetConnectedClientsCount())
-	assert.True(t, mockConn.IsClosed())
-}
-
-func TestWSMaxClientsLimit(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-	hub.config.MaxClients = 2 // Set low limit for testing
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register clients up to limit
-	clients := make([]*WSClient, 0)
-	for i := 0; i < 2; i++ {
-		mockConn := &MockWebSocketConn{}
-		client := hub.RegisterClient(mockConn, fmt.Sprintf("client-%d", i), "127.0.0.1:12345", "test-agent")
-		if client != nil {
-			clients = append(clients, client)
-		}
-	}
-
-	time.Sleep(100 * time.Millisecond)
-	assert.Equal(t, 2, hub.GetConnectedClientsCount())
-
-	// Try to register one more client (should be rejected)
-	mockConn := &MockWebSocketConn{}
-	client := hub.RegisterClient(mockConn, "client-overflow", "127.0.0.1:12345", "test-agent")
-	time.Sleep(100 * time.Millisecond)
-
-	// Should still be 2 clients
-	assert.Equal(t, 2, hub.GetConnectedClientsCount())
-	assert.True(t, mockConn.IsClosed()) // Overflow connection should be closed
-}
-
-func TestWSBroadcastToAll(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register multiple clients
-	clients := make([]*MockWebSocketConn, 3)
-	for i := 0; i < 3; i++ {
-		mockConn := &MockWebSocketConn{}
-		clients[i] = mockConn
-		hub.RegisterClient(mockConn, fmt.Sprintf("client-%d", i), "127.0.0.1:12345", "test-agent")
-	}
-
-	time.Sleep(100 * time.Millisecond)
-
-	// Broadcast message
-	testData := map[string]interface{}{
-		"message": "Hello, all clients!",
-		"number":  42,
-	}
-
-	hub.BroadcastToAll(WSMessageTypeNewRun, testData)
-	time.Sleep(100 * time.Millisecond)
-
-	// Verify all clients received the message
-	for i, client := range clients {
-		messages := client.GetMessages()
-		assert.GreaterOrEqual(t, len(messages), 2, "Client %d should have received at least 2 messages (welcome + broadcast)", i)
-
-		// Find the broadcast message (skip welcome message)
-		var broadcastMsg WSMessage
-		found := false
-		for _, msgBytes := range messages[1:] { // Skip first message (welcome)
-			var msg WSMessage
-			if json.Unmarshal(msgBytes, &msg) == nil && msg.Type == WSMessageTypeNewRun {
-				broadcastMsg = msg
-				found = true
-				break
-			}
-		}
-
-		assert.True(t, found, "Client %d should have received broadcast message", i)
-		if found {
-			data, ok := broadcastMsg.Data.(map[string]interface{})
-			require.True(t, ok)
-			assert.Equal(t, "Hello, all clients!", data["message"])
-			assert.Equal(t, float64(42), data["number"])
-		}
-	}
-}
-
-func TestWSBroadcastToSubscribers(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register clients
-	client1 := &MockWebSocketConn{}
-	client2 := &MockWebSocketConn{}
-	wsClient1 := hub.RegisterClient(client1, "client-1", "127.0.0.1:12345", "test-agent")
-	wsClient2 := hub.RegisterClient(client2, "client-2", "127.0.0.1:12346", "test-agent")
-
-	time.Sleep(100 * time.Millisecond)
-
-	// Manually set subscriptions (in real usage, this would be done via WebSocket messages)
-	hub.mu.Lock()
-	hub.subscriptions[wsClient1]["test-topic"] = true
-	hub.subscriptions[wsClient2]["other-topic"] = true
-	hub.mu.Unlock()
-
-	// Broadcast to specific topic
-	testData := map[string]string{"topic": "test-topic", "message": "Topic-specific message"}
-	hub.BroadcastToSubscribers(WSMessageTypeNewRun, testData, []string{"test-topic"})
-
-	time.Sleep(100 * time.Millisecond)
-
-	// Verify only subscribed client received the message
-	client1Messages := client1.GetMessages()
-	client2Messages := client2.GetMessages()
-
-	// Client 1 should have received the message (welcome + broadcast)
-	assert.GreaterOrEqual(t, len(client1Messages), 2)
-
-	// Client 2 should only have welcome message
-	assert.Equal(t, 1, len(client2Messages))
-}
-
-func TestWSNotificationMethods(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register a client
-	mockConn := &MockWebSocketConn{}
-	hub.RegisterClient(mockConn, "test-client", "127.0.0.1:12345", "test-agent")
-	time.Sleep(100 * time.Millisecond)
-
-	// Test NotifyNewRun
-	run := &types.HistoricRun{
-		ID:               "test-run-1",
-		TestName:         "test-benchmark",
-		TotalRequests:    1000,
-		TotalErrors:      10,
-		OverallErrorRate: 0.01,
-		AvgLatencyMs:     50.5,
-		P95LatencyMs:     95.5,
-		BestClient:       "geth",
-		PerformanceScores: map[string]float64{
-			"geth": 95.5,
-			"besu": 92.3,
-		},
-	}
-
-	hub.NotifyNewRun(run)
-	time.Sleep(50 * time.Millisecond)
-
-	// Test NotifyRegression
-	regression := &types.Regression{
-		ID:             "regression-1",
-		RunID:          "test-run-1",
-		BaselineRunID:  "baseline-run-1",
-		Client:         "geth",
-		Metric:         "p95_latency",
-		Method:         "eth_getBalance",
-		Severity:       "high",
-		PercentChange:  25.5,
-		AbsoluteChange: 12.75,
-		BaselineValue:  50.0,
-		CurrentValue:   62.75,
-		IsSignificant:  true,
-		PValue:         0.01,
-		DetectedAt:     time.Now(),
-	}
-
-	hub.NotifyRegression(regression, run)
-	time.Sleep(50 * time.Millisecond)
-
-	// Test NotifyBaselineUpdated
-	hub.NotifyBaselineUpdated("test-baseline", "test-run-1", "test-benchmark")
-	time.Sleep(50 * time.Millisecond)
-
-	// Test NotifyAnalysisComplete
-	analysisResults := map[string]interface{}{
-		"performance_score":  95.5,
-		"regressions_found":  2,
-		"improvements_found": 1,
-	}
-	hub.NotifyAnalysisComplete("test-run-1", "test-benchmark", analysisResults)
-	time.Sleep(50 * time.Millisecond)
-
-	// Verify messages were sent
-	messages := mockConn.GetMessages()
-	assert.GreaterOrEqual(t, len(messages), 5) // welcome + 4 notifications
-
-	// Verify message types
-	messageTypes := make(map[WSMessageType]bool)
-	for _, msgBytes := range messages[1:] { // Skip welcome message
-		var msg WSMessage
-		if json.Unmarshal(msgBytes, &msg) == nil {
-			messageTypes[msg.Type] = true
-		}
-	}
-
-	assert.True(t, messageTypes[WSMessageTypeNewRun])
-	assert.True(t, messageTypes[WSMessageTypeRegressionDetected])
-	assert.True(t, messageTypes[WSMessageTypeBaselineUpdated])
-	assert.True(t, messageTypes[WSMessageTypeAnalysisComplete])
-}
-
-func TestWSClientInfo(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register clients with different information
-	clients := []struct {
-		id         string
-		remoteAddr string
-		userAgent  string
-	}{
-		{"client-1", "192.168.1.1:12345", "Mozilla/5.0 (Test Browser)"},
-		{"client-2", "10.0.0.1:54321", "curl/7.68.0"},
-		{"client-3", "127.0.0.1:8080", "websocket-client/1.0"},
-	}
-
-	for _, clientInfo := range clients {
-		mockConn := &MockWebSocketConn{}
-		hub.RegisterClient(mockConn, clientInfo.id, clientInfo.remoteAddr, clientInfo.userAgent)
-	}
-
-	time.Sleep(100 * time.Millisecond)
-
-	// Test GetConnectedClientsCount
-	assert.Equal(t, 3, hub.GetConnectedClientsCount())
-
-	// Test GetClientInfo
-	clientInfos := hub.GetClientInfo()
-	assert.Len(t, clientInfos, 3)
-
-	// Verify client information
-	infoMap := make(map[string]map[string]interface{})
-	for _, info := range clientInfos {
-		infoMap[info["id"].(string)] = info
-	}
-
-	for _, expectedClient := range clients {
-		info, exists := infoMap[expectedClient.id]
-		assert.True(t, exists, "Client %s should be in client info", expectedClient.id)
-		if exists {
-			assert.Equal(t, expectedClient.remoteAddr, info["remote_addr"])
-			assert.Equal(t, expectedClient.userAgent, info["user_agent"])
-			assert.Contains(t, info, "connected_at")
-			assert.Contains(t, info, "last_ping")
-		}
-	}
-}
-
-func TestWSPingPongMechanism(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-	hub.config.PingInterval = 100 * time.Millisecond // Short interval for testing
-	hub.config.PongTimeout = 200 * time.Millisecond
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register client
-	mockConn := &MockWebSocketConn{}
-	client := hub.RegisterClient(mockConn, "test-client", "127.0.0.1:12345", "test-agent")
-	time.Sleep(50 * time.Millisecond)
-
-	// Update last ping time to simulate active connection
-	client.mu.Lock()
-	client.LastPing = time.Now()
-	client.mu.Unlock()
-
-	// Wait for ping interval
-	time.Sleep(150 * time.Millisecond)
-
-	// Client should still be connected (responding to pings)
-	assert.Equal(t, 1, hub.GetConnectedClientsCount())
-
-	// Simulate dead connection by setting old ping time
-	client.mu.Lock()
-	client.LastPing = time.Now().Add(-300 * time.Millisecond)
-	client.mu.Unlock()
-
-	// Wait for cleanup
-	time.Sleep(150 * time.Millisecond)
-
-	// Client should be disconnected due to ping timeout
-	assert.Equal(t, 0, hub.GetConnectedClientsCount())
-}
-
-func TestWSMessageHandling(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test ping/pong message handling
-	mockConn := &MockWebSocketConn{}
-	client := hub.RegisterClient(mockConn, "test-client", "127.0.0.1:12345", "test-agent")
-	time.Sleep(100 * time.Millisecond)
-
-	// Send ping message
-	pingMsg := WSMessage{
-		Type:      WSMessageTypePing,
-		Timestamp: time.Now(),
-		ClientID:  client.ID,
-	}
-
-	client.sendMessage(pingMsg)
-	time.Sleep(50 * time.Millisecond)
-
-	// Verify pong response was sent
-	messages := mockConn.GetMessages()
-	assert.GreaterOrEqual(t, len(messages), 2)
-
-	// Look for pong message
-	var pongFound bool
-	for _, msgBytes := range messages {
-		var msg WSMessage
-		if json.Unmarshal(msgBytes, &msg) == nil && msg.Type == WSMessageTypePong {
-			pongFound = true
-			break
-		}
-	}
-	assert.True(t, pongFound, "Pong message should be sent in response to ping")
-}
-
-func TestWSErrorHandling(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Test broadcast channel overflow
-	for i := 0; i < hub.config.BroadcastBufferSize+10; i++ {
-		hub.BroadcastToAll(WSMessageTypeNewRun, map[string]int{"test": i})
-	}
-
-	// Should not panic or block
-	time.Sleep(50 * time.Millisecond)
-
-	// Test client send channel overflow
-	mockConn := &MockWebSocketConn{}
-	client := hub.RegisterClient(mockConn, "test-client", "127.0.0.1:12345", "test-agent")
-	time.Sleep(50 * time.Millisecond)
-
-	// Fill client send channel
-	for i := 0; i < hub.config.ClientBufferSize+10; i++ {
-		client.sendMessage(WSMessage{
-			Type: WSMessageTypeNewRun,
-			Data: map[string]int{"test": i},
-		})
-	}
-
-	time.Sleep(100 * time.Millisecond)
-
-	// Client should be disconnected due to full send channel
-	assert.Equal(t, 0, hub.GetConnectedClientsCount())
-}
-
-func TestWSHubStopWithConnectedClients(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register multiple clients
-	clients := make([]*MockWebSocketConn, 3)
-	for i := 0; i < 3; i++ {
-		mockConn := &MockWebSocketConn{}
-		clients[i] = mockConn
-		hub.RegisterClient(mockConn, fmt.Sprintf("client-%d", i), "127.0.0.1:12345", "test-agent")
-	}
-
-	time.Sleep(100 * time.Millisecond)
-	assert.Equal(t, 3, hub.GetConnectedClientsCount())
-
-	// Stop hub
-	err = hub.Stop()
-	require.NoError(t, err)
-
-	// All clients should be closed
-	for i, client := range clients {
-		assert.True(t, client.IsClosed(), "Client %d should be closed", i)
-	}
-
-	// Hub should be fully stopped
-	select {
-	case <-hub.done:
-		// Expected
-	case <-time.After(1 * time.Second):
-		t.Error("Hub did not stop within timeout")
-	}
-}
-
-func TestWSClientID(t *testing.T) {
-	// Test multiple ID generations are unique
-	ids := make(map[string]bool)
-	for i := 0; i < 100; i++ {
-		id := generateClientID()
-		assert.NotEmpty(t, id)
-		assert.False(t, ids[id], "Client ID should be unique")
-		ids[id] = true
-		assert.Equal(t, 16, len(id)) // 8 bytes = 16 hex chars
-	}
-}
-
-func TestWSMessageTypes(t *testing.T) {
-	// Verify all message type constants are defined
-	expectedTypes := []WSMessageType{
-		WSMessageTypeConnection,
-		WSMessageTypePing,
-		WSMessageTypePong,
-		WSMessageTypeError,
-		WSMessageTypeDisconnection,
-		WSMessageTypeNewRun,
-		WSMessageTypeRegressionDetected,
-		WSMessageTypeBaselineUpdated,
-		WSMessageTypeAnalysisComplete,
-		WSMessageTypeRunStarted,
-		WSMessageTypeRunProgress,
-		WSMessageTypeRunComplete,
-		WSMessageTypeRunFailed,
-	}
-
-	for _, msgType := range expectedTypes {
-		assert.NotEmpty(t, string(msgType))
-	}
-
-	// Test message structure
-	msg := WSMessage{
-		Type:      WSMessageTypeNewRun,
-		Data:      map[string]string{"test": "data"},
-		Timestamp: time.Now(),
-		ID:        "msg-123",
-		ClientID:  "client-456",
-	}
-
-	// Should be serializable
-	data, err := json.Marshal(msg)
-	require.NoError(t, err)
-	assert.Contains(t, string(data), "new_run")
-	assert.Contains(t, string(data), "test")
-
-	// Should be deserializable
-	var unmarshaled WSMessage
-	err = json.Unmarshal(data, &unmarshaled)
-	require.NoError(t, err)
-	assert.Equal(t, msg.Type, unmarshaled.Type)
-	assert.Equal(t, msg.ID, unmarshaled.ID)
-	assert.Equal(t, msg.ClientID, unmarshaled.ClientID)
-}
-
-func TestWSWebSocketHandlerIntegration(t *testing.T) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Create upgrader
-	upgrader := &websocket.Upgrader{
-		CheckOrigin: func(r *http.Request) bool {
-			return true
-		},
-	}
-
-	// Create handler
-	handler := hub.HandleWebSocketConnection(upgrader)
-
-	// Test with mock HTTP request
-	server := httptest.NewServer(http.HandlerFunc(handler))
-	defer server.Close()
-
-	// Convert HTTP URL to WebSocket URL
-	wsURL := "ws" + strings.TrimPrefix(server.URL, "http")
-
-	// Connect with real WebSocket client
-	conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
-	if err != nil {
-		// If connection fails, it might be due to test environment
-		// Just verify the handler doesn't panic
-		t.Logf("WebSocket connection failed (expected in some test environments): %v", err)
-		return
-	}
-	defer conn.Close()
-
-	// Give time for connection to be registered
-	time.Sleep(100 * time.Millisecond)
-
-	// Should have one connected client
-	assert.Equal(t, 1, hub.GetConnectedClientsCount())
-
-	// Send a ping message
-	pingMsg := WSMessage{
-		Type:      WSMessageTypePing,
-		Timestamp: time.Now(),
-	}
-
-	err = conn.WriteJSON(pingMsg)
-	require.NoError(t, err)
-
-	// Read pong response
-	var pongMsg WSMessage
-	err = conn.ReadJSON(&pongMsg)
-	require.NoError(t, err)
-	assert.Equal(t, WSMessageTypePong, pongMsg.Type)
-}
-
-// Benchmark tests
-
-func BenchmarkWSHubBroadcast(b *testing.B) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(b, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	// Register some clients
-	for i := 0; i < 10; i++ {
-		mockConn := &MockWebSocketConn{}
-		hub.RegisterClient(mockConn, fmt.Sprintf("client-%d", i), "127.0.0.1:12345", "test-agent")
-	}
-
-	time.Sleep(100 * time.Millisecond)
-
-	testData := map[string]interface{}{
-		"test": "data",
-		"num":  42,
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		hub.BroadcastToAll(WSMessageTypeNewRun, testData)
-	}
-}
-
-func BenchmarkWSClientRegistration(b *testing.B) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-	hub.config.MaxClients = 10000 // Allow many clients
-
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(b, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		mockConn := &MockWebSocketConn{}
-		hub.RegisterClient(mockConn, fmt.Sprintf("client-%d", i), "127.0.0.1:12345", "test-agent")
-	}
-}
-
-func BenchmarkWSNotifyNewRun(b *testing.B) {
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(b, err)
-	defer hub.Stop()
-
-	time.Sleep(50 * time.Millisecond)
-
-	run := &types.HistoricRun{
-		ID:               "test-run-1",
-		TestName:         "test-benchmark",
-		TotalRequests:    1000,
-		TotalErrors:      10,
-		OverallErrorRate: 0.01,
-		AvgLatencyMs:     50.5,
-		P95LatencyMs:     95.5,
-		BestClient:       "geth",
-		PerformanceScores: map[string]float64{
-			"geth": 95.5,
-			"besu": 92.3,
-		},
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		hub.NotifyNewRun(run)
-	}
-}
-
-// Stress tests
-
-func TestWSHubStress(t *testing.T) {
-	if testing.Short() {
-		t.Skip("Skipping stress test in short mode")
-	}
-
-	log := logrus.New()
-	log.SetLevel(logrus.ErrorLevel)
-
-	hub := NewWSHub(log)
-	hub.config.MaxClients = 1000
-
-	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
-	defer cancel()
-
-	err := hub.Run(ctx)
-	require.NoError(t, err)
-	defer hub.Stop()
-
-	time.Sleep(100 * time.Millisecond)
-
-	// Register many clients
-	numClients := 100
-	clients := make([]*MockWebSocketConn, numClients)
-
-	for i := 0; i < numClients; i++ {
-		mockConn := &MockWebSocketConn{}
-		clients[i] = mockConn
-		hub.RegisterClient(mockConn, fmt.Sprintf("stress-client-%d", i), "127.0.0.1:12345", "stress-test")
-		if i%10 == 0 {
-			time.Sleep(10 * time.Millisecond) // Throttle registration
-		}
-	}
-
-	time.Sleep(1 * time.Second)
-	assert.Equal(t, numClients, hub.GetConnectedClientsCount())
-
-	// Broadcast many messages
-	for i := 0; i < 50; i++ {
-		hub.BroadcastToAll(WSMessageTypeNewRun, map[string]interface{}{
-			"stress_test": i,
-			"timestamp":   time.Now(),
-		})
-		time.Sleep(10 * time.Millisecond)
-	}
-
-	time.Sleep(1 * time.Second)
-
-	// Verify clients received messages
-	for i, client := range clients {
-		messages := client.GetMessages()
-		assert.GreaterOrEqual(t, len(messages), 10, "Client %d should have received multiple messages", i)
-	}
-
-	// Disconnect half the clients
-	for i := 0; i < numClients/2; i++ {
-		clients[i].Close()
-	}
-
-	time.Sleep(500 * time.Millisecond)
-
-	// Continue broadcasting
-	for i := 0; i < 10; i++ {
-		hub.BroadcastToAll(WSMessageTypeNewRun, map[string]interface{}{
-			"post_disconnect": i,
-		})
-		time.Sleep(50 * time.Millisecond)
-	}
-
-	time.Sleep(500 * time.Millisecond)
-
-	// Should handle gracefully without errors
-	assert.LessOrEqual(t, hub.GetConnectedClientsCount(), numClients/2)
-}
diff --git a/runner/comparator/config_loader.go b/runner/comparator/config_loader.go
deleted file mode 100644
index 9663941..0000000
--- a/runner/comparator/config_loader.go
+++ /dev/null
@@ -1,87 +0,0 @@
-package comparator
-
-import (
-	"fmt"
-	"os"
-
-	"github.com/jsonrpc-bench/runner/config"
-	"github.com/jsonrpc-bench/runner/types"
-	"gopkg.in/yaml.v3"
-)
-
-// ComparisonConfigYAML represents the YAML configuration for comparison
-type ComparisonConfigYAML struct {
-	Name           string                `yaml:"name"`
-	Description    string                `yaml:"description"`
-	Clients        []*types.ClientConfig `yaml:"clients"`
-	ValidateSchema bool                  `yaml:"validate_schema"`
-	Concurrency    int                   `yaml:"concurrency"`
-	TimeoutSeconds int                   `yaml:"timeout_seconds"`
-	OutputDir      string                `yaml:"output_dir"`
-	Methods        []MethodConfigYAML    `yaml:"methods"`
-}
-
-// MethodConfigYAML represents a method configuration in YAML
-type MethodConfigYAML struct {
-	Name   string        `yaml:"name"`
-	Params []interface{} `yaml:"params"`
-}
-
-// LoadConfigFromYAML loads a comparison configuration from a YAML file
-func LoadConfigFromYAML(filePath string) (*ComparisonConfig, error) {
-	// Read YAML file
-	data, err := os.ReadFile(filePath)
-	if err != nil {
-		return nil, fmt.Errorf("failed to read config file: %w", err)
-	}
-
-	// Substitute environment variables
-	content := string(data)
-	substituted, err := config.SubstituteEnvVars(content)
-	if err != nil {
-		return nil, fmt.Errorf("failed to substitute environment variables: %w", err)
-	}
-	data = []byte(substituted)
-
-	// Parse YAML
-	var yamlConfig ComparisonConfigYAML
-	if err := yaml.Unmarshal(data, &yamlConfig); err != nil {
-		return nil, fmt.Errorf("failed to parse YAML: %w", err)
-	}
-
-	// Convert to ComparisonConfig
-	config := &ComparisonConfig{
-		Name:                  yamlConfig.Name,
-		Description:           yamlConfig.Description,
-		ValidateAgainstSchema: yamlConfig.ValidateSchema,
-		Concurrency:           yamlConfig.Concurrency,
-		TimeoutSeconds:        yamlConfig.TimeoutSeconds,
-		OutputDir:             yamlConfig.OutputDir,
-	}
-
-	// Set default values if not specified
-	if config.Concurrency == 0 {
-		config.Concurrency = 5
-	}
-	if config.TimeoutSeconds == 0 {
-		config.TimeoutSeconds = 30
-	}
-	if config.OutputDir == "" {
-		config.OutputDir = "comparison-results"
-	}
-
-	// Convert clients
-	config.Clients = yamlConfig.Clients
-
-	// Extract methods
-	config.Methods = make([]string, 0, len(yamlConfig.Methods))
-	config.CustomParameters = make(map[string][]interface{})
-	for _, method := range yamlConfig.Methods {
-		config.Methods = append(config.Methods, method.Name)
-		if len(method.Params) > 0 {
-			config.CustomParameters[method.Name] = method.Params
-		}
-	}
-
-	return config, nil
-}
diff --git a/runner/comparator/diff.go b/runner/comparator/diff.go
index cee4c0b..34e5f61 100644
--- a/runner/comparator/diff.go
+++ b/runner/comparator/diff.go
@@ -201,7 +201,7 @@ func compareObjects(path string, obj1, obj2 map[string]interface{}) ([]DiffEntry
 
 	// Compare each key
 	for _, key := range keys {
-		keyPath := path
+		var keyPath string
 		if path != "" {
 			keyPath = path + "." + key
 		} else {
diff --git a/runner/config/loader_test.go b/runner/config/loader_test.go
index 1635e1b..800d0cf 100644
--- a/runner/config/loader_test.go
+++ b/runner/config/loader_test.go
@@ -43,13 +43,14 @@ clients:
   - erigon
 duration: "30s"
 rps: 100
-endpoints:
-  - method: "eth_blockNumber"
+vus: 1
+calls:
+  - name: "blockNumber"
+    method: "eth_blockNumber"
     params: []
-    frequency: "50%"
-  - method: "eth_chainId"
+  - name: "chainId"
+    method: "eth_chainId"
     params: []
-    frequency: "50%"
 `
 		tmpFile, err := os.CreateTemp("", "test-config-*.yaml")
 		require.NoError(t, err)
@@ -107,10 +108,11 @@ clients:
   - erigon
 duration: "30s"
 rps: 100
-endpoints:
-  - method: "eth_blockNumber"
+vus: 1
+calls:
+  - name: "blockNumber"
+    method: "eth_blockNumber"
     params: []
-    frequency: "100%"
 `
 		tmpFile, err := os.CreateTemp("", "test-new-style-*.yaml")
 		require.NoError(t, err)
@@ -134,16 +136,17 @@ endpoints:
 test_name: "old-style-test"
 description: "Old style configuration"
 clients:
-  - name: "local-geth"
+  - name: "local_geth"
     url: "http://localhost:9545"
-  - name: "local-erigon"
+  - name: "local_erigon"
     url: "http://localhost:9546"
 duration: "30s"
 rps: 100
-endpoints:
-  - method: "eth_blockNumber"
+vus: 1
+calls:
+  - name: "blockNumber"
+    method: "eth_blockNumber"
     params: []
-    frequency: "100%"
 `
 		tmpFile, err := os.CreateTemp("", "test-old-style-*.yaml")
 		require.NoError(t, err)
@@ -157,9 +160,9 @@ endpoints:
 		require.NoError(t, err)
 
 		assert.Equal(t, "old-style-test", config.TestName)
-		assert.Equal(t, []string{"local-geth", "local-erigon"}, config.ClientRefs)
+		assert.Equal(t, []string{"local_geth", "local_erigon"}, config.ClientRefs)
 		assert.Len(t, config.ResolvedClients, 2)
-		assert.Equal(t, "local-geth", config.ResolvedClients[0].Name)
+		assert.Equal(t, "local_geth", config.ResolvedClients[0].Name)
 		assert.Equal(t, "http://localhost:9545", config.ResolvedClients[0].URL)
 	})
 }
diff --git a/runner/config/storage_test.go b/runner/config/storage_test.go
index 6941803..43147c1 100644
--- a/runner/config/storage_test.go
+++ b/runner/config/storage_test.go
@@ -1,6 +1,7 @@
 package config
 
 import (
+	"fmt"
 	"os"
 	"path/filepath"
 	"testing"
@@ -755,22 +756,22 @@ func (suite *StorageConfigTestSuite) TestConcurrentConfigOperations() {
 	// Create multiple config files
 	configs := make([]string, 5)
 	for i := 0; i < 5; i++ {
-		configFile := filepath.Join(suite.tempDir, "concurrent_config_"+string(rune(i+'0'))+".yaml")
-		configContent := `
-historic_path: "` + filepath.Join(suite.tempDir, "concurrent_historic_"+string(rune(i+'0'))) + `"
-retention_days: ` + string(rune(30+i)) + `
+		configFile := filepath.Join(suite.tempDir, fmt.Sprintf("concurrent_config_%d.yaml", i))
+		configContent := fmt.Sprintf(`
+historic_path: "%s"
+retention_days: %d
 enable_historic: true
 
 postgresql:
   host: "localhost"
   port: 5432
-  database: "concurrent_db_` + string(rune(i+'0')) + `"
-  user: "user_` + string(rune(i+'0')) + `"
-  max_open_conns: ` + string(rune(10+i)) + `
-  max_idle_conns: ` + string(rune(5+i)) + `
-  metrics_table: "metrics_` + string(rune(i+'0')) + `"
-  runs_table: "runs_` + string(rune(i+'0')) + `"
-`
+  database: "concurrent_db_%d"
+  user: "user_%d"
+  max_open_conns: %d
+  max_idle_conns: %d
+  metrics_table: "metrics_%d"
+  runs_table: "runs_%d"
+`, filepath.Join(suite.tempDir, fmt.Sprintf("concurrent_historic_%d", i)), 30+i, i, i, 10+i, 5+i, i, i)
 
 		err := os.WriteFile(configFile, []byte(configContent), 0644)
 		require.NoError(t, err)
diff --git a/runner/e2e_integration_test.go b/runner/e2e_integration_test.go
deleted file mode 100644
index f6be912..0000000
--- a/runner/e2e_integration_test.go
+++ /dev/null
@@ -1,959 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"os"
-	"path/filepath"
-	"testing"
-	"time"
-
-	"github.com/gorilla/websocket"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-	"github.com/testcontainers/testcontainers-go"
-	"github.com/testcontainers/testcontainers-go/modules/postgres"
-	"github.com/testcontainers/testcontainers-go/wait"
-
-	"github.com/jsonrpc-bench/runner/analysis"
-	"github.com/jsonrpc-bench/runner/api"
-	"github.com/jsonrpc-bench/runner/config"
-	"github.com/jsonrpc-bench/runner/storage"
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// E2EIntegrationTestSuite provides end-to-end integration tests for the entire system
-type E2EIntegrationTestSuite struct {
-	suite.Suite
-
-	// Infrastructure
-	container *postgres.PostgresContainer
-	db        *sql.DB
-	tempDir   string
-	ctx       context.Context
-	logger    logrus.FieldLogger
-
-	// System components
-	historicStorage    storage.HistoricStorage
-	baselineManager    analysis.BaselineManager
-	trendAnalyzer      analysis.TrendAnalyzer
-	regressionDetector analysis.RegressionDetector
-	apiServer          api.Server
-
-	// Configuration
-	storageConfig *config.StorageConfig
-
-	// Test server URL
-	serverURL string
-}
-
-// SetupSuite initializes the complete test environment
-func (suite *E2EIntegrationTestSuite) SetupSuite() {
-	suite.ctx = context.Background()
-	suite.logger = logrus.New().WithField("test", "e2e_integration")
-
-	// Create temporary directory for file storage
-	tempDir, err := os.MkdirTemp("", "e2e_integration_test_*")
-	require.NoError(suite.T(), err)
-	suite.tempDir = tempDir
-
-	// Start PostgreSQL container with TimescaleDB extension
-	pgContainer, err := postgres.RunContainer(suite.ctx,
-		testcontainers.WithImage("timescale/timescaledb:latest-pg15"),
-		postgres.WithDatabase("e2e_test_db"),
-		postgres.WithUsername("e2e_test_user"),
-		postgres.WithPassword("e2e_test_pass"),
-		testcontainers.WithWaitStrategy(
-			wait.ForLog("database system is ready to accept connections").
-				WithOccurrence(2).
-				WithStartupTimeout(60*time.Second)),
-	)
-	require.NoError(suite.T(), err)
-	suite.container = pgContainer
-
-	// Setup database connection
-	mappedPort, err := pgContainer.MappedPort(suite.ctx, "5432")
-	require.NoError(suite.T(), err)
-
-	connStr := fmt.Sprintf("host=localhost port=%d user=e2e_test_user password=e2e_test_pass dbname=e2e_test_db sslmode=disable",
-		mappedPort.Int())
-	db, err := sql.Open("postgres", connStr)
-	require.NoError(suite.T(), err)
-	suite.db = db
-
-	// Create storage configuration
-	suite.storageConfig = &config.StorageConfig{
-		HistoricPath:   filepath.Join(suite.tempDir, "historic"),
-		RetentionDays:  30,
-		EnableHistoric: true,
-		PostgreSQL: config.PostgreSQLConfig{
-			Host:            "localhost",
-			Port:            int(mappedPort.Int()),
-			Database:        "e2e_test_db",
-			User:            "e2e_test_user",
-			Password:        "e2e_test_pass",
-			SSLMode:         "disable",
-			MaxOpenConns:    10,
-			MaxIdleConns:    5,
-			MetricsTable:    "benchmark_metrics",
-			RunsTable:       "benchmark_runs",
-			RetentionPolicy: "7d",
-		},
-	}
-
-	// Validate configuration
-	err = suite.storageConfig.Validate()
-	require.NoError(suite.T(), err)
-
-	// Initialize database schema
-	migration := storage.NewMigrationService(db, suite.logger)
-	err = migration.Up()
-	require.NoError(suite.T(), err)
-
-	// Create indices for better performance
-	err = migration.CreateIndices()
-	require.NoError(suite.T(), err)
-
-	// Initialize system components
-	suite.historicStorage = storage.NewHistoricStorage(db, suite.storageConfig.HistoricPath, suite.logger)
-	suite.baselineManager = analysis.NewBaselineManager(suite.historicStorage, db, suite.logger)
-	suite.trendAnalyzer = analysis.NewTrendAnalyzer(suite.historicStorage, db, suite.logger)
-	suite.regressionDetector = analysis.NewRegressionDetector(suite.historicStorage, suite.baselineManager, db, suite.logger)
-
-	// Start all components
-	err = suite.historicStorage.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	err = suite.baselineManager.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	err = suite.trendAnalyzer.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	err = suite.regressionDetector.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	// Create and start API server
-	suite.apiServer = api.NewServer(
-		suite.historicStorage,
-		suite.baselineManager,
-		suite.trendAnalyzer,
-		suite.regressionDetector,
-		db,
-		suite.logger,
-	)
-
-	err = suite.apiServer.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	// Give server time to start
-	time.Sleep(500 * time.Millisecond)
-
-	suite.serverURL = "http://localhost:8080"
-}
-
-// TearDownSuite cleans up all test resources
-func (suite *E2EIntegrationTestSuite) TearDownSuite() {
-	if suite.apiServer != nil {
-		suite.apiServer.Stop()
-	}
-	if suite.regressionDetector != nil {
-		suite.regressionDetector.Stop()
-	}
-	if suite.trendAnalyzer != nil {
-		suite.trendAnalyzer.Stop()
-	}
-	if suite.baselineManager != nil {
-		suite.baselineManager.Stop()
-	}
-	if suite.historicStorage != nil {
-		suite.historicStorage.Stop()
-	}
-	if suite.db != nil {
-		suite.db.Close()
-	}
-	if suite.container != nil {
-		suite.container.Terminate(suite.ctx)
-	}
-	if suite.tempDir != "" {
-		os.RemoveAll(suite.tempDir)
-	}
-}
-
-// SetupTest prepares clean state for each test
-func (suite *E2EIntegrationTestSuite) SetupTest() {
-	// Clean up any existing test data
-	_, err := suite.db.Exec("DELETE FROM benchmark_runs WHERE test_name LIKE 'e2e_%'")
-	if err != nil {
-		suite.logger.WithError(err).Warn("Failed to clean up test data")
-	}
-}
-
-// TestE2ECompleteWorkflow tests the complete benchmark workflow
-func (suite *E2EIntegrationTestSuite) TestE2ECompleteWorkflow() {
-	t := suite.T()
-
-	testName := "e2e_complete_workflow"
-
-	// 1. Create and save benchmark results through storage layer
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:      testName,
-		Description:   "End-to-end integration test",
-		GitCommit:     "abc123def456",
-		GitBranch:     "main",
-		StartTime:     time.Now().Add(-15 * time.Minute),
-		EndTime:       time.Now().Add(-5 * time.Minute),
-		Duration:      10 * time.Minute,
-		TargetRPS:     100,
-		ActualRPS:     95.5,
-		TotalRequests: 57300,
-		TotalErrors:   286,
-		Config: map[string]interface{}{
-			"endpoints": []string{"eth_getBalance", "eth_getBlockByNumber"},
-			"clients":   []string{"geth", "nethermind", "erigon"},
-			"duration":  "10m",
-		},
-		Environment: map[string]interface{}{
-			"region":     "us-east-1",
-			"node_count": 3,
-			"cpu_cores":  8,
-			"memory_gb":  16,
-		},
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 19100,
-				TotalErrors:   95,
-				ErrorRate:     0.00497,
-				Latency: types.LatencyMetrics{
-					Avg:        145.2,
-					P50:        125.0,
-					P95:        285.5,
-					P99:        475.8,
-					Max:        892.3,
-					Throughput: 31.83,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      9550,
-						ErrorRate:  0.00471,
-						Avg:        142.1,
-						P95:        280.2,
-						Throughput: 15.92,
-					},
-					"eth_getBlockByNumber": {
-						Count:      9550,
-						ErrorRate:  0.00524,
-						Avg:        148.3,
-						P95:        290.8,
-						Throughput: 15.92,
-					},
-				},
-			},
-			"nethermind": {
-				Name:          "nethermind",
-				TotalRequests: 19100,
-				TotalErrors:   97,
-				ErrorRate:     0.00508,
-				Latency: types.LatencyMetrics{
-					Avg:        152.8,
-					P50:        135.0,
-					P95:        295.2,
-					P99:        485.1,
-					Max:        912.7,
-					Throughput: 31.83,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      9550,
-						ErrorRate:  0.00503,
-						Avg:        149.5,
-						P95:        290.8,
-						Throughput: 15.92,
-					},
-					"eth_getBlockByNumber": {
-						Count:      9550,
-						ErrorRate:  0.00513,
-						Avg:        156.1,
-						P95:        299.6,
-						Throughput: 15.92,
-					},
-				},
-			},
-			"erigon": {
-				Name:          "erigon",
-				TotalRequests: 19100,
-				TotalErrors:   94,
-				ErrorRate:     0.00492,
-				Latency: types.LatencyMetrics{
-					Avg:        138.9,
-					P50:        118.0,
-					P95:        275.3,
-					P99:        465.2,
-					Max:        845.6,
-					Throughput: 31.83,
-				},
-				Methods: map[string]types.MetricSummary{
-					"eth_getBalance": {
-						Count:      9550,
-						ErrorRate:  0.00482,
-						Avg:        135.7,
-						P95:        270.1,
-						Throughput: 15.92,
-					},
-					"eth_getBlockByNumber": {
-						Count:      9550,
-						ErrorRate:  0.00503,
-						Avg:        142.1,
-						P95:        280.5,
-						Throughput: 15.92,
-					},
-				},
-			},
-		},
-	}
-
-	// 2. Save the run and verify it's stored correctly
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-	assert.NotEmpty(t, savedRun.ID)
-	assert.Equal(t, testName, savedRun.TestName)
-
-	// 3. Verify the run can be retrieved via API
-	resp, err := http.Get(suite.serverURL + "/api/v1/runs/" + savedRun.ID)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var apiRun types.HistoricRun
-	err = json.NewDecoder(resp.Body).Decode(&apiRun)
-	require.NoError(t, err)
-	assert.Equal(t, savedRun.ID, apiRun.ID)
-	assert.Equal(t, savedRun.TestName, apiRun.TestName)
-
-	// 4. Create a baseline via API
-	baselineData := map[string]interface{}{
-		"run_id":      savedRun.ID,
-		"name":        "e2e_workflow_baseline",
-		"description": "Baseline for E2E workflow test",
-	}
-
-	jsonData, err := json.Marshal(baselineData)
-	require.NoError(t, err)
-
-	resp, err = http.Post(
-		suite.serverURL+"/api/v1/baselines",
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusCreated, resp.StatusCode)
-
-	var baseline analysis.Baseline
-	err = json.NewDecoder(resp.Body).Decode(&baseline)
-	require.NoError(t, err)
-	assert.Equal(t, "e2e_workflow_baseline", baseline.Name)
-
-	// 5. Create a second run with different performance characteristics
-	degradedResult := *benchmarkResult // Copy
-	degradedResult.StartTime = time.Now().Add(-5 * time.Minute)
-	degradedResult.EndTime = time.Now()
-	degradedResult.TotalErrors = 572 // Double the errors
-	degradedResult.ActualRPS = 85.2  // Lower RPS
-
-	// Degrade performance for all clients
-	for clientName, metrics := range degradedResult.ClientMetrics {
-		degradedMetrics := *metrics // Copy
-		degradedMetrics.TotalErrors = metrics.TotalErrors * 2
-		degradedMetrics.ErrorRate = metrics.ErrorRate * 2
-		degradedMetrics.Latency.Avg = metrics.Latency.Avg * 1.5
-		degradedMetrics.Latency.P95 = metrics.Latency.P95 * 1.6
-		degradedMetrics.Latency.P99 = metrics.Latency.P99 * 1.7
-		degradedMetrics.Latency.Throughput = metrics.Latency.Throughput * 0.85
-		degradedResult.ClientMetrics[clientName] = &degradedMetrics
-	}
-
-	// 6. Save the degraded run
-	degradedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, &degradedResult)
-	require.NoError(t, err)
-
-	// 7. Detect regressions via API
-	detectionOptions := map[string]interface{}{
-		"comparison_mode": "baseline",
-		"baseline_name":   baseline.Name,
-	}
-
-	jsonData, err = json.Marshal(detectionOptions)
-	require.NoError(t, err)
-
-	resp, err = http.Post(
-		suite.serverURL+"/api/v1/regressions/detect/"+degradedRun.ID,
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var regressionReport types.RegressionReport
-	err = json.NewDecoder(resp.Body).Decode(&regressionReport)
-	require.NoError(t, err)
-	assert.Equal(t, degradedRun.ID, regressionReport.RunID)
-	assert.NotEmpty(t, regressionReport.Regressions)
-
-	// 8. Verify regressions were detected for performance degradation
-	foundLatencyRegression := false
-	foundErrorRateRegression := false
-	for _, regression := range regressionReport.Regressions {
-		if regression.Metric == "avg_latency" {
-			foundLatencyRegression = true
-			assert.Greater(t, regression.PercentChange, 20.0) // Should be significant increase
-		}
-		if regression.Metric == "error_rate" {
-			foundErrorRateRegression = true
-			assert.Greater(t, regression.PercentChange, 50.0) // Should be significant increase
-		}
-	}
-	assert.True(t, foundLatencyRegression, "Should detect latency regression")
-	assert.True(t, foundErrorRateRegression, "Should detect error rate regression")
-
-	// 9. Test trend analysis with multiple runs
-	resp, err = http.Get(fmt.Sprintf("%s/api/v1/trends?test_name=%s&days=1", suite.serverURL, testName))
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var trends types.TrendAnalysis
-	err = json.NewDecoder(resp.Body).Decode(&trends)
-	require.NoError(t, err)
-	assert.Equal(t, testName, trends.TestName)
-
-	// 10. Analyze the degraded run
-	resp, err = http.Get(suite.serverURL + "/api/v1/runs/" + degradedRun.ID + "/analyze")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var runAnalysis types.RunAnalysis
-	err = json.NewDecoder(resp.Body).Decode(&runAnalysis)
-	require.NoError(t, err)
-	assert.Equal(t, degradedRun.ID, runAnalysis.RunID)
-	assert.Less(t, runAnalysis.OverallHealthScore, 90.0) // Should be lower due to degradation
-
-	// 11. Test file storage - verify files were created
-	historicFiles, err := os.ReadDir(suite.storageConfig.HistoricPath)
-	require.NoError(t, err)
-	assert.NotEmpty(t, historicFiles)
-
-	// Find files for our test runs
-	foundOriginalFile := false
-	foundDegradedFile := false
-	for _, file := range historicFiles {
-		if !file.IsDir() && file.Name() != ".gitkeep" {
-			content, err := os.ReadFile(filepath.Join(suite.storageConfig.HistoricPath, file.Name()))
-			require.NoError(t, err)
-
-			var storedRun types.HistoricRun
-			err = json.Unmarshal(content, &storedRun)
-			require.NoError(t, err)
-
-			if storedRun.ID == savedRun.ID {
-				foundOriginalFile = true
-			}
-			if storedRun.ID == degradedRun.ID {
-				foundDegradedFile = true
-			}
-		}
-	}
-	assert.True(t, foundOriginalFile, "Original run should be saved to file")
-	assert.True(t, foundDegradedFile, "Degraded run should be saved to file")
-}
-
-// TestE2EWebSocketIntegration tests real-time WebSocket functionality
-func (suite *E2EIntegrationTestSuite) TestE2EWebSocketIntegration() {
-	t := suite.T()
-
-	// Connect to WebSocket
-	wsURL := "ws://localhost:8080/ws"
-	conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
-	require.NoError(t, err)
-	defer conn.Close()
-
-	// Set up message collection
-	messages := make(chan map[string]interface{}, 10)
-	done := make(chan bool)
-
-	go func() {
-		defer func() { done <- true }()
-		for {
-			var msg map[string]interface{}
-			err := conn.ReadJSON(&msg)
-			if err != nil {
-				return
-			}
-			messages <- msg
-		}
-	}()
-
-	// Create a benchmark run that should trigger WebSocket notifications
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  "e2e_websocket_test",
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   10,
-				ErrorRate:     0.01,
-				Latency: types.LatencyMetrics{
-					Avg:        150.0,
-					P95:        300.0,
-					P99:        500.0,
-					Throughput: 100.0,
-				},
-			},
-		},
-	}
-
-	// Save the run - this should trigger WebSocket notifications
-	_, err = suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	// Wait for WebSocket messages
-	receivedMessages := []map[string]interface{}{}
-	timeout := time.After(3 * time.Second)
-
-	for {
-		select {
-		case msg := <-messages:
-			receivedMessages = append(receivedMessages, msg)
-			// Stop after receiving some messages or if we get a specific type
-			if len(receivedMessages) >= 1 {
-				goto checkMessages
-			}
-		case <-timeout:
-			goto checkMessages
-		case <-done:
-			goto checkMessages
-		}
-	}
-
-checkMessages:
-	// We might not receive messages if WebSocket broadcasting isn't implemented
-	// for the historic storage save operation, which is acceptable
-	if len(receivedMessages) > 0 {
-		// Verify message structure if we received any
-		for _, msg := range receivedMessages {
-			assert.Contains(t, msg, "type")
-			assert.Contains(t, msg, "data")
-		}
-	}
-}
-
-// TestE2EErrorScenarios tests error handling in end-to-end scenarios
-func (suite *E2EIntegrationTestSuite) TestE2EErrorScenarios() {
-	t := suite.T()
-
-	// Test 1: Invalid API requests
-	resp, err := http.Get(suite.serverURL + "/api/v1/runs/non-existent-run")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-	assert.Equal(t, http.StatusNotFound, resp.StatusCode)
-
-	// Test 2: Malformed JSON in POST requests
-	invalidJSON := bytes.NewBuffer([]byte("{invalid json"))
-	resp, err = http.Post(
-		suite.serverURL+"/api/v1/baselines",
-		"application/json",
-		invalidJSON,
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-	assert.Equal(t, http.StatusBadRequest, resp.StatusCode)
-
-	// Test 3: Database constraint violations
-	// Try to create baseline with non-existent run ID
-	baselineData := map[string]interface{}{
-		"run_id":      "non-existent-run-id",
-		"name":        "invalid_baseline",
-		"description": "This should fail",
-	}
-
-	jsonData, err := json.Marshal(baselineData)
-	require.NoError(t, err)
-
-	resp, err = http.Post(
-		suite.serverURL+"/api/v1/baselines",
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-	assert.Equal(t, http.StatusBadRequest, resp.StatusCode)
-
-	// Test 4: Regression detection with invalid parameters
-	invalidDetectionOptions := map[string]interface{}{
-		"comparison_mode": "invalid_mode",
-	}
-
-	jsonData, err = json.Marshal(invalidDetectionOptions)
-	require.NoError(t, err)
-
-	resp, err = http.Post(
-		suite.serverURL+"/api/v1/regressions/detect/non-existent-run",
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-	assert.Equal(t, http.StatusBadRequest, resp.StatusCode)
-}
-
-// TestE2EPerformanceWithLoad tests system performance under load
-func (suite *E2EIntegrationTestSuite) TestE2EPerformanceWithLoad() {
-	t := suite.T()
-
-	if testing.Short() {
-		t.Skip("Skipping performance test in short mode")
-	}
-
-	testName := "e2e_performance_load_test"
-
-	// Create multiple benchmark runs concurrently
-	const numRuns = 10
-	const concurrency = 5
-
-	results := make(chan error, numRuns)
-	semaphore := make(chan struct{}, concurrency)
-
-	start := time.Now()
-
-	for i := 0; i < numRuns; i++ {
-		go func(runIndex int) {
-			semaphore <- struct{}{}        // Acquire
-			defer func() { <-semaphore }() // Release
-
-			benchmarkResult := &types.BenchmarkResult{
-				TestName:  testName,
-				StartTime: time.Now().Add(time.Duration(-runIndex*10) * time.Minute),
-				EndTime:   time.Now().Add(time.Duration(-runIndex*10+10) * time.Minute),
-				Duration:  10 * time.Minute,
-				ClientMetrics: map[string]*types.ClientMetrics{
-					"geth": {
-						Name:          "geth",
-						TotalRequests: 1000 + int64(runIndex*100),
-						TotalErrors:   10 + int64(runIndex),
-						ErrorRate:     float64(10+runIndex) / float64(1000+runIndex*100),
-						Latency: types.LatencyMetrics{
-							Avg:        150.0 + float64(runIndex)*5.0,
-							P95:        300.0 + float64(runIndex)*10.0,
-							P99:        500.0 + float64(runIndex)*15.0,
-							Throughput: 100.0 - float64(runIndex)*1.0,
-						},
-					},
-				},
-			}
-
-			_, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-			results <- err
-		}(i)
-	}
-
-	// Collect results
-	for i := 0; i < numRuns; i++ {
-		err := <-results
-		require.NoError(t, err, "Run %d should succeed", i)
-	}
-
-	duration := time.Since(start)
-	t.Logf("Created %d runs in %v (%.2f runs/sec)", numRuns, duration, float64(numRuns)/duration.Seconds())
-
-	// Performance should be reasonable
-	assert.Less(t, duration, 30*time.Second, "Should create runs within 30 seconds")
-
-	// Verify all runs were created
-	resp, err := http.Get(suite.serverURL + "/api/v1/runs?test_name=" + testName)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	var runs []*types.HistoricRun
-	err = json.NewDecoder(resp.Body).Decode(&runs)
-	require.NoError(t, err)
-	assert.Len(t, runs, numRuns)
-
-	// Test API performance with multiple requests
-	start = time.Now()
-	for _, run := range runs {
-		resp, err := http.Get(suite.serverURL + "/api/v1/runs/" + run.ID)
-		require.NoError(t, err)
-		resp.Body.Close()
-		assert.Equal(t, http.StatusOK, resp.StatusCode)
-	}
-	apiDuration := time.Since(start)
-
-	t.Logf("Retrieved %d runs via API in %v (%.2f req/sec)",
-		numRuns, apiDuration, float64(numRuns)/apiDuration.Seconds())
-
-	// API should be responsive
-	assert.Less(t, apiDuration, 10*time.Second, "API should respond quickly")
-}
-
-// TestE2EDataConsistency tests data consistency across the entire system
-func (suite *E2EIntegrationTestSuite) TestE2EDataConsistency() {
-	t := suite.T()
-
-	testName := "e2e_data_consistency_test"
-
-	// Create a benchmark run
-	benchmarkResult := &types.BenchmarkResult{
-		TestName:  testName,
-		StartTime: time.Now().Add(-10 * time.Minute),
-		EndTime:   time.Now(),
-		Duration:  10 * time.Minute,
-		ClientMetrics: map[string]*types.ClientMetrics{
-			"geth": {
-				Name:          "geth",
-				TotalRequests: 1000,
-				TotalErrors:   25,
-				ErrorRate:     0.025,
-				Latency: types.LatencyMetrics{
-					Avg:        175.5,
-					P50:        150.0,
-					P95:        325.8,
-					P99:        525.2,
-					Max:        892.1,
-					Throughput: 95.5,
-				},
-			},
-		},
-	}
-
-	// Save through storage layer
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, benchmarkResult)
-	require.NoError(t, err)
-
-	// 1. Verify consistency between storage and API
-	resp, err := http.Get(suite.serverURL + "/api/v1/runs/" + savedRun.ID)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	var apiRun types.HistoricRun
-	err = json.NewDecoder(resp.Body).Decode(&apiRun)
-	require.NoError(t, err)
-
-	// Compare critical fields
-	assert.Equal(t, savedRun.ID, apiRun.ID)
-	assert.Equal(t, savedRun.TestName, apiRun.TestName)
-	assert.Equal(t, savedRun.AvgLatencyMs, apiRun.AvgLatencyMs)
-	assert.Equal(t, savedRun.OverallErrorRate, apiRun.OverallErrorRate)
-	assert.Equal(t, savedRun.TotalRequests, apiRun.TotalRequests)
-	assert.Equal(t, savedRun.TotalErrors, apiRun.TotalErrors)
-
-	// 2. Verify consistency in database
-	var dbRun types.HistoricRun
-	err = suite.db.QueryRow(`
-		SELECT id, test_name, avg_latency_ms, overall_error_rate, total_requests, total_errors
-		FROM historic_runs WHERE id = $1
-	`, savedRun.ID).Scan(
-		&dbRun.ID,
-		&dbRun.TestName,
-		&dbRun.AvgLatencyMs,
-		&dbRun.OverallErrorRate,
-		&dbRun.TotalRequests,
-		&dbRun.TotalErrors,
-	)
-	require.NoError(t, err)
-
-	assert.Equal(t, savedRun.ID, dbRun.ID)
-	assert.Equal(t, savedRun.TestName, dbRun.TestName)
-	assert.Equal(t, savedRun.AvgLatencyMs, dbRun.AvgLatencyMs)
-	assert.Equal(t, savedRun.OverallErrorRate, dbRun.OverallErrorRate)
-	assert.Equal(t, savedRun.TotalRequests, dbRun.TotalRequests)
-	assert.Equal(t, savedRun.TotalErrors, dbRun.TotalErrors)
-
-	// 3. Verify consistency in file storage
-	historicFiles, err := os.ReadDir(suite.storageConfig.HistoricPath)
-	require.NoError(t, err)
-
-	var fileRun types.HistoricRun
-	foundFile := false
-
-	for _, file := range historicFiles {
-		if !file.IsDir() && file.Name() != ".gitkeep" {
-			content, err := os.ReadFile(filepath.Join(suite.storageConfig.HistoricPath, file.Name()))
-			require.NoError(t, err)
-
-			var tempRun types.HistoricRun
-			err = json.Unmarshal(content, &tempRun)
-			require.NoError(t, err)
-
-			if tempRun.ID == savedRun.ID {
-				fileRun = tempRun
-				foundFile = true
-				break
-			}
-		}
-	}
-
-	assert.True(t, foundFile, "Run should be saved to file")
-	if foundFile {
-		assert.Equal(t, savedRun.ID, fileRun.ID)
-		assert.Equal(t, savedRun.TestName, fileRun.TestName)
-		assert.Equal(t, savedRun.AvgLatencyMs, fileRun.AvgLatencyMs)
-		assert.Equal(t, savedRun.OverallErrorRate, fileRun.OverallErrorRate)
-	}
-
-	// 4. Create baseline and verify consistency
-	baselineData := map[string]interface{}{
-		"run_id":      savedRun.ID,
-		"name":        "e2e_consistency_baseline",
-		"description": "Baseline for consistency test",
-	}
-
-	jsonData, err := json.Marshal(baselineData)
-	require.NoError(t, err)
-
-	resp, err = http.Post(
-		suite.serverURL+"/api/v1/baselines",
-		"application/json",
-		bytes.NewBuffer(jsonData),
-	)
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	var apiBaseline analysis.Baseline
-	err = json.NewDecoder(resp.Body).Decode(&apiBaseline)
-	require.NoError(t, err)
-
-	// Verify baseline in database
-	var dbBaseline analysis.Baseline
-	err = suite.db.QueryRow(`
-		SELECT id, name, run_id, test_name FROM baselines WHERE name = $1
-	`, "e2e_consistency_baseline").Scan(
-		&dbBaseline.ID,
-		&dbBaseline.Name,
-		&dbBaseline.RunID,
-		&dbBaseline.TestName,
-	)
-	require.NoError(t, err)
-
-	assert.Equal(t, apiBaseline.ID, dbBaseline.ID)
-	assert.Equal(t, apiBaseline.Name, dbBaseline.Name)
-	assert.Equal(t, apiBaseline.RunID, dbBaseline.RunID)
-	assert.Equal(t, apiBaseline.TestName, dbBaseline.TestName)
-}
-
-// TestE2ESystemHealth tests overall system health and monitoring
-func (suite *E2EIntegrationTestSuite) TestE2ESystemHealth() {
-	t := suite.T()
-
-	// Test health endpoint
-	resp, err := http.Get(suite.serverURL + "/health")
-	require.NoError(t, err)
-	defer resp.Body.Close()
-
-	assert.Equal(t, http.StatusOK, resp.StatusCode)
-
-	var health map[string]interface{}
-	err = json.NewDecoder(resp.Body).Decode(&health)
-	require.NoError(t, err)
-	assert.Equal(t, "ok", health["status"])
-	assert.NotNil(t, health["timestamp"])
-
-	// Test database connectivity
-	err = suite.db.Ping()
-	assert.NoError(t, err)
-
-	// Test file system access
-	testFile := filepath.Join(suite.storageConfig.HistoricPath, "health_test.tmp")
-	err = os.WriteFile(testFile, []byte("health test"), 0644)
-	assert.NoError(t, err)
-
-	_, err = os.Stat(testFile)
-	assert.NoError(t, err)
-
-	err = os.Remove(testFile)
-	assert.NoError(t, err)
-
-	// Test component health
-	components := []struct {
-		name      string
-		component interface{ Stop() error }
-	}{
-		{"historic_storage", suite.historicStorage},
-		{"baseline_manager", suite.baselineManager},
-		{"trend_analyzer", suite.trendAnalyzer},
-		{"regression_detector", suite.regressionDetector},
-	}
-
-	for _, comp := range components {
-		// Components should be running (stop method exists but we won't call it)
-		assert.NotNil(t, comp.component, "Component %s should be initialized", comp.name)
-	}
-}
-
-// Run the test suite
-func TestE2EIntegrationTestSuite(t *testing.T) {
-	if os.Getenv("SKIP_E2E_TESTS") != "" {
-		t.Skip("Skipping E2E integration tests")
-	}
-
-	suite.Run(t, new(E2EIntegrationTestSuite))
-}
-
-// Benchmark tests for end-to-end performance
-
-func BenchmarkE2ERunCreation(b *testing.B) {
-	if testing.Short() {
-		b.Skip("Skipping benchmark in short mode")
-	}
-
-	// Setup minimal test environment (this would be slow in real scenarios)
-	// In practice, you'd use a shared test environment
-	b.Log("Setting up E2E benchmark environment...")
-
-	// This is a simplified benchmark - real implementation would need
-	// proper setup/teardown for database and storage
-	b.ResetTimer()
-
-	for i := 0; i < b.N; i++ {
-		// Simulate creating a benchmark run
-		benchmarkResult := &types.BenchmarkResult{
-			TestName:  fmt.Sprintf("bench_test_%d", i),
-			StartTime: time.Now().Add(-10 * time.Minute),
-			EndTime:   time.Now(),
-			Duration:  10 * time.Minute,
-			ClientMetrics: map[string]*types.ClientMetrics{
-				"geth": {
-					Name:          "geth",
-					TotalRequests: 1000,
-					TotalErrors:   10,
-					ErrorRate:     0.01,
-					Latency: types.LatencyMetrics{
-						Avg:        150.0,
-						P95:        300.0,
-						P99:        500.0,
-						Throughput: 100.0,
-					},
-				},
-			},
-		}
-
-		// In a real benchmark, this would save to the actual storage
-		_ = benchmarkResult // Placeholder
-	}
-}
diff --git a/runner/integration_test.go b/runner/integration_test.go
deleted file mode 100644
index e01e778..0000000
--- a/runner/integration_test.go
+++ /dev/null
@@ -1,1193 +0,0 @@
-package main
-
-import (
-	"bytes"
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"net/http/httptest"
-	"os"
-	"path/filepath"
-	"runtime"
-	"strconv"
-	"strings"
-	"sync"
-	"testing"
-	"time"
-
-	"github.com/gorilla/websocket"
-	_ "github.com/lib/pq"
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-
-	"github.com/jsonrpc-bench/runner/analysis"
-	"github.com/jsonrpc-bench/runner/api"
-	"github.com/jsonrpc-bench/runner/config"
-	"github.com/jsonrpc-bench/runner/storage"
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// IntegrationTestSuite provides a comprehensive test suite for the historic tracking system
-type IntegrationTestSuite struct {
-	suite.Suite
-
-	// Test infrastructure
-	ctx     context.Context
-	cancel  context.CancelFunc
-	logger  *logrus.Logger
-	testDir string
-
-	// Database and storage
-	db               *sql.DB
-	historicStorage  storage.HistoricStorage
-	migrationService *storage.MigrationService
-	storageConfig    *config.StorageConfig
-
-	// Analysis components
-	baselineManager    analysis.BaselineManager
-	trendAnalyzer      analysis.TrendAnalyzer
-	regressionDetector analysis.RegressionDetector
-
-	// API server
-	apiServer  api.Server
-	serverURL  string
-	httpClient *http.Client
-
-	// WebSocket testing
-	wsClients  []*websocket.Conn
-	wsMessages []map[string]interface{}
-	wsMutex    sync.RWMutex
-
-	// Performance monitoring
-	memoryUsage      []int64
-	cpuUsage         []float64
-	performanceMutex sync.RWMutex
-
-	// Test data
-	testRuns        []*types.HistoricRun
-	testBaselines   []*api.Baseline
-	testComparisons []*types.HistoricComparison
-
-	// Cleanup functions
-	cleanupFunctions []func() error
-}
-
-// SetupSuite initializes the test environment before running any tests
-func (suite *IntegrationTestSuite) SetupSuite() {
-	suite.ctx, suite.cancel = context.WithCancel(context.Background())
-
-	// Initialize logger
-	suite.logger = logrus.New()
-	suite.logger.SetLevel(logrus.InfoLevel)
-	suite.logger.SetFormatter(&logrus.JSONFormatter{})
-
-	// Create test directory
-	var err error
-	suite.testDir, err = os.MkdirTemp("", "jsonrpc-bench-integration-*")
-	require.NoError(suite.T(), err)
-
-	// Setup database
-	suite.setupDatabase()
-
-	// Setup storage
-	suite.setupStorage()
-
-	// Setup analysis components
-	suite.setupAnalysisComponents()
-
-	// Setup API server
-	suite.setupAPIServer()
-
-	// Setup WebSocket monitoring
-	suite.setupWebSocketMonitoring()
-
-	// Setup performance monitoring
-	suite.setupPerformanceMonitoring()
-
-	// Initialize HTTP client
-	suite.httpClient = &http.Client{
-		Timeout: 30 * time.Second,
-	}
-
-	suite.logger.WithFields(logrus.Fields{
-		"test_dir":   suite.testDir,
-		"server_url": suite.serverURL,
-	}).Info("Integration test suite initialized")
-}
-
-// TearDownSuite cleans up after all tests have completed
-func (suite *IntegrationTestSuite) TearDownSuite() {
-	suite.logger.Info("Cleaning up integration test suite")
-
-	// Cancel context to stop all goroutines
-	suite.cancel()
-
-	// Close WebSocket connections
-	for _, ws := range suite.wsClients {
-		if ws != nil {
-			ws.Close()
-		}
-	}
-
-	// Stop API server
-	if suite.apiServer != nil {
-		suite.apiServer.Stop()
-	}
-
-	// Run cleanup functions in reverse order
-	for i := len(suite.cleanupFunctions) - 1; i >= 0; i-- {
-		if err := suite.cleanupFunctions[i](); err != nil {
-			suite.logger.WithError(err).Warn("Cleanup function failed")
-		}
-	}
-
-	// Close database connection
-	if suite.db != nil {
-		suite.db.Close()
-	}
-
-	// Remove test directory
-	if suite.testDir != "" {
-		os.RemoveAll(suite.testDir)
-	}
-
-	suite.logger.Info("Integration test suite cleanup completed")
-}
-
-// setupDatabase creates and initializes the test database
-func (suite *IntegrationTestSuite) setupDatabase() {
-	// For now, use a simple local PostgreSQL instance
-	// In a complete implementation, this would use testcontainers
-
-	// Create storage config
-	suite.storageConfig = &config.StorageConfig{
-		HistoricPath:   filepath.Join(suite.testDir, "historic"),
-		RetentionDays:  30,
-		EnableHistoric: true,
-		PostgreSQL: &config.PostgreSQLConfig{
-			Host:                  "localhost",
-			Port:                  5432,
-			Database:              "jsonrpc_bench_test",
-			Username:              "postgres",
-			Password:              "postgres",
-			SSLMode:               "disable",
-			MaxConnections:        10,
-			MaxIdleConnections:    2,
-			ConnectionMaxLifetime: 30 * time.Minute,
-			ConnectionTimeout:     10 * time.Second,
-			Schema:                "public",
-		},
-	}
-
-	// Open database connection
-	var err error
-	suite.db, err = sql.Open("postgres", suite.storageConfig.PostgreSQL.GetConnectionString())
-	if err != nil {
-		suite.T().Skip("PostgreSQL not available for integration tests:", err)
-		return
-	}
-
-	// Test connection
-	ctx, cancel := context.WithTimeout(suite.ctx, 10*time.Second)
-	defer cancel()
-	if err := suite.db.PingContext(ctx); err != nil {
-		suite.T().Skip("Cannot connect to test PostgreSQL database:", err)
-		return
-	}
-
-	// Initialize migration service
-	suite.migrationService = storage.NewMigrationService(suite.db, suite.logger)
-
-	// Reset database to clean state
-	err = suite.migrationService.Reset()
-	require.NoError(suite.T(), err)
-
-	// Create performance indices
-	err = suite.migrationService.CreateIndices()
-	require.NoError(suite.T(), err)
-
-	suite.logger.Info("Test database initialized successfully")
-}
-
-// setupStorage initializes the historic storage system
-func (suite *IntegrationTestSuite) setupStorage() {
-	suite.historicStorage = storage.NewHistoricStorage(
-		suite.db,
-		suite.storageConfig,
-		suite.logger,
-	)
-
-	err := suite.historicStorage.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	suite.cleanupFunctions = append(suite.cleanupFunctions, func() error {
-		return suite.historicStorage.Stop()
-	})
-
-	suite.logger.Info("Historic storage initialized successfully")
-}
-
-// setupAnalysisComponents initializes analysis components
-func (suite *IntegrationTestSuite) setupAnalysisComponents() {
-	// Initialize baseline manager
-	suite.baselineManager = analysis.NewBaselineManager(
-		suite.db,
-		suite.historicStorage,
-		suite.logger,
-	)
-
-	err := suite.baselineManager.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	// Initialize trend analyzer
-	suite.trendAnalyzer = analysis.NewTrendAnalyzer(
-		suite.db,
-		suite.historicStorage,
-		suite.logger,
-	)
-
-	err = suite.trendAnalyzer.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	// Initialize regression detector
-	suite.regressionDetector = analysis.NewRegressionDetector(
-		suite.db,
-		suite.historicStorage,
-		suite.baselineManager,
-		suite.trendAnalyzer,
-		suite.logger,
-	)
-
-	err = suite.regressionDetector.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	suite.cleanupFunctions = append(suite.cleanupFunctions, func() error {
-		suite.regressionDetector.Stop()
-		suite.trendAnalyzer.Stop()
-		suite.baselineManager.Stop()
-		return nil
-	})
-
-	suite.logger.Info("Analysis components initialized successfully")
-}
-
-// setupAPIServer initializes the HTTP API server
-func (suite *IntegrationTestSuite) setupAPIServer() {
-	suite.apiServer = api.NewServer(
-		suite.historicStorage,
-		suite.baselineManager,
-		suite.trendAnalyzer,
-		suite.regressionDetector,
-		suite.db,
-		suite.logger,
-	)
-
-	err := suite.apiServer.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-
-	// Set server URL (assuming it starts on :8080)
-	suite.serverURL = "http://localhost:8080"
-
-	// Wait for server to be ready
-	suite.waitForServerReady()
-
-	suite.cleanupFunctions = append(suite.cleanupFunctions, func() error {
-		return suite.apiServer.Stop()
-	})
-
-	suite.logger.Info("API server initialized successfully")
-}
-
-// setupWebSocketMonitoring initializes WebSocket connections for testing
-func (suite *IntegrationTestSuite) setupWebSocketMonitoring() {
-	suite.wsMessages = make([]map[string]interface{}, 0)
-
-	// Connect to WebSocket endpoint
-	wsURL := strings.Replace(suite.serverURL, "http://", "ws://", 1) + "/api/ws"
-
-	conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
-	if err != nil {
-		suite.logger.WithError(err).Warn("Failed to connect to WebSocket for testing")
-		return
-	}
-
-	suite.wsClients = append(suite.wsClients, conn)
-
-	// Start message listener
-	go suite.listenWebSocketMessages(conn)
-
-	suite.logger.Info("WebSocket monitoring initialized successfully")
-}
-
-// setupPerformanceMonitoring initializes performance monitoring
-func (suite *IntegrationTestSuite) setupPerformanceMonitoring() {
-	suite.memoryUsage = make([]int64, 0)
-	suite.cpuUsage = make([]float64, 0)
-
-	// Start performance monitoring goroutine
-	go suite.monitorPerformance()
-
-	suite.logger.Info("Performance monitoring initialized successfully")
-}
-
-// waitForServerReady waits for the API server to be ready
-func (suite *IntegrationTestSuite) waitForServerReady() {
-	maxRetries := 30
-	for i := 0; i < maxRetries; i++ {
-		resp, err := suite.httpClient.Get(suite.serverURL + "/health")
-		if err == nil && resp.StatusCode == http.StatusOK {
-			resp.Body.Close()
-			return
-		}
-		if resp != nil {
-			resp.Body.Close()
-		}
-		time.Sleep(1 * time.Second)
-	}
-
-	suite.T().Fatal("API server did not become ready within expected time")
-}
-
-// listenWebSocketMessages listens for WebSocket messages
-func (suite *IntegrationTestSuite) listenWebSocketMessages(conn *websocket.Conn) {
-	defer conn.Close()
-
-	for {
-		select {
-		case <-suite.ctx.Done():
-			return
-		default:
-			var msg map[string]interface{}
-			err := conn.ReadJSON(&msg)
-			if err != nil {
-				if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
-					suite.logger.WithError(err).Error("WebSocket connection error")
-				}
-				return
-			}
-
-			suite.wsMutex.Lock()
-			suite.wsMessages = append(suite.wsMessages, msg)
-			suite.wsMutex.Unlock()
-		}
-	}
-}
-
-// monitorPerformance monitors system performance metrics
-func (suite *IntegrationTestSuite) monitorPerformance() {
-	ticker := time.NewTicker(1 * time.Second)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-suite.ctx.Done():
-			return
-		case <-ticker.C:
-			// Monitor memory usage (simplified)
-			var m runtime.MemStats
-			runtime.ReadMemStats(&m)
-
-			suite.performanceMutex.Lock()
-			suite.memoryUsage = append(suite.memoryUsage, int64(m.Alloc))
-			suite.cpuUsage = append(suite.cpuUsage, 0.0) // Simplified CPU monitoring
-			suite.performanceMutex.Unlock()
-		}
-	}
-}
-
-// Test Scenario 1: Fresh system setup and first benchmark run
-func (suite *IntegrationTestSuite) TestScenario1_FreshSystemSetup() {
-	suite.logger.Info("Running Test Scenario 1: Fresh system setup and first benchmark run")
-
-	// Generate a benchmark result
-	result := suite.generateBenchmarkResult("fresh-system-test", 1)
-
-	// Save the run to historic storage
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-	require.NoError(suite.T(), err)
-	assert.NotEmpty(suite.T(), savedRun.ID)
-	assert.Equal(suite.T(), "fresh-system-test", savedRun.TestName)
-
-	// Verify the run can be retrieved
-	retrievedRun, err := suite.historicStorage.GetHistoricRun(suite.ctx, savedRun.ID)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), savedRun.ID, retrievedRun.ID)
-	assert.Equal(suite.T(), savedRun.TestName, retrievedRun.TestName)
-
-	// Verify files are saved if historic storage is enabled
-	if suite.storageConfig.EnableHistoric {
-		filesPath, err := suite.historicStorage.GetResultFiles(suite.ctx, savedRun.ID)
-		require.NoError(suite.T(), err)
-		assert.DirExists(suite.T(), filesPath)
-
-		// Check that result.json exists
-		resultFile := filepath.Join(filesPath, "result.json")
-		assert.FileExists(suite.T(), resultFile)
-
-		// Check that metadata.json exists
-		metadataFile := filepath.Join(filesPath, "metadata.json")
-		assert.FileExists(suite.T(), metadataFile)
-	}
-
-	// Verify API endpoint returns the run
-	resp, err := suite.httpClient.Get(suite.serverURL + "/api/runs/" + savedRun.ID)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusOK, resp.StatusCode)
-	resp.Body.Close()
-
-	// Store the run for later tests
-	suite.testRuns = append(suite.testRuns, savedRun)
-
-	suite.logger.Info("Test Scenario 1 completed successfully")
-}
-
-// Test Scenario 2: Multiple runs with trend analysis and regression detection
-func (suite *IntegrationTestSuite) TestScenario2_TrendAnalysisAndRegressionDetection() {
-	suite.logger.Info("Running Test Scenario 2: Multiple runs with trend analysis and regression detection")
-
-	testName := "trend-analysis-test"
-
-	// Generate multiple runs with gradually increasing latency (simulating degradation)
-	for i := 0; i < 10; i++ {
-		result := suite.generateBenchmarkResult(testName, i+1)
-
-		// Introduce gradual performance degradation
-		for clientName, clientMetrics := range result.ClientMetrics {
-			degradationFactor := 1.0 + float64(i)*0.1 // 10% increase per run
-			clientMetrics.Latency.Avg *= degradationFactor
-			clientMetrics.Latency.P95 *= degradationFactor
-			clientMetrics.Latency.P99 *= degradationFactor
-			result.ClientMetrics[clientName] = clientMetrics
-		}
-
-		savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(suite.T(), err)
-		suite.testRuns = append(suite.testRuns, savedRun)
-
-		// Small delay to ensure different timestamps
-		time.Sleep(100 * time.Millisecond)
-	}
-
-	// Wait a moment for data to be processed
-	time.Sleep(1 * time.Second)
-
-	// Test trend analysis
-	trend, err := suite.historicStorage.GetHistoricTrends(suite.ctx, testName, "overall", "avg_latency", 30)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), testName, trend.TestName)
-	assert.Equal(suite.T(), "avg_latency", trend.Metric)
-	assert.True(suite.T(), len(trend.Points) >= 5, "Should have multiple trend points")
-
-	// The trend should show degradation
-	assert.Contains(suite.T(), []string{"degrading", "stable"}, trend.Trend, "Trend should show degradation or be stable")
-
-	// Test regression detection
-	if len(suite.testRuns) >= 2 {
-		lastRunID := suite.testRuns[len(suite.testRuns)-1].ID
-
-		// Detect regressions
-		options := analysis.DetectionOptions{
-			ComparisonMode:     "sequential",
-			LookbackCount:      1,
-			WindowSize:         3,
-			EnableStatistical:  true,
-			MinConfidence:      0.90,
-			IgnoreImprovements: false,
-		}
-
-		report, err := suite.regressionDetector.DetectRegressions(suite.ctx, lastRunID, options)
-		require.NoError(suite.T(), err)
-		assert.Equal(suite.T(), lastRunID, report.RunID)
-		assert.Equal(suite.T(), testName, report.TestName)
-	}
-
-	// Test API endpoints for trends
-	resp, err := suite.httpClient.Get(fmt.Sprintf("%s/api/tests/%s/trends?days=30", suite.serverURL, testName))
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusOK, resp.StatusCode)
-	resp.Body.Close()
-
-	suite.logger.Info("Test Scenario 2 completed successfully")
-}
-
-// Test Scenario 3: Baseline management and comparison workflows
-func (suite *IntegrationTestSuite) TestScenario3_BaselineManagement() {
-	suite.logger.Info("Running Test Scenario 3: Baseline management and comparison workflows")
-
-	require.True(suite.T(), len(suite.testRuns) > 0, "Need at least one test run for baseline testing")
-
-	testRun := suite.testRuns[0]
-	baselineName := "baseline-test-v1"
-
-	// Create a baseline
-	baseline, err := suite.baselineManager.SetBaseline(suite.ctx, testRun.ID, baselineName, "Test baseline for integration testing")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), baselineName, baseline.Name)
-	assert.Equal(suite.T(), testRun.ID, baseline.RunID)
-	assert.Equal(suite.T(), testRun.TestName, baseline.TestName)
-
-	suite.testBaselines = append(suite.testBaselines, baseline)
-
-	// Verify baseline can be retrieved
-	retrievedBaseline, err := suite.baselineManager.GetBaseline(suite.ctx, baselineName)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), baseline.ID, retrievedBaseline.ID)
-	assert.Equal(suite.T(), baseline.Name, retrievedBaseline.Name)
-
-	// List baselines
-	baselines, err := suite.baselineManager.ListBaselines(suite.ctx, testRun.TestName)
-	require.NoError(suite.T(), err)
-	assert.True(suite.T(), len(baselines) >= 1, "Should have at least one baseline")
-
-	// Test baseline comparison if we have multiple runs
-	if len(suite.testRuns) > 1 {
-		compareRun := suite.testRuns[1]
-		comparison, err := suite.baselineManager.CompareToBaseline(suite.ctx, compareRun.ID, baselineName)
-		require.NoError(suite.T(), err)
-		assert.Equal(suite.T(), baselineName, comparison.BaselineName)
-		assert.Equal(suite.T(), compareRun.ID, comparison.RunID)
-		assert.NotEmpty(suite.T(), comparison.Summary)
-	}
-
-	// Test API endpoints
-	// Create baseline via API
-	baselineReq := api.BaselineRequest{
-		RunID:       testRun.ID,
-		Name:        "api-baseline-test",
-		Description: "Baseline created via API",
-	}
-	reqBody, _ := json.Marshal(baselineReq)
-
-	resp, err := suite.httpClient.Post(
-		suite.serverURL+"/api/baselines",
-		"application/json",
-		bytes.NewBuffer(reqBody),
-	)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusCreated, resp.StatusCode)
-	resp.Body.Close()
-
-	// List baselines via API
-	resp, err = suite.httpClient.Get(suite.serverURL + "/api/baselines")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusOK, resp.StatusCode)
-	resp.Body.Close()
-
-	suite.logger.Info("Test Scenario 3 completed successfully")
-}
-
-// Test Scenario 4: WebSocket notifications during system operations
-func (suite *IntegrationTestSuite) TestScenario4_WebSocketNotifications() {
-	suite.logger.Info("Running Test Scenario 4: WebSocket notifications during system operations")
-
-	// Record initial message count
-	suite.wsMutex.RLock()
-	initialMessageCount := len(suite.wsMessages)
-	suite.wsMutex.RUnlock()
-
-	// Perform operations that should generate WebSocket notifications
-	result := suite.generateBenchmarkResult("websocket-test", 1)
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-	require.NoError(suite.T(), err)
-
-	// Create a baseline (should trigger notification)
-	if len(suite.wsClients) > 0 {
-		baseline, err := suite.baselineManager.SetBaseline(suite.ctx, savedRun.ID, "ws-test-baseline", "WebSocket test baseline")
-		require.NoError(suite.T(), err)
-
-		// Wait for notifications to be processed
-		time.Sleep(2 * time.Second)
-
-		// Check if we received WebSocket messages
-		suite.wsMutex.RLock()
-		currentMessageCount := len(suite.wsMessages)
-		suite.wsMutex.RUnlock()
-
-		if currentMessageCount > initialMessageCount {
-			suite.logger.WithFields(logrus.Fields{
-				"initial_count": initialMessageCount,
-				"current_count": currentMessageCount,
-			}).Info("WebSocket notifications received")
-		}
-
-		// Test WebSocket ping/pong
-		if len(suite.wsClients) > 0 {
-			conn := suite.wsClients[0]
-			pingMsg := map[string]interface{}{
-				"type":      "ping",
-				"timestamp": time.Now(),
-			}
-
-			err := conn.WriteJSON(pingMsg)
-			assert.NoError(suite.T(), err)
-
-			// Wait for pong response
-			time.Sleep(1 * time.Second)
-		}
-
-		suite.testBaselines = append(suite.testBaselines, baseline)
-	}
-
-	suite.logger.Info("Test Scenario 4 completed successfully")
-}
-
-// Test Scenario 5: Grafana dashboard data queries
-func (suite *IntegrationTestSuite) TestScenario5_GrafanaDashboardQueries() {
-	suite.logger.Info("Running Test Scenario 5: Grafana dashboard data queries")
-
-	// Test Grafana root endpoint
-	resp, err := suite.httpClient.Get(suite.serverURL + "/grafana/")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusOK, resp.StatusCode)
-	resp.Body.Close()
-
-	// Test Grafana search endpoint
-	searchReq := api.GrafanaSearchRequest{
-		Target: "test",
-	}
-	reqBody, _ := json.Marshal(searchReq)
-
-	resp, err = suite.httpClient.Post(
-		suite.serverURL+"/grafana/search",
-		"application/json",
-		bytes.NewBuffer(reqBody),
-	)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusOK, resp.StatusCode)
-
-	var searchResults []string
-	err = json.NewDecoder(resp.Body).Decode(&searchResults)
-	require.NoError(suite.T(), err)
-	resp.Body.Close()
-
-	// Test Grafana query endpoint with time series data
-	if len(suite.testRuns) > 0 {
-		testName := suite.testRuns[0].TestName
-
-		queryReq := api.GrafanaQueryRequest{
-			Range: api.GrafanaTimeRange{
-				From: time.Now().Add(-24 * time.Hour).Format(time.RFC3339),
-				To:   time.Now().Format(time.RFC3339),
-			},
-			Targets: []api.GrafanaTarget{
-				{
-					Target: fmt.Sprintf("%s.avg_latency", testName),
-				},
-				{
-					Target: fmt.Sprintf("%s.error_rate", testName),
-				},
-			},
-		}
-		reqBody, _ := json.Marshal(queryReq)
-
-		resp, err = suite.httpClient.Post(
-			suite.serverURL+"/grafana/query",
-			"application/json",
-			bytes.NewBuffer(reqBody),
-		)
-		require.NoError(suite.T(), err)
-		assert.Equal(suite.T(), http.StatusOK, resp.StatusCode)
-
-		var queryResults []api.GrafanaTimeSeries
-		err = json.NewDecoder(resp.Body).Decode(&queryResults)
-		require.NoError(suite.T(), err)
-		resp.Body.Close()
-
-		// Verify we got results for both targets
-		assert.True(suite.T(), len(queryResults) >= 0, "Should return query results")
-	}
-
-	// Test tag keys endpoint
-	resp, err = suite.httpClient.Post(
-		suite.serverURL+"/grafana/tag-keys",
-		"application/json",
-		bytes.NewBuffer([]byte("{}")),
-	)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusOK, resp.StatusCode)
-	resp.Body.Close()
-
-	suite.logger.Info("Test Scenario 5 completed successfully")
-}
-
-// Test Scenario 6: Large dataset performance testing
-func (suite *IntegrationTestSuite) TestScenario6_LargeDatasetPerformance() {
-	suite.logger.Info("Running Test Scenario 6: Large dataset performance testing")
-
-	testName := "performance-test"
-	numRuns := 100
-
-	// Record initial performance metrics
-	suite.performanceMutex.RLock()
-	initialMemory := int64(0)
-	if len(suite.memoryUsage) > 0 {
-		initialMemory = suite.memoryUsage[len(suite.memoryUsage)-1]
-	}
-	suite.performanceMutex.RUnlock()
-
-	startTime := time.Now()
-
-	// Generate a large number of benchmark runs
-	var runs []*types.HistoricRun
-	for i := 0; i < numRuns; i++ {
-		result := suite.generateBenchmarkResult(testName, i+1)
-
-		// Add more complexity to the data
-		for clientName, clientMetrics := range result.ClientMetrics {
-			// Add more methods
-			for j := 0; j < 10; j++ {
-				methodName := fmt.Sprintf("additional_method_%d", j)
-				clientMetrics.Methods[methodName] = types.MetricSummary{
-					Count:     int64(100 + i*10),
-					Min:       float64(10 + i),
-					Max:       float64(500 + i*5),
-					Avg:       float64(100 + i*2),
-					P50:       float64(90 + i*2),
-					P95:       float64(200 + i*3),
-					P99:       float64(300 + i*4),
-					ErrorRate: float64(i) / 1000.0,
-				}
-			}
-			result.ClientMetrics[clientName] = clientMetrics
-		}
-
-		savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(suite.T(), err)
-		runs = append(runs, savedRun)
-
-		// Log progress every 10 runs
-		if (i+1)%10 == 0 {
-			suite.logger.WithField("completed", i+1).Info("Generated runs for performance test")
-		}
-	}
-
-	ingestionDuration := time.Since(startTime)
-
-	// Record final performance metrics
-	suite.performanceMutex.RLock()
-	finalMemory := int64(0)
-	if len(suite.memoryUsage) > 0 {
-		finalMemory = suite.memoryUsage[len(suite.memoryUsage)-1]
-	}
-	suite.performanceMutex.RUnlock()
-
-	memoryIncrease := finalMemory - initialMemory
-
-	suite.logger.WithFields(logrus.Fields{
-		"num_runs":           numRuns,
-		"ingestion_duration": ingestionDuration,
-		"memory_increase_mb": memoryIncrease / (1024 * 1024),
-		"runs_per_second":    float64(numRuns) / ingestionDuration.Seconds(),
-	}).Info("Large dataset performance metrics")
-
-	// Test performance of various queries
-	queryStartTime := time.Now()
-
-	// List runs
-	listedRuns, err := suite.historicStorage.ListHistoricRuns(suite.ctx, testName, 50)
-	require.NoError(suite.T(), err)
-	assert.True(suite.T(), len(listedRuns) >= 50, "Should return requested number of runs")
-
-	// Get trends
-	trend, err := suite.historicStorage.GetHistoricTrends(suite.ctx, testName, "overall", "avg_latency", 30)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), testName, trend.TestName)
-
-	// Get summary
-	summary, err := suite.historicStorage.GetHistoricSummary(suite.ctx, testName)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), testName, summary.TestName)
-	assert.True(suite.T(), summary.TotalRuns >= numRuns, "Summary should reflect all runs")
-
-	queryDuration := time.Since(queryStartTime)
-
-	suite.logger.WithFields(logrus.Fields{
-		"query_duration": queryDuration,
-		"total_runs":     summary.TotalRuns,
-	}).Info("Query performance metrics")
-
-	// Performance assertions
-	assert.Less(suite.T(), ingestionDuration.Seconds(), 60.0, "Ingestion should complete within 60 seconds")
-	assert.Less(suite.T(), queryDuration.Seconds(), 10.0, "Queries should complete within 10 seconds")
-	assert.Less(suite.T(), memoryIncrease, int64(100*1024*1024), "Memory increase should be less than 100MB")
-
-	suite.logger.Info("Test Scenario 6 completed successfully")
-}
-
-// Test Scenario 7: System recovery after failures
-func (suite *IntegrationTestSuite) TestScenario7_SystemRecovery() {
-	suite.logger.Info("Running Test Scenario 7: System recovery after failures")
-
-	// Test database connection recovery
-	originalDB := suite.db
-
-	// Simulate database connection failure by closing it
-	suite.db.Close()
-
-	// Try to perform operations (should fail gracefully)
-	result := suite.generateBenchmarkResult("recovery-test", 1)
-	_, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-	assert.Error(suite.T(), err, "Should fail when database is unavailable")
-
-	// Restore database connection
-	suite.db, err = sql.Open("postgres", suite.storageConfig.PostgreSQL.GetConnectionString())
-	require.NoError(suite.T(), err)
-
-	// Test that operations work again
-	savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-	require.NoError(suite.T(), err)
-	assert.NotEmpty(suite.T(), savedRun.ID)
-
-	// Test file system recovery
-	if suite.storageConfig.EnableHistoric {
-		// Temporarily make historic directory read-only
-		historicDir := suite.storageConfig.HistoricPath
-		originalPerm := os.FileMode(0755)
-
-		err := os.Chmod(historicDir, 0444) // Read-only
-		if err == nil {
-			// Try to save files (should fail gracefully)
-			result2 := suite.generateBenchmarkResult("recovery-test-2", 1)
-			savedRun2, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result2)
-
-			// The run should still be saved to database even if file save fails
-			assert.NoError(suite.T(), err)
-			assert.NotEmpty(suite.T(), savedRun2.ID)
-
-			// Restore permissions
-			os.Chmod(historicDir, originalPerm)
-
-			// Test that file operations work again
-			result3 := suite.generateBenchmarkResult("recovery-test-3", 1)
-			savedRun3, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result3)
-			require.NoError(suite.T(), err)
-
-			// Verify files are saved
-			filesPath, err := suite.historicStorage.GetResultFiles(suite.ctx, savedRun3.ID)
-			require.NoError(suite.T(), err)
-			assert.DirExists(suite.T(), filesPath)
-		}
-	}
-
-	// Test API server resilience
-	// Make invalid requests to test error handling
-	resp, err := suite.httpClient.Get(suite.serverURL + "/api/runs/invalid-run-id")
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusNotFound, resp.StatusCode)
-	resp.Body.Close()
-
-	// Test invalid JSON in request body
-	resp, err = suite.httpClient.Post(
-		suite.serverURL+"/api/baselines",
-		"application/json",
-		bytes.NewBuffer([]byte("invalid json")),
-	)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), http.StatusBadRequest, resp.StatusCode)
-	resp.Body.Close()
-
-	// Test cleanup and recovery
-	err = suite.historicStorage.CleanupOldFiles(suite.ctx)
-	assert.NoError(suite.T(), err, "Cleanup should handle errors gracefully")
-
-	suite.logger.Info("Test Scenario 7 completed successfully")
-}
-
-// Test concurrent access and race conditions
-func (suite *IntegrationTestSuite) TestConcurrentAccess() {
-	suite.logger.Info("Testing concurrent access and race conditions")
-
-	testName := "concurrent-test"
-	numGoroutines := 10
-	opsPerGoroutine := 5
-
-	var wg sync.WaitGroup
-	errors := make(chan error, numGoroutines*opsPerGoroutine)
-
-	// Concurrent writes
-	for i := 0; i < numGoroutines; i++ {
-		wg.Add(1)
-		go func(goroutineID int) {
-			defer wg.Done()
-
-			for j := 0; j < opsPerGoroutine; j++ {
-				result := suite.generateBenchmarkResult(testName, goroutineID*opsPerGoroutine+j+1)
-				_, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-				if err != nil {
-					errors <- err
-				}
-			}
-		}(i)
-	}
-
-	// Concurrent reads
-	for i := 0; i < numGoroutines; i++ {
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-
-			for j := 0; j < opsPerGoroutine; j++ {
-				_, err := suite.historicStorage.ListHistoricRuns(suite.ctx, testName, 10)
-				if err != nil {
-					errors <- err
-				}
-
-				_, err = suite.historicStorage.GetHistoricSummary(suite.ctx, testName)
-				if err != nil {
-					errors <- err
-				}
-			}
-		}()
-	}
-
-	wg.Wait()
-	close(errors)
-
-	// Check for errors
-	var errorCount int
-	for err := range errors {
-		errorCount++
-		suite.logger.WithError(err).Error("Concurrent operation error")
-	}
-
-	assert.Equal(suite.T(), 0, errorCount, "No errors should occur during concurrent operations")
-
-	suite.logger.Info("Concurrent access test completed successfully")
-}
-
-// Test WebSocket connection limits and behavior
-func (suite *IntegrationTestSuite) TestWebSocketConnectionLimits() {
-	suite.logger.Info("Testing WebSocket connection limits")
-
-	// Create multiple WebSocket connections
-	maxConnections := 100
-	connections := make([]*websocket.Conn, 0, maxConnections)
-
-	wsURL := strings.Replace(suite.serverURL, "http://", "ws://", 1) + "/api/ws"
-
-	for i := 0; i < maxConnections; i++ {
-		conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
-		if err != nil {
-			suite.logger.WithError(err).WithField("connection_num", i).Warn("Failed to create WebSocket connection")
-			break
-		}
-
-		connections = append(connections, conn)
-	}
-
-	suite.logger.WithField("connections_created", len(connections)).Info("Created WebSocket connections")
-
-	// Send messages to all connections
-	message := map[string]interface{}{
-		"type":    "test",
-		"content": "connection limit test",
-	}
-
-	for i, conn := range connections {
-		err := conn.WriteJSON(message)
-		if err != nil {
-			suite.logger.WithError(err).WithField("connection_num", i).Warn("Failed to write to WebSocket")
-		}
-	}
-
-	// Close all connections
-	for _, conn := range connections {
-		conn.Close()
-	}
-
-	suite.logger.Info("WebSocket connection limits test completed")
-}
-
-// generateBenchmarkResult creates a sample benchmark result for testing
-func (suite *IntegrationTestSuite) generateBenchmarkResult(testName string, runNumber int) *types.BenchmarkResult {
-	now := time.Now()
-	startTime := now.Add(-5 * time.Minute)
-	endTime := now
-
-	// Generate client metrics
-	clients := []string{"client-1", "client-2", "client-3"}
-	clientMetrics := make(map[string]*types.ClientMetrics)
-
-	for i, clientName := range clients {
-		metrics := &types.ClientMetrics{
-			Name:          clientName,
-			TotalRequests: int64(1000 + runNumber*100),
-			TotalErrors:   int64(10 + runNumber),
-			ErrorRate:     float64(10+runNumber) / float64(1000+runNumber*100),
-			Latency: types.MetricSummary{
-				Count:     int64(1000 + runNumber*100),
-				Min:       float64(5 + i),
-				Max:       float64(500 + i*10),
-				Avg:       float64(100 + i*5 + runNumber),
-				P50:       float64(90 + i*5 + runNumber),
-				P75:       float64(150 + i*7 + runNumber),
-				P90:       float64(200 + i*10 + runNumber),
-				P95:       float64(250 + i*12 + runNumber),
-				P99:       float64(400 + i*15 + runNumber),
-				ErrorRate: float64(10+runNumber+i) / float64(1000+runNumber*100),
-			},
-			Methods: make(map[string]types.MetricSummary),
-			ConnectionMetrics: types.ConnectionMetrics{
-				ActiveConnections:  int64(10 + i),
-				ConnectionsCreated: int64(20 + i*2),
-				ConnectionsClosed:  int64(15 + i),
-				ConnectionReuse:    0.8 + float64(i)*0.05,
-			},
-			TimeSeries:    make(map[string][]types.TimeSeriesPoint),
-			SystemMetrics: []types.SystemMetrics{},
-			ErrorTypes:    make(map[string]int64),
-			StatusCodes:   make(map[int]int64),
-		}
-
-		// Add method metrics
-		methods := []string{"eth_getBalance", "eth_getBlockByNumber", "eth_getTransactionByHash"}
-		for j, methodName := range methods {
-			metrics.Methods[methodName] = types.MetricSummary{
-				Count:     int64(300 + runNumber*30 + j*10),
-				Min:       float64(3 + j),
-				Max:       float64(200 + j*5),
-				Avg:       float64(50 + j*3 + runNumber),
-				P50:       float64(45 + j*3 + runNumber),
-				P95:       float64(120 + j*5 + runNumber),
-				P99:       float64(180 + j*7 + runNumber),
-				ErrorRate: float64(5+runNumber+j) / float64(300+runNumber*30+j*10),
-			}
-		}
-
-		// Add status codes
-		metrics.StatusCodes[200] = int64(900 + runNumber*90)
-		metrics.StatusCodes[500] = int64(5 + runNumber)
-		metrics.StatusCodes[429] = int64(3 + runNumber/2)
-
-		clientMetrics[clientName] = metrics
-	}
-
-	return &types.BenchmarkResult{
-		Config: map[string]interface{}{
-			"test_name":   testName,
-			"description": fmt.Sprintf("Integration test run #%d", runNumber),
-			"rps":         100,
-			"duration":    "5m",
-			"endpoints":   []string{"eth_getBalance", "eth_getBlockByNumber", "eth_getTransactionByHash"},
-		},
-		Summary: map[string]interface{}{
-			"total_requests": int64(3000 + runNumber*300),
-			"total_errors":   int64(30 + runNumber*3),
-			"duration":       "5m",
-		},
-		ClientMetrics: clientMetrics,
-		Timestamp:     now.Format(time.RFC3339),
-		StartTime:     startTime.Format(time.RFC3339),
-		EndTime:       endTime.Format(time.RFC3339),
-		Duration:      endTime.Sub(startTime).String(),
-		Environment: types.EnvironmentInfo{
-			OS:            "linux",
-			Architecture:  "amd64",
-			CPUModel:      "Intel Core i7",
-			CPUCores:      8,
-			TotalMemoryGB: 16.0,
-			GoVersion:     "go1.21.0",
-			K6Version:     "v0.45.0",
-			NetworkType:   "ethernet",
-		},
-	}
-}
-
-// Helper method to validate benchmark result structure
-func (suite *IntegrationTestSuite) validateBenchmarkResult(result *types.BenchmarkResult) {
-	assert.NotNil(suite.T(), result)
-	assert.NotEmpty(suite.T(), result.Timestamp)
-	assert.NotEmpty(suite.T(), result.StartTime)
-	assert.NotEmpty(suite.T(), result.EndTime)
-	assert.NotNil(suite.T(), result.Config)
-	assert.NotNil(suite.T(), result.Summary)
-	assert.NotNil(suite.T(), result.ClientMetrics)
-	assert.True(suite.T(), len(result.ClientMetrics) > 0)
-
-	// Validate client metrics
-	for clientName, metrics := range result.ClientMetrics {
-		assert.NotEmpty(suite.T(), clientName)
-		assert.NotNil(suite.T(), metrics)
-		assert.True(suite.T(), metrics.TotalRequests > 0)
-		assert.True(suite.T(), metrics.Latency.Count > 0)
-		assert.True(suite.T(), metrics.Latency.Avg > 0)
-		assert.True(suite.T(), len(metrics.Methods) > 0)
-	}
-}
-
-// Helper method to wait for specific WebSocket message
-func (suite *IntegrationTestSuite) waitForWebSocketMessage(messageType string, timeout time.Duration) map[string]interface{} {
-	deadline := time.Now().Add(timeout)
-
-	for time.Now().Before(deadline) {
-		suite.wsMutex.RLock()
-		for _, msg := range suite.wsMessages {
-			if msgType, ok := msg["type"].(string); ok && msgType == messageType {
-				suite.wsMutex.RUnlock()
-				return msg
-			}
-		}
-		suite.wsMutex.RUnlock()
-
-		time.Sleep(100 * time.Millisecond)
-	}
-
-	return nil
-}
-
-// Helper method to get performance metrics summary
-func (suite *IntegrationTestSuite) getPerformanceMetricsSummary() map[string]interface{} {
-	suite.performanceMutex.RLock()
-	defer suite.performanceMutex.RUnlock()
-
-	if len(suite.memoryUsage) == 0 {
-		return map[string]interface{}{
-			"memory_samples": 0,
-			"cpu_samples":    0,
-		}
-	}
-
-	// Calculate memory statistics
-	var totalMemory, minMemory, maxMemory int64
-	minMemory = suite.memoryUsage[0]
-	maxMemory = suite.memoryUsage[0]
-
-	for _, mem := range suite.memoryUsage {
-		totalMemory += mem
-		if mem < minMemory {
-			minMemory = mem
-		}
-		if mem > maxMemory {
-			maxMemory = mem
-		}
-	}
-
-	avgMemory := totalMemory / int64(len(suite.memoryUsage))
-
-	return map[string]interface{}{
-		"memory_samples":   len(suite.memoryUsage),
-		"cpu_samples":      len(suite.cpuUsage),
-		"avg_memory_bytes": avgMemory,
-		"min_memory_bytes": minMemory,
-		"max_memory_bytes": maxMemory,
-		"memory_increase":  maxMemory - minMemory,
-	}
-}
-
-// Run the integration test suite
-func TestIntegrationSuite(t *testing.T) {
-	// Skip integration tests if not enabled
-	if testing.Short() {
-		t.Skip("Skipping integration tests in short mode")
-	}
-
-	if os.Getenv("INTEGRATION_TESTS") != "1" {
-		t.Skip("Integration tests not enabled. Set INTEGRATION_TESTS=1 to run.")
-	}
-
-	suite.Run(t, new(IntegrationTestSuite))
-}
diff --git a/runner/integration_testcontainers_test.go b/runner/integration_testcontainers_test.go
deleted file mode 100644
index 86c19bc..0000000
--- a/runner/integration_testcontainers_test.go
+++ /dev/null
@@ -1,566 +0,0 @@
-package main
-
-import (
-	"context"
-	"database/sql"
-	"fmt"
-	"net/http"
-	"os"
-	"testing"
-	"time"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-	"github.com/testcontainers/testcontainers-go"
-	"github.com/testcontainers/testcontainers-go/modules/postgres"
-	"github.com/testcontainers/testcontainers-go/wait"
-
-	"github.com/jsonrpc-bench/runner/config"
-	"github.com/jsonrpc-bench/runner/storage"
-)
-
-// TestContainersIntegrationSuite provides comprehensive integration tests using testcontainers
-type TestContainersIntegrationSuite struct {
-	IntegrationTestSuite
-
-	// Testcontainers
-	postgresContainer *postgres.PostgresContainer
-	containerCtx      context.Context
-}
-
-// SetupSuite initializes the test environment with real containers
-func (suite *TestContainersIntegrationSuite) SetupSuite() {
-	suite.containerCtx = context.Background()
-
-	// Check if Docker is available
-	if !suite.isDockerAvailable() {
-		suite.T().Skip("Docker not available for testcontainers")
-		return
-	}
-
-	// Start PostgreSQL container
-	suite.setupPostgreSQLContainer()
-
-	// Call parent setup with our container database
-	suite.IntegrationTestSuite.SetupSuite()
-}
-
-// TearDownSuite cleans up containers and parent resources
-func (suite *TestContainersIntegrationSuite) TearDownSuite() {
-	// Call parent teardown first
-	suite.IntegrationTestSuite.TearDownSuite()
-
-	// Terminate containers
-	if suite.postgresContainer != nil {
-		if err := suite.postgresContainer.Terminate(suite.containerCtx); err != nil {
-			suite.logger.WithError(err).Error("Failed to terminate PostgreSQL container")
-		}
-	}
-}
-
-// isDockerAvailable checks if Docker is available
-func (suite *TestContainersIntegrationSuite) isDockerAvailable() bool {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-
-	// Try to create a simple container to test Docker availability
-	req := testcontainers.ContainerRequest{
-		Image:        "hello-world",
-		ExposedPorts: []string{},
-		WaitingFor:   wait.ForExit(),
-	}
-
-	container, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
-		ContainerRequest: req,
-		Started:          true,
-	})
-
-	if err != nil {
-		return false
-	}
-
-	defer container.Terminate(ctx)
-	return true
-}
-
-// setupPostgreSQLContainer starts a PostgreSQL container for testing
-func (suite *TestContainersIntegrationSuite) setupPostgreSQLContainer() {
-	ctx, cancel := context.WithTimeout(suite.containerCtx, 120*time.Second)
-	defer cancel()
-
-	// Start PostgreSQL container
-	postgresContainer, err := postgres.RunContainer(ctx,
-		testcontainers.WithImage("postgres:15-alpine"),
-		postgres.WithDatabase("testdb"),
-		postgres.WithUsername("testuser"),
-		postgres.WithPassword("testpass"),
-		testcontainers.WithWaitStrategy(
-			wait.ForLog("database system is ready to accept connections").
-				WithOccurrence(2).
-				WithStartupTimeout(60*time.Second)),
-	)
-
-	require.NoError(suite.T(), err)
-	suite.postgresContainer = postgresContainer
-
-	// Get connection details
-	host, err := postgresContainer.Host(ctx)
-	require.NoError(suite.T(), err)
-
-	port, err := postgresContainer.MappedPort(ctx, "5432")
-	require.NoError(suite.T(), err)
-
-	// Update storage config to use container
-	suite.storageConfig = &config.StorageConfig{
-		HistoricPath:   suite.testDir + "/historic",
-		RetentionDays:  30,
-		EnableHistoric: true,
-		PostgreSQL: &config.PostgreSQLConfig{
-			Host:                  host,
-			Port:                  port.Int(),
-			Database:              "testdb",
-			Username:              "testuser",
-			Password:              "testpass",
-			SSLMode:               "disable",
-			MaxConnections:        10,
-			MaxIdleConnections:    2,
-			ConnectionMaxLifetime: 30 * time.Minute,
-			ConnectionTimeout:     10 * time.Second,
-			Schema:                "public",
-		},
-	}
-
-	// Open database connection
-	suite.db, err = sql.Open("postgres", suite.storageConfig.PostgreSQL.GetConnectionString())
-	require.NoError(suite.T(), err)
-
-	// Test connection
-	err = suite.db.Ping()
-	require.NoError(suite.T(), err)
-
-	suite.logger.WithFields(map[string]interface{}{
-		"host": host,
-		"port": port.Int(),
-	}).Info("PostgreSQL container started successfully")
-}
-
-// TestCompleteSystemLifecycle tests the entire system lifecycle with real containers
-func (suite *TestContainersIntegrationSuite) TestCompleteSystemLifecycle() {
-	suite.logger.Info("Testing complete system lifecycle with testcontainers")
-
-	// Run all test scenarios in sequence
-	suite.Run("Scenario1_FreshSystemSetup", func() {
-		suite.TestScenario1_FreshSystemSetup()
-	})
-
-	suite.Run("Scenario2_TrendAnalysisAndRegressionDetection", func() {
-		suite.TestScenario2_TrendAnalysisAndRegressionDetection()
-	})
-
-	suite.Run("Scenario3_BaselineManagement", func() {
-		suite.TestScenario3_BaselineManagement()
-	})
-
-	suite.Run("Scenario4_WebSocketNotifications", func() {
-		suite.TestScenario4_WebSocketNotifications()
-	})
-
-	suite.Run("Scenario5_GrafanaDashboardQueries", func() {
-		suite.TestScenario5_GrafanaDashboardQueries()
-	})
-
-	suite.Run("Scenario6_LargeDatasetPerformance", func() {
-		suite.TestScenario6_LargeDatasetPerformance()
-	})
-
-	suite.Run("Scenario7_SystemRecovery", func() {
-		suite.TestScenario7_SystemRecovery()
-	})
-
-	suite.Run("ConcurrentAccess", func() {
-		suite.TestConcurrentAccess()
-	})
-
-	suite.Run("WebSocketConnectionLimits", func() {
-		suite.TestWebSocketConnectionLimits()
-	})
-
-	suite.logger.Info("Complete system lifecycle test completed successfully")
-}
-
-// TestDatabaseMigrations tests database migrations with a real PostgreSQL instance
-func (suite *TestContainersIntegrationSuite) TestDatabaseMigrations() {
-	suite.logger.Info("Testing database migrations")
-
-	// Get current schema version
-	initialVersion, err := suite.migrationService.GetVersion()
-	require.NoError(suite.T(), err)
-	assert.True(suite.T(), initialVersion > 0, "Should have migrations applied")
-
-	// Test rollback and reapply
-	targetVersion := initialVersion - 1
-	if targetVersion >= 0 {
-		err = suite.migrationService.Down(targetVersion)
-		require.NoError(suite.T(), err)
-
-		currentVersion, err := suite.migrationService.GetVersion()
-		require.NoError(suite.T(), err)
-		assert.Equal(suite.T(), targetVersion, currentVersion)
-
-		// Reapply migrations
-		err = suite.migrationService.Up()
-		require.NoError(suite.T(), err)
-
-		finalVersion, err := suite.migrationService.GetVersion()
-		require.NoError(suite.T(), err)
-		assert.Equal(suite.T(), initialVersion, finalVersion)
-	}
-
-	suite.logger.Info("Database migrations test completed successfully")
-}
-
-// TestPostgreSQLSpecificFeatures tests PostgreSQL-specific features
-func (suite *TestContainersIntegrationSuite) TestPostgreSQLSpecificFeatures() {
-	suite.logger.Info("Testing PostgreSQL-specific features")
-
-	// Test JSONB operations
-	query := `
-		SELECT COUNT(*) FROM historic_runs 
-		WHERE config ? 'test_name' 
-		AND config->>'test_name' LIKE 'test%'
-	`
-
-	var count int
-	err := suite.db.QueryRow(query).Scan(&count)
-	require.NoError(suite.T(), err)
-
-	// Test full-text search capabilities
-	query = `
-		SELECT COUNT(*) FROM historic_runs 
-		WHERE to_tsvector('english', description) @@ to_tsquery('english', 'test')
-	`
-
-	err = suite.db.QueryRow(query).Scan(&count)
-	require.NoError(suite.T(), err)
-
-	// Test array operations
-	query = `
-		SELECT COUNT(*) FROM historic_runs 
-		WHERE tags && ARRAY['performance', 'benchmark']
-	`
-
-	err = suite.db.QueryRow(query).Scan(&count)
-	require.NoError(suite.T(), err)
-
-	// Test window functions for trend analysis
-	query = `
-		SELECT 
-			test_name,
-			timestamp,
-			avg_latency_ms,
-			LAG(avg_latency_ms) OVER (PARTITION BY test_name ORDER BY timestamp) as prev_latency,
-			avg_latency_ms - LAG(avg_latency_ms) OVER (PARTITION BY test_name ORDER BY timestamp) as latency_diff
-		FROM historic_runs 
-		WHERE test_name = $1
-		ORDER BY timestamp
-		LIMIT 5
-	`
-
-	if len(suite.testRuns) > 0 {
-		rows, err := suite.db.Query(query, suite.testRuns[0].TestName)
-		require.NoError(suite.T(), err)
-
-		var results []struct {
-			TestName    string
-			Timestamp   time.Time
-			AvgLatency  float64
-			PrevLatency *float64
-			LatencyDiff *float64
-		}
-
-		for rows.Next() {
-			var r struct {
-				TestName    string
-				Timestamp   time.Time
-				AvgLatency  float64
-				PrevLatency *float64
-				LatencyDiff *float64
-			}
-
-			err := rows.Scan(&r.TestName, &r.Timestamp, &r.AvgLatency, &r.PrevLatency, &r.LatencyDiff)
-			require.NoError(suite.T(), err)
-			results = append(results, r)
-		}
-		rows.Close()
-
-		assert.True(suite.T(), len(results) > 0, "Should return window function results")
-	}
-
-	suite.logger.Info("PostgreSQL-specific features test completed successfully")
-}
-
-// TestDatabasePerformanceUnderLoad tests database performance with concurrent load
-func (suite *TestContainersIntegrationSuite) TestDatabasePerformanceUnderLoad() {
-	suite.logger.Info("Testing database performance under load")
-
-	// Test concurrent writes
-	numWorkers := 20
-	operationsPerWorker := 50
-	testName := "load-test"
-
-	results := make(chan error, numWorkers*operationsPerWorker)
-	startTime := time.Now()
-
-	for i := 0; i < numWorkers; i++ {
-		go func(workerID int) {
-			for j := 0; j < operationsPerWorker; j++ {
-				result := suite.generateBenchmarkResult(
-					fmt.Sprintf("%s-worker-%d", testName, workerID),
-					j+1,
-				)
-
-				_, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-				results <- err
-			}
-		}(i)
-	}
-
-	// Collect results
-	var errors []error
-	for i := 0; i < numWorkers*operationsPerWorker; i++ {
-		if err := <-results; err != nil {
-			errors = append(errors, err)
-		}
-	}
-
-	duration := time.Since(startTime)
-
-	suite.logger.WithFields(map[string]interface{}{
-		"workers":               numWorkers,
-		"operations_per_worker": operationsPerWorker,
-		"total_operations":      numWorkers * operationsPerWorker,
-		"duration":              duration,
-		"ops_per_second":        float64(numWorkers*operationsPerWorker) / duration.Seconds(),
-		"error_count":           len(errors),
-	}).Info("Database load test completed")
-
-	// Performance assertions
-	assert.Less(suite.T(), len(errors), numWorkers*operationsPerWorker/10, "Error rate should be less than 10%")
-	assert.Less(suite.T(), duration.Seconds(), 60.0, "Load test should complete within 60 seconds")
-
-	// Test concurrent reads during writes
-	go func() {
-		for i := 0; i < 100; i++ {
-			_, err := suite.historicStorage.ListHistoricRuns(suite.ctx, testName, 10)
-			if err != nil {
-				suite.logger.WithError(err).Warn("Read operation failed during load test")
-			}
-			time.Sleep(10 * time.Millisecond)
-		}
-	}()
-
-	time.Sleep(2 * time.Second) // Let concurrent reads run
-
-	suite.logger.Info("Database performance under load test completed successfully")
-}
-
-// TestContainerResourceLimits tests behavior under resource constraints
-func (suite *TestContainersIntegrationSuite) TestContainerResourceLimits() {
-	suite.logger.Info("Testing container resource limits")
-
-	// Test behavior with limited connections
-	originalMaxConnections := suite.storageConfig.PostgreSQL.MaxConnections
-	suite.storageConfig.PostgreSQL.MaxConnections = 5
-
-	// Create multiple connections to test pool limits
-	var connections []*sql.DB
-	for i := 0; i < 10; i++ {
-		db, err := sql.Open("postgres", suite.storageConfig.PostgreSQL.GetConnectionString())
-		if err != nil {
-			break
-		}
-
-		db.SetMaxOpenConns(1)
-		connections = append(connections, db)
-
-		// Test connection
-		ctx, cancel := context.WithTimeout(suite.ctx, 5*time.Second)
-		if err := db.PingContext(ctx); err != nil {
-			cancel()
-			db.Close()
-			break
-		}
-		cancel()
-	}
-
-	suite.logger.WithField("connections_created", len(connections)).Info("Created database connections")
-
-	// Test operations with limited connections
-	for i := 0; i < 5; i++ {
-		result := suite.generateBenchmarkResult("resource-limit-test", i+1)
-		_, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-		assert.NoError(suite.T(), err, "Operations should work within connection limits")
-	}
-
-	// Cleanup connections
-	for _, db := range connections {
-		db.Close()
-	}
-
-	// Restore original settings
-	suite.storageConfig.PostgreSQL.MaxConnections = originalMaxConnections
-
-	suite.logger.Info("Container resource limits test completed successfully")
-}
-
-// TestDataIntegrityAndConsistency tests data integrity across operations
-func (suite *TestContainersIntegrationSuite) TestDataIntegrityAndConsistency() {
-	suite.logger.Info("Testing data integrity and consistency")
-
-	testName := "integrity-test"
-
-	// Create multiple related runs
-	var runIDs []string
-	for i := 0; i < 5; i++ {
-		result := suite.generateBenchmarkResult(testName, i+1)
-		savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(suite.T(), err)
-		runIDs = append(runIDs, savedRun.ID)
-	}
-
-	// Create baseline from first run
-	baseline, err := suite.baselineManager.SetBaseline(
-		suite.ctx,
-		runIDs[0],
-		"integrity-baseline",
-		"Baseline for integrity testing",
-	)
-	require.NoError(suite.T(), err)
-
-	// Test referential integrity - try to delete run that's used as baseline
-	err = suite.historicStorage.DeleteHistoricRun(suite.ctx, runIDs[0])
-	// This should either fail or cascade properly
-	if err == nil {
-		// If deletion succeeded, baseline should be cleaned up
-		_, err = suite.baselineManager.GetBaseline(suite.ctx, baseline.Name)
-		assert.Error(suite.T(), err, "Baseline should be removed when referenced run is deleted")
-	}
-
-	// Test transaction consistency
-	tx, err := suite.db.Begin()
-	require.NoError(suite.T(), err)
-
-	// Insert partial data and rollback
-	_, err = tx.Exec(`
-		INSERT INTO historic_runs (id, test_name, timestamp, avg_latency_ms) 
-		VALUES ($1, $2, $3, $4)
-	`, "partial-test", testName, time.Now(), 100.0)
-	require.NoError(suite.T(), err)
-
-	// Rollback transaction
-	err = tx.Rollback()
-	require.NoError(suite.T(), err)
-
-	// Verify data was not committed
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM historic_runs WHERE id = $1", "partial-test").Scan(&count)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), 0, count, "Rolled back data should not exist")
-
-	suite.logger.Info("Data integrity and consistency test completed successfully")
-}
-
-// TestBackupAndRestore tests backup and restore capabilities
-func (suite *TestContainersIntegrationSuite) TestBackupAndRestore() {
-	suite.logger.Info("Testing backup and restore capabilities")
-
-	// Create some test data
-	testName := "backup-test"
-	var originalRunIDs []string
-
-	for i := 0; i < 3; i++ {
-		result := suite.generateBenchmarkResult(testName, i+1)
-		savedRun, err := suite.historicStorage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(suite.T(), err)
-		originalRunIDs = append(originalRunIDs, savedRun.ID)
-	}
-
-	// Get count of original data
-	var originalCount int
-	err := suite.db.QueryRow("SELECT COUNT(*) FROM historic_runs WHERE test_name = $1", testName).Scan(&originalCount)
-	require.NoError(suite.T(), err)
-
-	// Simulate backup by dumping specific data
-	backupQuery := `
-		SELECT id, test_name, description, timestamp, avg_latency_ms 
-		FROM historic_runs 
-		WHERE test_name = $1
-	`
-
-	rows, err := suite.db.Query(backupQuery, testName)
-	require.NoError(suite.T(), err)
-	defer rows.Close()
-
-	type backupRow struct {
-		ID          string
-		TestName    string
-		Description string
-		Timestamp   time.Time
-		AvgLatency  float64
-	}
-
-	var backupData []backupRow
-	for rows.Next() {
-		var row backupRow
-		err := rows.Scan(&row.ID, &row.TestName, &row.Description, &row.Timestamp, &row.AvgLatency)
-		require.NoError(suite.T(), err)
-		backupData = append(backupData, row)
-	}
-
-	assert.Equal(suite.T(), originalCount, len(backupData), "Backup should contain all original data")
-
-	// Delete original data
-	_, err = suite.db.Exec("DELETE FROM historic_runs WHERE test_name = $1", testName)
-	require.NoError(suite.T(), err)
-
-	// Verify deletion
-	var afterDeleteCount int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM historic_runs WHERE test_name = $1", testName).Scan(&afterDeleteCount)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), 0, afterDeleteCount, "Data should be deleted")
-
-	// Restore from backup (simplified)
-	for _, row := range backupData {
-		_, err = suite.db.Exec(`
-			INSERT INTO historic_runs (id, test_name, description, timestamp, avg_latency_ms, 
-			                          total_requests, total_errors, overall_error_rate, 
-			                          p95_latency_ms, p99_latency_ms, max_latency_ms, best_client,
-			                          config, performance_scores, full_results, environment) 
-			VALUES ($1, $2, $3, $4, $5, 0, 0, 0, 0, 0, 0, '', '{}', '{}', '{}', '{}')
-		`, row.ID, row.TestName, row.Description, row.Timestamp, row.AvgLatency)
-		require.NoError(suite.T(), err)
-	}
-
-	// Verify restore
-	var afterRestoreCount int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM historic_runs WHERE test_name = $1", testName).Scan(&afterRestoreCount)
-	require.NoError(suite.T(), err)
-	assert.Equal(suite.T(), originalCount, afterRestoreCount, "Restored data should match original count")
-
-	suite.logger.Info("Backup and restore test completed successfully")
-}
-
-// Run the testcontainers integration test suite
-func TestTestContainersIntegrationSuite(t *testing.T) {
-	// Skip if not enabled
-	if testing.Short() {
-		t.Skip("Skipping testcontainers integration tests in short mode")
-	}
-
-	if os.Getenv("TESTCONTAINERS_TESTS") != "1" {
-		t.Skip("Testcontainers integration tests not enabled. Set TESTCONTAINERS_TESTS=1 to run.")
-	}
-
-	suite.Run(t, new(TestContainersIntegrationSuite))
-}
diff --git a/runner/internal/sanitize/sanitize.go b/runner/internal/sanitize/sanitize.go
new file mode 100644
index 0000000..41d043f
--- /dev/null
+++ b/runner/internal/sanitize/sanitize.go
@@ -0,0 +1,51 @@
+// Package sanitize centralizes the small input-cleaning helpers that
+// the api, analysis, and storage packages share. Keeping them here
+// avoids an api -> analysis import cycle while letting non-api code
+// scrub user-controlled values before they reach logrus.
+package sanitize
+
+import (
+	"errors"
+	"strings"
+)
+
+const maxLogValueLen = 512
+
+// logSanitizer strips the bytes that enable log forgery (CR, LF, NUL)
+// plus the rest of the C0 control range and DEL. Defined as a single
+// strings.Replacer so CodeQL's go/log-injection query recognizes the
+// .Replace call as a sanitizer barrier without needing a custom
+// dataflow extension to cross our function boundary.
+var logSanitizer = strings.NewReplacer(
+	"\r", "", "\n", "", "\x00", "", "\x01", "", "\x02", "",
+	"\x03", "", "\x04", "", "\x05", "", "\x06", "", "\x07", "",
+	"\x08", "", "\x09", "", "\x0b", "", "\x0c", "", "\x0e", "",
+	"\x0f", "", "\x10", "", "\x11", "", "\x12", "", "\x13", "",
+	"\x14", "", "\x15", "", "\x16", "", "\x17", "", "\x18", "",
+	"\x19", "", "\x1a", "", "\x1b", "", "\x1c", "", "\x1d", "",
+	"\x1e", "", "\x1f", "", "\x7f", "",
+)
+
+// LogValue scrubs ASCII control characters (CR, LF, NUL, etc.), truncates to
+// maxLogValueLen bytes plus a "..." marker, and trims surrounding whitespace.
+// Use it for fields that flow from a request into a logger entry, even when an
+// upstream validator has already constrained the character class — CodeQL's
+// taint analysis does not cross package boundaries on its own.
+func LogValue(s string) string {
+	s = logSanitizer.Replace(s)
+	if len(s) > maxLogValueLen {
+		s = s[:maxLogValueLen] + "..."
+	}
+	return strings.TrimSpace(s)
+}
+
+// LogError returns an error whose message has been scrubbed of control
+// bytes. Use it whenever an upstream error message may quote a
+// user-controlled value (run IDs, baseline names, etc.) and is about to
+// reach logrus.WithError or another structured-log sink.
+func LogError(err error) error {
+	if err == nil {
+		return nil
+	}
+	return errors.New(LogValue(err.Error()))
+}
diff --git a/runner/internal/sanitize/sanitize_test.go b/runner/internal/sanitize/sanitize_test.go
new file mode 100644
index 0000000..6ef1676
--- /dev/null
+++ b/runner/internal/sanitize/sanitize_test.go
@@ -0,0 +1,58 @@
+package sanitize
+
+import (
+	"errors"
+	"strings"
+	"testing"
+)
+
+func TestLogValue(t *testing.T) {
+	t.Run("strips CR LF NUL TAB", func(t *testing.T) {
+		got := LogValue("hello\r\nworld\x00x\ty")
+		want := "helloworldxy"
+		if got != want {
+			t.Errorf("LogValue = %q, want %q", got, want)
+		}
+	})
+
+	t.Run("truncates long input", func(t *testing.T) {
+		input := strings.Repeat("a", maxLogValueLen+50)
+		got := LogValue(input)
+		if len(got) != maxLogValueLen+3 {
+			t.Errorf("unexpected length: %d", len(got))
+		}
+		if !strings.HasSuffix(got, "...") {
+			t.Errorf("expected truncation suffix, got %q", got[len(got)-5:])
+		}
+	})
+
+	t.Run("trims surrounding whitespace", func(t *testing.T) {
+		got := LogValue("  padded  ")
+		if got != "padded" {
+			t.Errorf("LogValue = %q, want %q", got, "padded")
+		}
+	})
+
+	t.Run("passes safe input through", func(t *testing.T) {
+		got := LogValue("legit-id_v2.0")
+		if got != "legit-id_v2.0" {
+			t.Errorf("LogValue = %q, want passthrough", got)
+		}
+	})
+}
+
+func TestLogError(t *testing.T) {
+	t.Run("scrubs CR LF from error message", func(t *testing.T) {
+		got := LogError(errors.New("baseline not found: ../bad\r\nINJECTED"))
+		want := "baseline not found: ../badINJECTED"
+		if got.Error() != want {
+			t.Errorf("LogError = %q, want %q", got.Error(), want)
+		}
+	})
+
+	t.Run("returns nil for nil input", func(t *testing.T) {
+		if LogError(nil) != nil {
+			t.Errorf("LogError(nil) should be nil")
+		}
+	})
+}
diff --git a/runner/metrics/results_collection.go b/runner/metrics/results_collection.go
index 2d5ffa4..bcf19ce 100644
--- a/runner/metrics/results_collection.go
+++ b/runner/metrics/results_collection.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"net/url"
+	"strconv"
 	"strings"
 	"time"
 
@@ -32,12 +33,10 @@ func collectPrometheusClientsMetrics(cfg *config.Config, timestamp time.Time, su
 			Name:              client.Name,
 			Methods:           make(map[string]types.MetricSummary, len(cfg.Calls)),
 			ConnectionMetrics: types.ConnectionMetrics{},
-			// TODO(post-merge): populate from k6 Prometheus labels (error_code, status).
-			// Tracked separately; not in scope for the develop->main merge.
-			ErrorTypes:    make(map[string]int64),
-			StatusCodes:   make(map[int]int64),
-			TotalRequests: 0,
-			TotalErrors:   0,
+			ErrorTypes:        make(map[string]int64),
+			StatusCodes:       make(map[int]int64),
+			TotalRequests:     0,
+			TotalErrors:       0,
 			Latency: types.MetricSummary{
 				Min: 9999999999,
 				Max: 0,
@@ -155,17 +154,23 @@ func collectPrometheusClientsMetrics(cfg *config.Config, timestamp time.Time, su
 				client.ConnectionMetrics.TCPHandshakeTime += milliseconds
 			}
 		} else if strings.EqualFold(string(metricName), "k6_http_reqs_total") { // Parse total requests metrics per tags
-			_, isError := sample.Metric["error_code"]
+			errorCode, isError := sample.Metric["error_code"]
 			method.Count += int64(metricValue)
 			if isError {
 				method.ErrorCount += int64(metricValue)
-				// Update rates with latest available values
 				method.ErrorRate = (float64(method.ErrorCount) / float64(method.Count)) * 100
+				if code := strings.TrimSpace(string(errorCode)); code != "" {
+					client.ErrorTypes[code] += int64(metricValue)
+				}
 			} else {
 				method.SuccessCount += int64(metricValue)
-				// Update rates with latest available values
 				method.SuccessRate = (float64(method.SuccessCount) / float64(method.Count)) * 100
 			}
+			if statusLabel, ok := sample.Metric["status"]; ok {
+				if code, err := strconv.Atoi(string(statusLabel)); err == nil {
+					client.StatusCodes[code] += int64(metricValue)
+				}
+			}
 		}
 		// Update method metrics
 		client.Methods[string(metricMethod)] = method
diff --git a/runner/metrics/summary_fallback.go b/runner/metrics/summary_fallback.go
index 51b95ef..c622cbb 100644
--- a/runner/metrics/summary_fallback.go
+++ b/runner/metrics/summary_fallback.go
@@ -114,56 +114,85 @@ func collectMissingPairs(clientsMetrics map[string]*types.ClientMetrics, cfg *co
 	return missing
 }
 
+// lookupSubmetric finds a k6 submetric value keyed on either tag ordering
+// (`{req_name:M,scenario:C}` or `{scenario:C,req_name:M}`). k6's tag order
+// is deterministic per version but we tolerate either form to avoid binding
+// the parser to a single upstream choice.
+func lookupSubmetric(s *k6Summary, base, clientName, methodName string) (k6MetricValue, bool) {
+	candidates := [2]string{
+		fmt.Sprintf("%s{req_name:%s,scenario:%s}", base, methodName, clientName),
+		fmt.Sprintf("%s{scenario:%s,req_name:%s}", base, clientName, methodName),
+	}
+	for _, k := range candidates {
+		if v, ok := s.Metrics[k]; ok {
+			return v, true
+		}
+	}
+	return k6MetricValue{}, false
+}
+
+// metricFloat returns the float stored under valueKey in v.Values, or 0 when
+// the map is nil or the key is absent. k6's `--summary-export` may carry an
+// aggregate both as a top-level field (Avg, P95, ...) and in the generic
+// `values` map; this helper reads only the map (callers try the field first).
+func metricFloat(v k6MetricValue, valueKey string) float64 {
+	if f, ok := v.Values[valueKey]; ok {
+		return f
+	}
+	return 0
+}
+
 func extractMethodFromSummary(s *k6Summary, clientName, methodName string) *types.MetricSummary {
 	if s == nil {
 		return nil
 	}
-	callsKey := fmt.Sprintf("client_%s_method_calls_%s", clientName, methodName)
-	latencyKey := fmt.Sprintf("client_%s_method_latency_%s", clientName, methodName)
-	errorsKey := fmt.Sprintf("client_%s_method_errors_%s", clientName, methodName)
-	successKey := fmt.Sprintf("client_%s_method_success_%s", clientName, methodName)
 
-	calls, hasCalls := s.Metrics[callsKey]
-	latency, hasLatency := s.Metrics[latencyKey]
-	if !hasCalls && !hasLatency {
+	duration, hasDuration := lookupSubmetric(s, "http_req_duration", clientName, methodName)
+	reqs, hasReqs := lookupSubmetric(s, "http_reqs", clientName, methodName)
+	if !hasDuration && !hasReqs {
 		return nil
 	}
 
 	method := types.MetricSummary{}
-	if hasCalls {
-		method.Count = calls.Count
-	}
-	if hasLatency {
-		method.Min = latency.Min
-		method.Max = latency.Max
-		method.Avg = latency.Avg
-		method.P50 = latency.Med
-		method.P90 = latency.P90
-		method.P95 = latency.P95
-		method.P99 = latency.P99
-		method.StdDev = (latency.Max - latency.Min) / 4
+
+	if hasDuration {
+		method.Min = pickFloat(duration.Min, metricFloat(duration, "min"))
+		method.Max = pickFloat(duration.Max, metricFloat(duration, "max"))
+		method.Avg = pickFloat(duration.Avg, metricFloat(duration, "avg"))
+		method.P50 = pickFloat(duration.Med, metricFloat(duration, "med"))
+		method.P90 = pickFloat(duration.P90, metricFloat(duration, "p(90)"))
+		method.P95 = pickFloat(duration.P95, metricFloat(duration, "p(95)"))
+		method.P99 = pickFloat(duration.P99, metricFloat(duration, "p(99)"))
+		method.StdDev = (method.Max - method.Min) / 4
 		if method.Avg > 0 {
 			method.CoeffVar = (method.StdDev / method.Avg) * 100
 		}
 	}
-	if errors, ok := s.Metrics[errorsKey]; ok {
-		method.ErrorCount = errors.Count
-	}
-	if success, ok := s.Metrics[successKey]; ok {
-		method.SuccessCount = success.Count
+	if hasReqs {
+		if reqs.Count > 0 {
+			method.Count = reqs.Count
+		} else if c, ok := reqs.Values["count"]; ok {
+			method.Count = int64(c)
+		}
 	}
 
-	if method.Count > 0 {
-		if method.SuccessCount == 0 && method.ErrorCount > 0 {
-			method.SuccessCount = method.Count - method.ErrorCount
-		} else if method.ErrorCount == 0 && method.SuccessCount > 0 {
-			method.ErrorCount = method.Count - method.SuccessCount
-		}
-		if method.ErrorCount > 0 {
-			method.ErrorRate = float64(method.ErrorCount) / float64(method.Count) * 100
-		}
+	if failed, ok := lookupSubmetric(s, "http_req_failed", clientName, methodName); ok && method.Count > 0 {
+		failRate := pickFloat(failed.Rate, metricFloat(failed, "rate"))
+		method.ErrorCount = int64(float64(method.Count)*failRate + 0.5)
+		method.SuccessCount = method.Count - method.ErrorCount
+		method.ErrorRate = failRate * 100
 		method.SuccessRate = 100.0 - method.ErrorRate
+	} else if method.Count > 0 {
+		method.SuccessCount = method.Count
+		method.SuccessRate = 100.0
 	}
 
 	return &method
 }
+
+func pickFloat(primary, fallback float64) float64 {
+	if primary != 0 {
+		return primary
+	}
+	return fallback
+}
diff --git a/runner/metrics/summary_fallback_test.go b/runner/metrics/summary_fallback_test.go
index 654e81d..d37cba2 100644
--- a/runner/metrics/summary_fallback_test.go
+++ b/runner/metrics/summary_fallback_test.go
@@ -97,10 +97,11 @@ func TestApplySummaryFallback_FillsMissingPair(t *testing.T) {
 
 	dir := t.TempDir()
 	path := writeSummary(t, dir, map[string]k6MetricValue{
-		"client_geth_method_calls_eth_blockNumber":   {Count: 250},
-		"client_geth_method_latency_eth_blockNumber": {Avg: 12.5, Min: 1, Max: 100, Med: 8, P90: 25, P95: 35, P99: 80},
-		"client_geth_method_errors_eth_blockNumber":  {Count: 5},
-		"client_geth_method_success_eth_blockNumber": {Count: 245},
+		"http_req_duration{req_name:eth_blockNumber,scenario:geth}": {
+			Avg: 12.5, Min: 1, Max: 100, Med: 8, P90: 25, P95: 35, P99: 80,
+		},
+		"http_reqs{req_name:eth_blockNumber,scenario:geth}":       {Count: 250},
+		"http_req_failed{req_name:eth_blockNumber,scenario:geth}": {Rate: 0.02},
 	})
 
 	logger, buf := makeLogger()
diff --git a/runner/p99_integration_test.go b/runner/p99_integration_test.go
index a965430..029cdde 100644
--- a/runner/p99_integration_test.go
+++ b/runner/p99_integration_test.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/gorilla/mux"
 	_ "github.com/lib/pq"
+	"github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
@@ -37,7 +38,7 @@ func TestP99DataFlow(t *testing.T) {
 	defer db.Close()
 
 	// Ensure benchmark_metrics table exists
-	err := storage.RunMigrations(db)
+	err := storage.RunMigrations(db, logrus.New())
 	require.NoError(t, err, "Failed to run migrations")
 
 	// Create test handler function that uses the database
diff --git a/runner/storage/historic.go b/runner/storage/historic.go
index b25942f..dcdd597 100644
--- a/runner/storage/historic.go
+++ b/runner/storage/historic.go
@@ -14,6 +14,7 @@ import (
 	"github.com/sirupsen/logrus"
 
 	"github.com/jsonrpc-bench/runner/config"
+	"github.com/jsonrpc-bench/runner/internal/sanitize"
 	"github.com/jsonrpc-bench/runner/types"
 )
 
@@ -498,26 +499,349 @@ func (h *HistoricStorage) ListHistoricRuns(ctx context.Context, filter types.Run
 	return h.db.ListRuns(filter)
 }
 
-// GetHistoricTrends retrieves historic trend data.
+// GetHistoricTrends returns one TrendData per top-level run metric
+// (avg_latency, p95_latency, success_rate, total_requests, in that
+// order) over the runs matching filter.TestName / GitBranch / Since / Until.
 //
-// Aggregated trend computation is tracked as a post-merge follow-up
-// (see POST_MERGE_FOLLOWUPS.md). Until then we return an empty slice
-// rather than an error so callers degrade to "no trend data" instead
-// of failing the request.
+// Each TrendData carries the raw per-run samples in chronological
+// order; the caller (analysis package, dashboard, etc.) is responsible
+// for any further smoothing. PercentChange is computed first-to-last
+// (0 when the first sample is 0) and Direction uses a 1% threshold so
+// flat series read as "stable" rather than improving/degrading.
 func (h *HistoricStorage) GetHistoricTrends(ctx context.Context, filter types.TrendFilter) ([]*types.TrendData, error) {
-	return []*types.TrendData{}, nil
+	query := `SELECT id, timestamp, git_commit, total_requests, success_rate, avg_latency, p95_latency
+		FROM benchmark_runs WHERE 1=1`
+	args := []interface{}{}
+	idx := 1
+	if filter.TestName != "" {
+		query += fmt.Sprintf(" AND test_name = $%d", idx)
+		args = append(args, filter.TestName)
+		idx++
+	}
+	if filter.GitBranch != "" {
+		query += fmt.Sprintf(" AND git_branch = $%d", idx)
+		args = append(args, filter.GitBranch)
+		idx++
+	}
+	if !filter.Since.IsZero() {
+		query += fmt.Sprintf(" AND timestamp >= $%d", idx)
+		args = append(args, filter.Since)
+		idx++
+	}
+	if !filter.Until.IsZero() {
+		query += fmt.Sprintf(" AND timestamp <= $%d", idx)
+		args = append(args, filter.Until)
+	}
+	query += " ORDER BY timestamp ASC"
+
+	rows, err := h.db.db.QueryContext(ctx, query, args...)
+	if err != nil {
+		return nil, fmt.Errorf("query benchmark_runs: %w", err)
+	}
+	defer rows.Close()
+
+	type sample struct {
+		ts            time.Time
+		runID         string
+		gitCommit     string
+		totalRequests float64
+		successRate   float64
+		avgLatency    float64
+		p95Latency    float64
+	}
+	var samples []sample
+	for rows.Next() {
+		var s sample
+		var totalRequests int64
+		if err := rows.Scan(&s.runID, &s.ts, &s.gitCommit, &totalRequests, &s.successRate, &s.avgLatency, &s.p95Latency); err != nil {
+			return nil, fmt.Errorf("scan trend row: %w", err)
+		}
+		s.totalRequests = float64(totalRequests)
+		samples = append(samples, s)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterate trend rows: %w", err)
+	}
+
+	if len(samples) == 0 {
+		return []*types.TrendData{}, nil
+	}
+
+	metrics := []struct {
+		name string
+		pick func(sample) float64
+	}{
+		{"avg_latency", func(s sample) float64 { return s.avgLatency }},
+		{"p95_latency", func(s sample) float64 { return s.p95Latency }},
+		{"success_rate", func(s sample) float64 { return s.successRate }},
+		{"total_requests", func(s sample) float64 { return s.totalRequests }},
+	}
+
+	period := filter.Interval
+	if period == "" {
+		period = "raw"
+	}
+
+	result := make([]*types.TrendData, 0, len(metrics))
+	for _, m := range metrics {
+		points := make([]types.TrendPoint, 0, len(samples))
+		for _, s := range samples {
+			points = append(points, types.TrendPoint{
+				Timestamp: s.ts,
+				Value:     m.pick(s),
+				RunID:     s.runID,
+				GitCommit: s.gitCommit,
+			})
+		}
+		first, last := points[0].Value, points[len(points)-1].Value
+		var pctChange float64
+		if first != 0 {
+			pctChange = (last - first) / first * 100
+		}
+		direction := "stable"
+		if pctChange > 1 {
+			direction = trendDirectionFor(m.name, true)
+		} else if pctChange < -1 {
+			direction = trendDirectionFor(m.name, false)
+		}
+		result = append(result, &types.TrendData{
+			Period:        period,
+			TrendPoints:   points,
+			Direction:     direction,
+			PercentChange: pctChange,
+		})
+	}
+	return result, nil
 }
 
-// DeleteHistoricRun deletes a historic run (placeholder implementation)
+// trendDirectionFor maps an absolute change direction onto the
+// improving/degrading semantics the dashboard wants. Latency going up
+// is bad; success rate going up is good. metricUp == true means the
+// last sample is larger than the first.
+func trendDirectionFor(metric string, metricUp bool) string {
+	higherIsBetter := metric == "success_rate" || metric == "total_requests"
+	if metricUp == higherIsBetter {
+		return "improving"
+	}
+	return "degrading"
+}
+
+// DeleteHistoricRun removes a run row and best-effort cleans up its
+// per-run files under basePath. The caller (HandleDeleteRun) maps a
+// "run not found" error to HTTP 404; everything else is a 500.
 func (h *HistoricStorage) DeleteHistoricRun(ctx context.Context, runID string) error {
-	// Placeholder implementation
-	return fmt.Errorf("not implemented")
+	tx, err := h.db.db.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("begin tx: %w", err)
+	}
+	defer tx.Rollback()
+
+	// Drop dependent rows first so the FK-less per-metric table doesn't
+	// orphan samples whose run has gone away.
+	if _, err := tx.ExecContext(ctx, `DELETE FROM benchmark_metrics WHERE run_id = $1`, runID); err != nil {
+		return fmt.Errorf("delete benchmark_metrics: %w", err)
+	}
+
+	res, err := tx.ExecContext(ctx, `DELETE FROM benchmark_runs WHERE id = $1`, runID)
+	if err != nil {
+		return fmt.Errorf("delete benchmark_runs: %w", err)
+	}
+	rows, err := res.RowsAffected()
+	if err != nil {
+		return fmt.Errorf("rows affected: %w", err)
+	}
+	if rows == 0 {
+		return fmt.Errorf("run not found: %s", runID)
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("commit: %w", err)
+	}
+
+	if h.basePath != "" {
+		runDir := filepath.Join(h.basePath, runID)
+		if err := os.RemoveAll(runDir); err != nil && !os.IsNotExist(err) {
+			h.log.WithError(sanitize.LogError(err)).WithField("run_dir", sanitize.LogValue(runDir)).Warn("Failed to remove run directory")
+		}
+	}
+	return nil
 }
 
-// CompareRuns compares two historic runs (placeholder implementation)
+// CompareRuns diffs two stored runs: runID1 is the baseline (older /
+// reference) and runID2 the current run under analysis. When both
+// `full_results` blobs are present and parse, per-client/per-method
+// avg-latency, p95, and success-rate deltas drive the regression /
+// improvement lists. If either blob is empty or malformed, only the
+// aggregate Summary is filled and an unusable current blob leaves
+// CurrentRun nil. A failed lookup of either run is returned wrapped.
 func (h *HistoricStorage) CompareRuns(ctx context.Context, runID1, runID2 string) (*types.BaselineComparison, error) {
-	// Placeholder implementation
-	return &types.BaselineComparison{}, fmt.Errorf("not implemented")
+	baselineRun, err := h.db.GetRun(runID1)
+	if err != nil {
+		return nil, fmt.Errorf("get baseline run %s: %w", runID1, err)
+	}
+	currentRun, err := h.db.GetRun(runID2)
+	if err != nil {
+		return nil, fmt.Errorf("get current run %s: %w", runID2, err)
+	}
+
+	current := &types.BenchmarkResult{}
+	if len(currentRun.FullResults) == 0 {
+		current = nil // missing blob: take the aggregate fallback below
+	} else if err := json.Unmarshal(currentRun.FullResults, current); err != nil {
+		h.log.WithError(sanitize.LogError(err)).WithField("run_id", sanitize.LogValue(runID2)).Warn("Failed to parse full_results; comparison will use top-level metrics only")
+		current = nil
+	}
+
+	baseline := &types.BenchmarkResult{}
+	if len(baselineRun.FullResults) == 0 {
+		baseline = nil // missing blob: take the aggregate fallback below
+	} else if err := json.Unmarshal(baselineRun.FullResults, baseline); err != nil {
+		h.log.WithError(sanitize.LogError(err)).WithField("run_id", sanitize.LogValue(runID1)).Warn("Failed to parse baseline full_results; comparison will use top-level metrics only")
+		baseline = nil
+	}
+
+	comparison := &types.BaselineComparison{
+		BaselineRun: baselineRun,
+		CurrentRun:  current,
+	}
+
+	if baseline == nil || current == nil {
+		// Fall back to the top-level aggregate diff so the API still has
+		// something meaningful to return when full_results is missing.
+		comparison.Summary = summarizeAggregateDiff(baselineRun, currentRun)
+		return comparison, nil
+	}
+
+	regressions, improvements := diffClientMetrics(baseline.ClientMetrics, current.ClientMetrics, runID2, runID1)
+	comparison.Regressions = regressions
+	comparison.Improvements = improvements
+	comparison.Summary = fmt.Sprintf("Compared %s -> %s: %d regression(s), %d improvement(s)",
+		runID1, runID2, len(regressions), len(improvements))
+	return comparison, nil
+}
+
+// summarizeAggregateDiff is the fallback summary used when one or both
+// FullResults blobs are missing or malformed.
+func summarizeAggregateDiff(base, curr *types.HistoricRun) string {
+	avgDelta := pctDelta(base.AvgLatency, curr.AvgLatency)
+	p95Delta := pctDelta(base.P95Latency, curr.P95Latency)
+	succDelta := curr.SuccessRate - base.SuccessRate
+	return fmt.Sprintf("avg_latency %+.2f%%, p95_latency %+.2f%%, success_rate %+.2f pp",
+		avgDelta, p95Delta, succDelta)
+}
+
+func pctDelta(base, curr float64) float64 {
+	if base == 0 {
+		return 0
+	}
+	return (curr - base) / base * 100
+}
+
+// diffClientMetrics walks the per-client/per-method MetricSummary maps
+// and emits a Regression (or Improvement) for each (client, method,
+// metric) tuple that moved past its threshold: 5% relative change for
+// latencies (intended to match the analysis package's LatencyThreshold
+// default) and 1 percentage point for success rate. Clients or methods
+// missing from either side, and nil client entries, are skipped.
+func diffClientMetrics(base, curr map[string]*types.ClientMetrics, runID, baselineRunID string) ([]types.Regression, []types.Improvement) {
+	var regressions []types.Regression
+	var improvements []types.Improvement
+	now := time.Now()
+
+	for clientName, currClient := range curr {
+		baseClient, ok := base[clientName]
+		if !ok || baseClient == nil || currClient == nil {
+			continue
+		}
+		for methodName, currMethod := range currClient.Methods {
+			baseMethod, ok := baseClient.Methods[methodName]
+			if !ok {
+				continue
+			}
+
+			for _, m := range []struct {
+				name        string
+				base, curr  float64
+				higherWorse bool
+				threshold   float64
+			}{
+				{"avg_latency", baseMethod.Avg, currMethod.Avg, true, 5},
+				{"p95_latency", baseMethod.P95, currMethod.P95, true, 5},
+				{"success_rate", baseMethod.SuccessRate, currMethod.SuccessRate, false, 1},
+			} {
+				delta := m.curr - m.base
+				pct := pctDelta(m.base, m.curr)
+				worse := (m.higherWorse && pct > m.threshold) || (!m.higherWorse && delta < -m.threshold)
+				better := (m.higherWorse && pct < -m.threshold) || (!m.higherWorse && delta > m.threshold)
+
+				if worse {
+					regressions = append(regressions, types.Regression{
+						ID:             fmt.Sprintf("%s-%s-%s-%s", runID, clientName, methodName, m.name),
+						RunID:          runID,
+						BaselineRunID:  baselineRunID,
+						Client:         clientName,
+						Method:         methodName,
+						Metric:         m.name,
+						BaselineValue:  m.base,
+						CurrentValue:   m.curr,
+						AbsoluteChange: delta,
+						PercentChange:  pct,
+						Severity:       severityFor(pct, m.higherWorse),
+						IsSignificant:  true,
+						DetectedAt:     now,
+					})
+				} else if better {
+					improvements = append(improvements, types.Improvement{
+						ID:             fmt.Sprintf("%s-%s-%s-%s", runID, clientName, methodName, m.name),
+						RunID:          runID,
+						BaselineRunID:  baselineRunID,
+						Client:         clientName,
+						Method:         methodName,
+						Metric:         m.name,
+						BaselineValue:  m.base,
+						CurrentValue:   m.curr,
+						AbsoluteChange: delta,
+						PercentChange:  pct,
+						Significance:   significanceFor(pct, m.higherWorse),
+						DetectedAt:     now,
+					})
+				}
+			}
+		}
+	}
+	return regressions, improvements
+}
+
+func severityFor(pct float64, higherIsWorse bool) string {
+	mag := pct
+	if !higherIsWorse {
+		mag = -pct
+	}
+	switch {
+	case mag > 25:
+		return "critical"
+	case mag > 15:
+		return "high"
+	case mag > 5:
+		return "medium"
+	default:
+		return "low"
+	}
+}
+
+func significanceFor(pct float64, higherIsWorse bool) string {
+	mag := pct
+	if higherIsWorse {
+		mag = -pct
+	}
+	switch {
+	case mag > 20:
+		return "significant"
+	case mag > 10:
+		return "major"
+	default:
+		return "minor"
+	}
 }
 
 // GetHistoricSummary aggregates a per-test historic snapshot:
diff --git a/runner/storage/historic_test.go b/runner/storage/historic_test.go
deleted file mode 100644
index 7769d4f..0000000
--- a/runner/storage/historic_test.go
+++ /dev/null
@@ -1,886 +0,0 @@
-package storage
-
-import (
-	"context"
-	"database/sql"
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/mock"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-	"github.com/testcontainers/testcontainers-go"
-	"github.com/testcontainers/testcontainers-go/modules/postgres"
-	"github.com/testcontainers/testcontainers-go/wait"
-
-	"github.com/jsonrpc-bench/runner/config"
-	"github.com/jsonrpc-bench/runner/types"
-)
-
-// MockGitExecutor allows mocking git commands for testing
-type MockGitExecutor struct {
-	mock.Mock
-}
-
-func (m *MockGitExecutor) ExecCommand(name string, args ...string) ([]byte, error) {
-	arguments := m.Called(name, args)
-	return arguments.Get(0).([]byte), arguments.Error(1)
-}
-
-// GitExecutor interface for dependency injection
-type GitExecutor interface {
-	ExecCommand(name string, args ...string) ([]byte, error)
-}
-
-// RealGitExecutor implements GitExecutor using actual git commands
-type RealGitExecutor struct{}
-
-func (r *RealGitExecutor) ExecCommand(name string, args ...string) ([]byte, error) {
-	return exec.Command(name, args...).Output()
-}
-
-// HistoricStorageTestSuite provides comprehensive tests for historic storage
-type HistoricStorageTestSuite struct {
-	suite.Suite
-	container     *postgres.PostgresContainer
-	db            *sql.DB
-	storage       HistoricStorage
-	storageImpl   *historicStorage
-	ctx           context.Context
-	logger        logrus.FieldLogger
-	tempDir       string
-	storageConfig *config.StorageConfig
-	gitExecutor   *MockGitExecutor
-}
-
-// SetupSuite initializes the test environment
-func (suite *HistoricStorageTestSuite) SetupSuite() {
-	suite.ctx = context.Background()
-	suite.logger = logrus.New().WithField("test", "historic_storage")
-
-	// Create temporary directory for file storage
-	tempDir, err := ioutil.TempDir("", "historic_test_*")
-	require.NoError(suite.T(), err)
-	suite.tempDir = tempDir
-
-	// Setup PostgreSQL container
-	pgContainer, err := postgres.RunContainer(suite.ctx,
-		testcontainers.WithImage("postgres:15-alpine"),
-		postgres.WithDatabase("testdb"),
-		postgres.WithUsername("testuser"),
-		postgres.WithPassword("testpass"),
-		testcontainers.WithWaitStrategy(
-			wait.ForLog("database system is ready to accept connections").
-				WithOccurrence(2).
-				WithStartupTimeout(30*time.Second)),
-	)
-	require.NoError(suite.T(), err)
-	suite.container = pgContainer
-
-	// Setup database connection
-	mappedPort, err := pgContainer.MappedPort(suite.ctx, "5432")
-	require.NoError(suite.T(), err)
-
-	connStr := fmt.Sprintf("host=localhost port=%d user=testuser password=testpass dbname=testdb sslmode=disable",
-		mappedPort.Int())
-	db, err := sql.Open("postgres", connStr)
-	require.NoError(suite.T(), err)
-	suite.db = db
-
-	// Run migrations
-	migrationService := NewMigrationService(db, suite.logger)
-	err = migrationService.Up()
-	require.NoError(suite.T(), err)
-
-	// Setup storage config
-	suite.storageConfig = &config.StorageConfig{
-		EnableHistoric: true,
-		HistoricPath:   filepath.Join(suite.tempDir, "historic"),
-		RetentionDays:  30,
-	}
-
-	// Create historic storage
-	storage := NewHistoricStorage(db, suite.storageConfig, suite.logger)
-	suite.storage = storage
-	suite.storageImpl = storage.(*historicStorage)
-
-	// Setup git executor mock
-	suite.gitExecutor = new(MockGitExecutor)
-
-	// Start storage
-	err = suite.storage.Start(suite.ctx)
-	require.NoError(suite.T(), err)
-}
-
-// TearDownSuite cleans up test resources
-func (suite *HistoricStorageTestSuite) TearDownSuite() {
-	if suite.storage != nil {
-		suite.storage.Stop()
-	}
-	if suite.db != nil {
-		suite.db.Close()
-	}
-	if suite.container != nil {
-		suite.container.Terminate(suite.ctx)
-	}
-	if suite.tempDir != "" {
-		os.RemoveAll(suite.tempDir)
-	}
-}
-
-// SetupTest prepares clean state for each test
-func (suite *HistoricStorageTestSuite) SetupTest() {
-	// Clean database tables
-	tables := []string{
-		"regressions",
-		"baselines",
-		"historic_runs",
-	}
-
-	for _, table := range tables {
-		_, err := suite.db.Exec(fmt.Sprintf("TRUNCATE TABLE %s CASCADE", table))
-		require.NoError(suite.T(), err)
-	}
-
-	// Clean file system
-	historicPath := suite.storageConfig.HistoricPath
-	if _, err := os.Stat(historicPath); err == nil {
-		entries, err := ioutil.ReadDir(historicPath)
-		require.NoError(suite.T(), err)
-		for _, entry := range entries {
-			os.RemoveAll(filepath.Join(historicPath, entry.Name()))
-		}
-	}
-
-	// Reset git executor mock
-	suite.gitExecutor = new(MockGitExecutor)
-}
-
-// TestHistoricStorageInitialization tests storage initialization
-func (suite *HistoricStorageTestSuite) TestHistoricStorageInitialization() {
-	t := suite.T()
-
-	// Test that historic directory was created
-	assert.DirExists(t, suite.storageConfig.HistoricPath)
-
-	// Test with disabled historic storage
-	disabledConfig := &config.StorageConfig{
-		EnableHistoric: false,
-	}
-	disabledStorage := NewHistoricStorage(suite.db, disabledConfig, suite.logger)
-	err := disabledStorage.Start(suite.ctx)
-	assert.NoError(t, err)
-}
-
-// TestSaveHistoricRunBasic tests basic historic run saving
-func (suite *HistoricStorageTestSuite) TestSaveHistoricRunBasic() {
-	t := suite.T()
-
-	// Setup git mocks
-	suite.gitExecutor.On("ExecCommand", "git", []string{"rev-parse", "HEAD"}).
-		Return([]byte("abc123def456\n"), nil)
-	suite.gitExecutor.On("ExecCommand", "git", []string{"rev-parse", "--abbrev-ref", "HEAD"}).
-		Return([]byte("main\n"), nil)
-
-	result := createTestBenchmarkResult("basic_historic_test")
-
-	savedRun, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-	assert.NoError(t, err)
-	assert.NotNil(t, savedRun)
-	assert.NotEmpty(t, savedRun.ID)
-	assert.Equal(t, "basic_historic_test", savedRun.TestName)
-	assert.Equal(t, 1, savedRun.ClientsCount)
-	assert.Greater(t, savedRun.TotalRequests, int64(0))
-
-	// Verify database insertion
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM historic_runs").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 1, count)
-}
-
-// TestSaveHistoricRunWithFiles tests file saving functionality
-func (suite *HistoricStorageTestSuite) TestSaveHistoricRunWithFiles() {
-	t := suite.T()
-
-	result := createTestBenchmarkResult("file_test")
-
-	savedRun, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-	require.NoError(t, err)
-
-	// Check that files were saved
-	runDir := filepath.Join(suite.storageConfig.HistoricPath, savedRun.ID)
-	assert.DirExists(t, runDir)
-
-	// Check result.json exists and is valid
-	resultPath := filepath.Join(runDir, "result.json")
-	assert.FileExists(t, resultPath)
-
-	resultData, err := ioutil.ReadFile(resultPath)
-	assert.NoError(t, err)
-
-	var savedResult types.BenchmarkResult
-	err = json.Unmarshal(resultData, &savedResult)
-	assert.NoError(t, err)
-	assert.Equal(t, result.StartTime, savedResult.StartTime)
-
-	// Check metadata.json exists and is valid
-	metadataPath := filepath.Join(runDir, "metadata.json")
-	assert.FileExists(t, metadataPath)
-
-	metadataData, err := ioutil.ReadFile(metadataPath)
-	assert.NoError(t, err)
-
-	var metadata map[string]interface{}
-	err = json.Unmarshal(metadataData, &metadata)
-	assert.NoError(t, err)
-	assert.Equal(t, savedRun.ID, metadata["run_id"])
-	assert.Equal(t, "file_test", metadata["test_name"])
-}
-
-// TestGetHistoricRun tests retrieving historic runs
-func (suite *HistoricStorageTestSuite) TestGetHistoricRun() {
-	t := suite.T()
-
-	// Save a run first
-	result := createTestBenchmarkResult("get_test")
-	savedRun, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-	require.NoError(t, err)
-
-	// Retrieve the run
-	retrievedRun, err := suite.storage.GetHistoricRun(suite.ctx, savedRun.ID)
-	assert.NoError(t, err)
-	assert.NotNil(t, retrievedRun)
-	assert.Equal(t, savedRun.ID, retrievedRun.ID)
-	assert.Equal(t, savedRun.TestName, retrievedRun.TestName)
-	assert.Equal(t, savedRun.TotalRequests, retrievedRun.TotalRequests)
-
-	// Test non-existent run
-	_, err = suite.storage.GetHistoricRun(suite.ctx, "nonexistent_id")
-	assert.Error(t, err)
-	assert.Contains(t, err.Error(), "historic run not found")
-}
-
-// TestListHistoricRuns tests listing historic runs with filtering
-func (suite *HistoricStorageTestSuite) TestListHistoricRuns() {
-	t := suite.T()
-
-	// Save multiple runs
-	testNames := []string{"list_test_1", "list_test_2", "other_test"}
-	savedRuns := make([]*types.HistoricRun, 0, len(testNames))
-
-	for _, testName := range testNames {
-		result := createTestBenchmarkResult(testName)
-		savedRun, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(t, err)
-		savedRuns = append(savedRuns, savedRun)
-	}
-
-	// List all runs
-	runs, err := suite.storage.ListHistoricRuns(suite.ctx, "", 10)
-	assert.NoError(t, err)
-	assert.Len(t, runs, 3)
-
-	// List runs with filter
-	runs, err = suite.storage.ListHistoricRuns(suite.ctx, "list_test_1", 10)
-	assert.NoError(t, err)
-	assert.Len(t, runs, 1)
-	assert.Equal(t, "list_test_1", runs[0].TestName)
-
-	// Test limit
-	runs, err = suite.storage.ListHistoricRuns(suite.ctx, "", 2)
-	assert.NoError(t, err)
-	assert.Len(t, runs, 2)
-
-	// Verify ordering (newest first)
-	runs, err = suite.storage.ListHistoricRuns(suite.ctx, "", 10)
-	assert.NoError(t, err)
-	for i := 1; i < len(runs); i++ {
-		assert.True(t, runs[i-1].Timestamp.After(runs[i].Timestamp) ||
-			runs[i-1].Timestamp.Equal(runs[i].Timestamp))
-	}
-}
-
-// TestDeleteHistoricRun tests deleting historic runs
-func (suite *HistoricStorageTestSuite) TestDeleteHistoricRun() {
-	t := suite.T()
-
-	// Save a run first
-	result := createTestBenchmarkResult("delete_test")
-	savedRun, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-	require.NoError(t, err)
-
-	// Verify files exist
-	runDir := filepath.Join(suite.storageConfig.HistoricPath, savedRun.ID)
-	assert.DirExists(t, runDir)
-
-	// Delete the run
-	err = suite.storage.DeleteHistoricRun(suite.ctx, savedRun.ID)
-	assert.NoError(t, err)
-
-	// Verify database deletion
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM historic_runs WHERE id = $1", savedRun.ID).Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 0, count)
-
-	// Verify file deletion
-	assert.NoFileExists(t, runDir)
-
-	// Test deleting non-existent run (should not error)
-	err = suite.storage.DeleteHistoricRun(suite.ctx, "nonexistent_id")
-	assert.NoError(t, err)
-}
-
-// TestGetHistoricTrends tests performance trend analysis
-func (suite *HistoricStorageTestSuite) TestGetHistoricTrends() {
-	t := suite.T()
-
-	// Create multiple runs over time to establish a trend
-	baseTime := time.Now().Add(-7 * 24 * time.Hour)
-
-	for i := 0; i < 5; i++ {
-		result := createTestBenchmarkResult("trend_test")
-
-		// Modify latency to create a trend
-		for clientName, metrics := range result.ClientMetrics {
-			metrics.Latency.P95 = 100.0 + float64(i*10) // Increasing latency trend
-			result.ClientMetrics[clientName] = metrics
-		}
-
-		// Adjust timestamp
-		result.StartTime = baseTime.Add(time.Duration(i) * 24 * time.Hour).Format(time.RFC3339)
-		result.EndTime = baseTime.Add(time.Duration(i)*24*time.Hour + 30*time.Minute).Format(time.RFC3339)
-
-		_, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(t, err)
-	}
-
-	// Get trend data
-	trend, err := suite.storage.GetHistoricTrends(suite.ctx, "trend_test", "geth", "p95_latency", 30)
-	assert.NoError(t, err)
-	assert.NotNil(t, trend)
-	assert.Equal(t, "trend_test", trend.TestName)
-	assert.Equal(t, "geth", trend.Client)
-	assert.Equal(t, "p95_latency", trend.Metric)
-	assert.Len(t, trend.Points, 5)
-
-	// Verify trend direction (should be degrading due to increasing latency)
-	assert.Equal(t, "degrading", trend.Trend)
-	assert.Greater(t, trend.TrendSlope, 0.0)
-}
-
-// TestCompareRuns tests historic run comparison
-func (suite *HistoricStorageTestSuite) TestCompareRuns() {
-	t := suite.T()
-
-	// Save two runs with different performance characteristics
-	result1 := createTestBenchmarkResult("compare_test")
-	result1.ClientMetrics["geth"].Latency.P95 = 100.0
-	result1.ClientMetrics["geth"].ErrorRate = 0.01
-	savedRun1, err := suite.storage.SaveHistoricRun(suite.ctx, result1)
-	require.NoError(t, err)
-
-	result2 := createTestBenchmarkResult("compare_test")
-	result2.ClientMetrics["geth"].Latency.P95 = 120.0 // 20% worse
-	result2.ClientMetrics["geth"].ErrorRate = 0.015   // 50% worse
-	result2.StartTime = time.Now().Add(-1 * time.Hour).Format(time.RFC3339)
-	result2.EndTime = time.Now().Add(-30 * time.Minute).Format(time.RFC3339)
-	savedRun2, err := suite.storage.SaveHistoricRun(suite.ctx, result2)
-	require.NoError(t, err)
-
-	// Compare the runs
-	comparison, err := suite.storage.CompareRuns(suite.ctx, savedRun1.ID, savedRun2.ID)
-	assert.NoError(t, err)
-	assert.NotNil(t, comparison)
-	assert.Equal(t, savedRun1.ID, comparison.RunID1)
-	assert.Equal(t, savedRun2.ID, comparison.RunID2)
-	assert.Contains(t, comparison.ClientChanges, "overall")
-
-	// Verify comparison detected degradation
-	overallChange := comparison.ClientChanges["overall"]
-	assert.Equal(t, "degraded", overallChange.Status)
-	assert.Greater(t, overallChange.P95LatencyChange, 15.0) // Should be around 20%
-}
-
-// TestGetHistoricSummary tests historic summary generation
-func (suite *HistoricStorageTestSuite) TestGetHistoricSummary() {
-	t := suite.T()
-
-	// Save multiple runs for the same test
-	testName := "summary_test"
-	for i := 0; i < 5; i++ {
-		result := createTestBenchmarkResult(testName)
-
-		// Vary performance to create best/worst runs
-		latency := 50.0 + float64(i*20)
-		result.ClientMetrics["geth"].Latency.Avg = latency
-		result.ClientMetrics["geth"].Latency.P95 = latency * 1.5
-
-		result.StartTime = time.Now().Add(time.Duration(-i) * time.Hour).Format(time.RFC3339)
-		result.EndTime = time.Now().Add(time.Duration(-i)*time.Hour + 30*time.Minute).Format(time.RFC3339)
-
-		_, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(t, err)
-	}
-
-	// Get summary
-	summary, err := suite.storage.GetHistoricSummary(suite.ctx, testName)
-	assert.NoError(t, err)
-	assert.NotNil(t, summary)
-	assert.Equal(t, testName, summary.TestName)
-	assert.Equal(t, 5, summary.TotalRuns)
-	assert.NotEmpty(t, summary.BestRun.ID)
-	assert.NotEmpty(t, summary.WorstRun.ID)
-	assert.Len(t, summary.RecentRuns, 5)
-
-	// Verify best run has lowest latency
-	assert.Equal(t, 50.0, summary.BestRun.AvgLatency)
-	// Verify worst run has highest latency
-	assert.Equal(t, 130.0, summary.WorstRun.AvgLatency)
-}
-
-// TestSaveResultFiles tests file saving operations
-func (suite *HistoricStorageTestSuite) TestSaveResultFiles() {
-	t := suite.T()
-
-	result := createTestBenchmarkResult("files_test")
-	runID := "test_run_" + time.Now().Format("20060102_150405")
-
-	err := suite.storage.SaveResultFiles(suite.ctx, runID, result)
-	assert.NoError(t, err)
-
-	// Verify files were created
-	runDir := filepath.Join(suite.storageConfig.HistoricPath, runID)
-	assert.DirExists(t, runDir)
-
-	resultPath := filepath.Join(runDir, "result.json")
-	assert.FileExists(t, resultPath)
-
-	metadataPath := filepath.Join(runDir, "metadata.json")
-	assert.FileExists(t, metadataPath)
-
-	// Verify file contents
-	resultData, err := ioutil.ReadFile(resultPath)
-	assert.NoError(t, err)
-
-	var savedResult types.BenchmarkResult
-	err = json.Unmarshal(resultData, &savedResult)
-	assert.NoError(t, err)
-	assert.Equal(t, result.StartTime, savedResult.StartTime)
-}
-
-// TestSaveResultFilesDisabled tests file saving when disabled
-func (suite *HistoricStorageTestSuite) TestSaveResultFilesDisabled() {
-	t := suite.T()
-
-	// Create storage with disabled file saving
-	disabledConfig := &config.StorageConfig{
-		EnableHistoric: false,
-	}
-	disabledStorage := NewHistoricStorage(suite.db, disabledConfig, suite.logger)
-
-	result := createTestBenchmarkResult("disabled_test")
-	runID := "test_run_disabled"
-
-	err := disabledStorage.SaveResultFiles(suite.ctx, runID, result)
-	assert.NoError(t, err) // Should not error
-
-	// Verify no files were created
-	runDir := filepath.Join(suite.tempDir, "historic", runID)
-	assert.NoFileExists(t, runDir)
-}
-
-// TestGetResultFiles tests file retrieval
-func (suite *HistoricStorageTestSuite) TestGetResultFiles() {
-	t := suite.T()
-
-	result := createTestBenchmarkResult("get_files_test")
-	runID := "test_run_get_files"
-
-	// Save files first
-	err := suite.storage.SaveResultFiles(suite.ctx, runID, result)
-	require.NoError(t, err)
-
-	// Get files path
-	filesPath, err := suite.storage.GetResultFiles(suite.ctx, runID)
-	assert.NoError(t, err)
-	assert.Equal(t, filepath.Join(suite.storageConfig.HistoricPath, runID), filesPath)
-
-	// Test non-existent run
-	_, err = suite.storage.GetResultFiles(suite.ctx, "nonexistent")
-	assert.Error(t, err)
-	assert.Contains(t, err.Error(), "result files not found")
-}
-
-// TestCleanupOldFiles tests file cleanup functionality
-func (suite *HistoricStorageTestSuite) TestCleanupOldFiles() {
-	t := suite.T()
-
-	// Create test files with different ages
-	baseDir := suite.storageConfig.HistoricPath
-
-	// Recent file (should not be deleted)
-	recentDir := filepath.Join(baseDir, "recent_run")
-	err := os.MkdirAll(recentDir, 0755)
-	require.NoError(t, err)
-
-	// Old file (should be deleted)
-	oldDir := filepath.Join(baseDir, "old_run")
-	err = os.MkdirAll(oldDir, 0755)
-	require.NoError(t, err)
-
-	// Make old directory appear old by changing modification time
-	oldTime := time.Now().Add(-40 * 24 * time.Hour) // 40 days ago
-	err = os.Chtimes(oldDir, oldTime, oldTime)
-	require.NoError(t, err)
-
-	// Run cleanup
-	err = suite.storage.CleanupOldFiles(suite.ctx)
-	assert.NoError(t, err)
-
-	// Verify recent file still exists
-	assert.DirExists(t, recentDir)
-
-	// Verify old file was deleted
-	assert.NoDirExists(t, oldDir)
-}
-
-// TestCleanupOldFilesDisabled tests cleanup when retention is disabled
-func (suite *HistoricStorageTestSuite) TestCleanupOldFilesDisabled() {
-	t := suite.T()
-
-	// Create storage with retention disabled
-	disabledConfig := &config.StorageConfig{
-		EnableHistoric: true,
-		HistoricPath:   suite.storageConfig.HistoricPath,
-		RetentionDays:  0, // Disabled
-	}
-	disabledStorage := NewHistoricStorage(suite.db, disabledConfig, suite.logger)
-
-	err := disabledStorage.CleanupOldFiles(suite.ctx)
-	assert.NoError(t, err) // Should not error when disabled
-}
-
-// TestGitIntegration tests git information extraction
-func (suite *HistoricStorageTestSuite) TestGitIntegration() {
-	t := suite.T()
-
-	// This test assumes we're in a git repository
-	// In a real scenario, you'd mock the git commands
-	result := createTestBenchmarkResult("git_test")
-
-	savedRun, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-	assert.NoError(t, err)
-
-	// The actual git commit and branch would be empty in tests
-	// unless mocked, which is fine for this basic test
-	assert.NotNil(t, savedRun)
-}
-
-// TestConcurrentAccess tests concurrent storage operations
-func (suite *HistoricStorageTestSuite) TestConcurrentAccess() {
-	t := suite.T()
-
-	concurrency := 5
-	results := make(chan error, concurrency)
-
-	// Start multiple goroutines saving historic runs
-	for i := 0; i < concurrency; i++ {
-		go func(id int) {
-			result := createTestBenchmarkResult(fmt.Sprintf("concurrent_test_%d", id))
-			_, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-			results <- err
-		}(i)
-	}
-
-	// Collect results
-	for i := 0; i < concurrency; i++ {
-		err := <-results
-		assert.NoError(t, err)
-	}
-
-	// Verify all runs were saved
-	var count int
-	err := suite.db.QueryRow("SELECT COUNT(*) FROM historic_runs").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, concurrency, count)
-}
-
-// TestErrorHandling tests various error scenarios
-func (suite *HistoricStorageTestSuite) TestErrorHandling() {
-	t := suite.T()
-
-	// Test with invalid database connection
-	invalidDB := &sql.DB{} // This will cause errors
-	invalidStorage := NewHistoricStorage(invalidDB, suite.storageConfig, suite.logger)
-
-	result := createTestBenchmarkResult("error_test")
-	_, err := invalidStorage.SaveHistoricRun(suite.ctx, result)
-	assert.Error(t, err)
-
-	// Test with context cancellation
-	cancelledCtx, cancel := context.WithCancel(suite.ctx)
-	cancel() // Cancel immediately
-
-	_, err = suite.storage.SaveHistoricRun(cancelledCtx, result)
-	assert.Error(t, err)
-	assert.Contains(t, err.Error(), "context canceled")
-}
-
-// TestLargeDataHandling tests handling of large benchmark results
-func (suite *HistoricStorageTestSuite) TestLargeDataHandling() {
-	t := suite.T()
-
-	result := createLargeBenchmarkResult()
-	result.Config.(map[string]interface{})["test_name"] = "large_historic_test"
-
-	savedRun, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-	assert.NoError(t, err)
-	assert.NotNil(t, savedRun)
-
-	// Verify large data was saved correctly
-	retrievedRun, err := suite.storage.GetHistoricRun(suite.ctx, savedRun.ID)
-	assert.NoError(t, err)
-	assert.Equal(t, savedRun.ID, retrievedRun.ID)
-}
-
-// BenchmarkSaveHistoricRun benchmarks historic run saving
-func (suite *HistoricStorageTestSuite) BenchmarkSaveHistoricRun() {
-	b := suite.T()
-	if testing.Short() {
-		b.Skip("Skipping benchmark in short mode")
-	}
-
-	result := createTestBenchmarkResult("benchmark_historic_test")
-
-	// ResetTimer not available in test suite context
-
-	for i := 0; i < 50; i++ {
-		// Modify result to ensure unique run IDs
-		result.StartTime = time.Now().Add(time.Duration(i) * time.Second).Format(time.RFC3339)
-		result.Config.(map[string]interface{})["test_name"] = fmt.Sprintf("benchmark_test_%d", i)
-
-		_, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(b, err)
-	}
-}
-
-// BenchmarkGetHistoricTrends benchmarks trend analysis
-func (suite *HistoricStorageTestSuite) BenchmarkGetHistoricTrends() {
-	b := suite.T()
-	if testing.Short() {
-		b.Skip("Skipping benchmark in short mode")
-	}
-
-	// Setup test data
-	testName := "benchmark_trends"
-	for i := 0; i < 100; i++ {
-		result := createTestBenchmarkResult(testName)
-		result.StartTime = time.Now().Add(time.Duration(-i) * time.Hour).Format(time.RFC3339)
-		_, err := suite.storage.SaveHistoricRun(suite.ctx, result)
-		require.NoError(b, err)
-	}
-
-	// ResetTimer not available in test suite context
-
-	for i := 0; i < 50; i++ {
-		_, err := suite.storage.GetHistoricTrends(suite.ctx, testName, "geth", "p95_latency", 30)
-		require.NoError(b, err)
-	}
-}
-
-// Run the test suite
-func TestHistoricStorageTestSuite(t *testing.T) {
-	// Skip if running in CI without Docker
-	if os.Getenv("SKIP_INTEGRATION_TESTS") != "" {
-		t.Skip("Skipping integration tests")
-	}
-
-	suite.Run(t, new(HistoricStorageTestSuite))
-}
-
-// Unit tests for utility functions
-
-// TestGenerateHistoricRunID tests run ID generation
-func TestGenerateHistoricRunID(t *testing.T) {
-	result := createTestBenchmarkResult("test_name")
-	runID := generateHistoricRunID(result)
-
-	assert.Contains(t, runID, "test_name")
-	assert.Contains(t, runID, time.Now().Format("20060102"))
-	assert.True(t, len(runID) > len("test_name_20240101_120000"))
-}
-
-// TestExtractTestName tests test name extraction
-func TestExtractTestName(t *testing.T) {
-	result := &types.BenchmarkResult{
-		Config: map[string]interface{}{
-			"test_name": "my_test",
-		},
-	}
-
-	testName := extractTestName(result)
-	assert.Equal(t, "my_test", testName)
-
-	// Test with missing test_name
-	result.Config = map[string]interface{}{}
-	testName = extractTestName(result)
-	assert.Equal(t, "unknown", testName)
-
-	// Test with invalid config
-	result.Config = "not a map"
-	testName = extractTestName(result)
-	assert.Equal(t, "unknown", testName)
-}
-
-// TestExtractDescription tests description extraction
-func TestExtractDescription(t *testing.T) {
-	result := &types.BenchmarkResult{
-		Config: map[string]interface{}{
-			"description": "Test description",
-		},
-	}
-
-	description := extractDescription(result)
-	assert.Equal(t, "Test description", description)
-
-	// Test with missing description
-	result.Config = map[string]interface{}{}
-	description = extractDescription(result)
-	assert.Equal(t, "", description)
-}
-
-// TestExtractEndpointsCount tests endpoints count extraction
-func TestExtractEndpointsCount(t *testing.T) {
-	result := &types.BenchmarkResult{
-		Config: map[string]interface{}{
-			"endpoints": []interface{}{
-				map[string]interface{}{"method": "eth_blockNumber"},
-				map[string]interface{}{"method": "eth_getBalance"},
-			},
-		},
-	}
-
-	count := extractEndpointsCount(result)
-	assert.Equal(t, 2, count)
-
-	// Test with missing endpoints
-	result.Config = map[string]interface{}{}
-	count = extractEndpointsCount(result)
-	assert.Equal(t, 0, count)
-}
-
-// TestExtractTargetRPS tests RPS extraction
-func TestExtractTargetRPS(t *testing.T) {
-	// Test with float64 RPS
-	result := &types.BenchmarkResult{
-		Config: map[string]interface{}{
-			"rps": 100.5,
-		},
-	}
-
-	rps := extractTargetRPS(result)
-	assert.Equal(t, 100, rps)
-
-	// Test with int RPS
-	result.Config = map[string]interface{}{
-		"rps": 200,
-	}
-
-	rps = extractTargetRPS(result)
-	assert.Equal(t, 200, rps)
-
-	// Test with missing RPS
-	result.Config = map[string]interface{}{}
-	rps = extractTargetRPS(result)
-	assert.Equal(t, 0, rps)
-}
-
-// TestMustMarshalJSON tests JSON marshaling utility
-func TestMustMarshalJSON(t *testing.T) {
-	// Test valid object
-	obj := map[string]interface{}{
-		"key": "value",
-	}
-
-	result := mustMarshalJSON(obj)
-	assert.NotEqual(t, json.RawMessage("{}"), result)
-
-	// Test with function (which can't be marshaled)
-	invalidObj := map[string]interface{}{
-		"func": func() {},
-	}
-
-	result = mustMarshalJSON(invalidObj)
-	assert.Equal(t, json.RawMessage("{}"), result)
-}
-
-// TestTrendCalculations tests trend calculation logic
-func TestTrendCalculations(t *testing.T) {
-	storage := &historicStorage{
-		log: logrus.New().WithField("test", "trends"),
-	}
-
-	// Test with insufficient data
-	trend := &types.HistoricTrend{
-		Points: []types.TrendPoint{
-			{Value: 100.0},
-		},
-	}
-
-	storage.calculateTrendStatistics(trend)
-	assert.Equal(t, "insufficient_data", trend.Trend)
-
-	// Test with improving trend (decreasing values)
-	trend = &types.HistoricTrend{
-		Points: []types.TrendPoint{
-			{Value: 100.0},
-			{Value: 95.0},
-			{Value: 90.0},
-			{Value: 85.0},
-		},
-	}
-
-	storage.calculateTrendStatistics(trend)
-	assert.Equal(t, "improving", trend.Trend)
-	assert.Less(t, trend.TrendSlope, -0.01)
-
-	// Test with degrading trend (increasing values)
-	trend = &types.HistoricTrend{
-		Points: []types.TrendPoint{
-			{Value: 80.0},
-			{Value: 85.0},
-			{Value: 90.0},
-			{Value: 95.0},
-		},
-	}
-
-	storage.calculateTrendStatistics(trend)
-	assert.Equal(t, "degrading", trend.Trend)
-	assert.Greater(t, trend.TrendSlope, 0.01)
-
-	// Test with stable trend
-	trend = &types.HistoricTrend{
-		Points: []types.TrendPoint{
-			{Value: 90.0},
-			{Value: 90.1},
-			{Value: 89.9},
-			{Value: 90.0},
-		},
-	}
-
-	storage.calculateTrendStatistics(trend)
-	assert.Equal(t, "stable", trend.Trend)
-	assert.Less(t, trend.TrendSlope, 0.01)
-	assert.Greater(t, trend.TrendSlope, -0.01)
-}
diff --git a/runner/storage/migration_test.go b/runner/storage/migration_test.go
deleted file mode 100644
index 75612e7..0000000
--- a/runner/storage/migration_test.go
+++ /dev/null
@@ -1,1447 +0,0 @@
-package storage
-
-import (
-	"context"
-	"database/sql"
-	"fmt"
-	"os"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/sirupsen/logrus"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-	"github.com/testcontainers/testcontainers-go"
-	"github.com/testcontainers/testcontainers-go/modules/postgres"
-	"github.com/testcontainers/testcontainers-go/wait"
-)
-
-// MigrationTestSuite provides comprehensive tests for database migrations
-type MigrationTestSuite struct {
-	suite.Suite
-	container *postgres.PostgresContainer
-	db        *sql.DB
-	migration *MigrationService
-	ctx       context.Context
-	logger    logrus.FieldLogger
-}
-
-// SetupSuite initializes the test environment
-func (suite *MigrationTestSuite) SetupSuite() {
-	suite.ctx = context.Background()
-	suite.logger = logrus.New().WithField("test", "migration")
-
-	// Start PostgreSQL container
-	pgContainer, err := postgres.RunContainer(suite.ctx,
-		testcontainers.WithImage("postgres:15-alpine"),
-		postgres.WithDatabase("testdb"),
-		postgres.WithUsername("testuser"),
-		postgres.WithPassword("testpass"),
-		testcontainers.WithWaitStrategy(
-			wait.ForLog("database system is ready to accept connections").
-				WithOccurrence(2).
-				WithStartupTimeout(30*time.Second)),
-	)
-	require.NoError(suite.T(), err)
-	suite.container = pgContainer
-
-	// Setup database connection
-	mappedPort, err := pgContainer.MappedPort(suite.ctx, "5432")
-	require.NoError(suite.T(), err)
-
-	connStr := fmt.Sprintf("host=localhost port=%d user=testuser password=testpass dbname=testdb sslmode=disable",
-		mappedPort.Int())
-	db, err := sql.Open("postgres", connStr)
-	require.NoError(suite.T(), err)
-	suite.db = db
-
-	// Create migration service
-	suite.migration = NewMigrationService(db, suite.logger)
-}
-
-// TearDownSuite cleans up test resources
-func (suite *MigrationTestSuite) TearDownSuite() {
-	if suite.db != nil {
-		suite.db.Close()
-	}
-	if suite.container != nil {
-		suite.container.Terminate(suite.ctx)
-	}
-}
-
-// SetupTest prepares clean state for each test
-func (suite *MigrationTestSuite) SetupTest() {
-	// Drop all tables to start fresh
-	err := suite.migration.Reset()
-	require.NoError(suite.T(), err)
-
-	// Recreate migration service to ensure clean state
-	suite.migration = NewMigrationService(suite.db, suite.logger)
-}
-
-// TestMigrationServiceInitialization tests migration service creation
-func (suite *MigrationTestSuite) TestMigrationServiceInitialization() {
-	t := suite.T()
-
-	// Test successful initialization
-	err := suite.migration.Initialize()
-	assert.NoError(t, err)
-
-	// Verify schema_migrations table was created
-	var exists bool
-	err = suite.db.QueryRow(`
-		SELECT EXISTS (
-			SELECT FROM information_schema.tables 
-			WHERE table_schema = 'public' 
-			AND table_name = 'schema_migrations'
-		)`).Scan(&exists)
-	assert.NoError(t, err)
-	assert.True(t, exists)
-
-	// Test that initialize is idempotent
-	err = suite.migration.Initialize()
-	assert.NoError(t, err)
-}
-
-// TestGetAppliedMigrations tests retrieving applied migrations
-func (suite *MigrationTestSuite) TestGetAppliedMigrations() {
-	t := suite.T()
-
-	// Initialize and get applied migrations on fresh database
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.NotNil(t, applied)
-	assert.Empty(t, applied) // Should be empty on fresh database
-
-	// Apply some migrations manually
-	_, err = suite.db.Exec("INSERT INTO schema_migrations (version, name) VALUES (1, 'test_migration_1')")
-	require.NoError(t, err)
-	_, err = suite.db.Exec("INSERT INTO schema_migrations (version, name) VALUES (3, 'test_migration_3')")
-	require.NoError(t, err)
-
-	// Get applied migrations again
-	applied, err = suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.True(t, applied[1])
-	assert.False(t, applied[2])
-	assert.True(t, applied[3])
-}
-
-// TestUpMigrations tests applying migrations
-func (suite *MigrationTestSuite) TestUpMigrations() {
-	t := suite.T()
-
-	// Apply all migrations
-	err := suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Verify all expected tables were created
-	expectedTables := []string{
-		"benchmark_runs",
-		"benchmark_metrics",
-		"client_summary",
-		"method_summary",
-		"comparison_results",
-		"response_diffs",
-		"historic_runs",
-		"regressions",
-		"baselines",
-	}
-
-	for _, table := range expectedTables {
-		var exists bool
-		err = suite.db.QueryRow(`
-			SELECT EXISTS (
-				SELECT FROM information_schema.tables 
-				WHERE table_schema = 'public' 
-				AND table_name = $1
-			)`, table).Scan(&exists)
-		assert.NoError(t, err, "Failed to check table %s", table)
-		assert.True(t, exists, "Table %s should exist", table)
-	}
-
-	// Verify all migrations were recorded
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.Len(t, applied, len(migrations))
-
-	// Test idempotency - running Up again should not error
-	err = suite.migration.Up()
-	assert.NoError(t, err)
-}
-
-// TestUpMigrationsPartial tests applying migrations when some already exist
-func (suite *MigrationTestSuite) TestUpMigrationsPartial() {
-	t := suite.T()
-
-	// Initialize migration table
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	// Manually apply first migration
-	migration := migrations[0]
-	err = suite.migration.applyMigration(migration)
-	require.NoError(t, err)
-
-	// Verify only first migration was applied
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.True(t, applied[1])
-	assert.False(t, applied[2])
-
-	// Apply all migrations
-	err = suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Verify all migrations are now applied
-	applied, err = suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.Len(t, applied, len(migrations))
-}
-
-// TestDownMigrations tests rolling back migrations
-func (suite *MigrationTestSuite) TestDownMigrations() {
-	t := suite.T()
-
-	// Apply all migrations first
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Rollback to version 2 (should rollback versions 5, 4, 3)
-	err = suite.migration.Down(2)
-	assert.NoError(t, err)
-
-	// Verify correct migrations remain
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.True(t, applied[1])
-	assert.True(t, applied[2])
-	assert.False(t, applied[3])
-	assert.False(t, applied[4])
-	assert.False(t, applied[5])
-
-	// Verify tables were dropped
-	historicTables := []string{"historic_runs", "regressions", "baselines"}
-	for _, table := range historicTables {
-		var exists bool
-		err = suite.db.QueryRow(`
-			SELECT EXISTS (
-				SELECT FROM information_schema.tables 
-				WHERE table_schema = 'public' 
-				AND table_name = $1
-			)`, table).Scan(&exists)
-		assert.NoError(t, err)
-		assert.False(t, exists, "Table %s should have been dropped", table)
-	}
-
-	// Rollback all migrations
-	err = suite.migration.Down(0)
-	assert.NoError(t, err)
-
-	// Verify all migrations were rolled back
-	applied, err = suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.Empty(t, applied)
-}
-
-// TestDownMigrationsNonExistent tests rolling back non-existent migrations
-func (suite *MigrationTestSuite) TestDownMigrationsNonExistent() {
-	t := suite.T()
-
-	// Apply only first two migrations
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	for i := 0; i < 2; i++ {
-		err = suite.migration.applyMigration(migrations[i])
-		require.NoError(t, err)
-	}
-
-	// Try to rollback migration that wasn't applied (should not error)
-	err = suite.migration.Down(1)
-	assert.NoError(t, err)
-
-	// Verify correct state
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.True(t, applied[1])
-	assert.False(t, applied[2])
-}
-
-// TestApplyMigration tests applying individual migrations
-func (suite *MigrationTestSuite) TestApplyMigration() {
-	t := suite.T()
-
-	// Initialize migration table
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	// Apply first migration
-	migration := migrations[0]
-	err = suite.migration.applyMigration(migration)
-	assert.NoError(t, err)
-
-	// Verify migration was recorded
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM schema_migrations WHERE version = $1", migration.Version).Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 1, count)
-
-	// Verify tables were created
-	var exists bool
-	err = suite.db.QueryRow(`
-		SELECT EXISTS (
-			SELECT FROM information_schema.tables 
-			WHERE table_schema = 'public' 
-			AND table_name = 'benchmark_runs'
-		)`).Scan(&exists)
-	assert.NoError(t, err)
-	assert.True(t, exists)
-}
-
-// TestApplyMigrationFailure tests migration failure handling
-func (suite *MigrationTestSuite) TestApplyMigrationFailure() {
-	t := suite.T()
-
-	// Initialize migration table
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	// Create a migration with invalid SQL
-	invalidMigration := Migration{
-		Version: 999,
-		Name:    "invalid_migration",
-		Up:      "INVALID SQL STATEMENT",
-		Down:    "DROP TABLE IF EXISTS test_table",
-	}
-
-	// Attempt to apply invalid migration
-	err = suite.migration.applyMigration(invalidMigration)
-	assert.Error(t, err)
-
-	// Verify migration was not recorded
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM schema_migrations WHERE version = $1", invalidMigration.Version).Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 0, count)
-}
-
-// TestRollbackMigration tests rolling back individual migrations
-func (suite *MigrationTestSuite) TestRollbackMigration() {
-	t := suite.T()
-
-	// Apply first migration
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	migration := migrations[0]
-	err = suite.migration.applyMigration(migration)
-	require.NoError(t, err)
-
-	// Rollback the migration
-	err = suite.migration.rollbackMigration(migration)
-	assert.NoError(t, err)
-
-	// Verify migration record was removed
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM schema_migrations WHERE version = $1", migration.Version).Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 0, count)
-
-	// Verify tables were dropped
-	var exists bool
-	err = suite.db.QueryRow(`
-		SELECT EXISTS (
-			SELECT FROM information_schema.tables 
-			WHERE table_schema = 'public' 
-			AND table_name = 'benchmark_runs'
-		)`).Scan(&exists)
-	assert.NoError(t, err)
-	assert.False(t, exists)
-}
-
-// TestRollbackMigrationFailure tests rollback failure handling
-func (suite *MigrationTestSuite) TestRollbackMigrationFailure() {
-	t := suite.T()
-
-	// Initialize and apply a migration
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	migration := migrations[0]
-	err = suite.migration.applyMigration(migration)
-	require.NoError(t, err)
-
-	// Create a migration with invalid rollback SQL
-	invalidMigration := Migration{
-		Version: migration.Version,
-		Name:    migration.Name,
-		Up:      migration.Up,
-		Down:    "INVALID ROLLBACK SQL",
-	}
-
-	// Attempt to rollback with invalid SQL
-	err = suite.migration.rollbackMigration(invalidMigration)
-	assert.Error(t, err)
-
-	// Verify migration record still exists (rollback was not committed)
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM schema_migrations WHERE version = $1", migration.Version).Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 1, count)
-}
-
-// TestCreateIndices tests index creation
-func (suite *MigrationTestSuite) TestCreateIndices() {
-	t := suite.T()
-
-	// Apply migrations first to have tables
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Create indices
-	err = suite.migration.CreateIndices()
-	assert.NoError(t, err)
-
-	// Verify some indices were created
-	var indexCount int
-	err = suite.db.QueryRow(`
-		SELECT COUNT(*) 
-		FROM pg_indexes 
-		WHERE schemaname = 'public' 
-		AND indexname LIKE 'idx_%'`).Scan(&indexCount)
-	assert.NoError(t, err)
-	assert.Greater(t, indexCount, 0)
-
-	// Test idempotency - running CreateIndices again should not error
-	err = suite.migration.CreateIndices()
-	assert.NoError(t, err)
-}
-
-// TestGetVersion tests version retrieval
-func (suite *MigrationTestSuite) TestGetVersion() {
-	t := suite.T()
-
-	// Test on fresh database (should return 0)
-	version, err := suite.migration.GetVersion()
-	assert.Error(t, err) // Table doesn't exist yet
-
-	// Initialize migrations
-	err = suite.migration.Initialize()
-	require.NoError(t, err)
-
-	// Test on empty migration table
-	version, err = suite.migration.GetVersion()
-	assert.NoError(t, err)
-	assert.Equal(t, 0, version)
-
-	// Apply some migrations
-	err = suite.migration.Up()
-	require.NoError(t, err)
-
-	// Test on populated migration table
-	version, err = suite.migration.GetVersion()
-	assert.NoError(t, err)
-	assert.Equal(t, len(migrations), version)
-}
-
-// TestReset tests database reset functionality
-func (suite *MigrationTestSuite) TestReset() {
-	t := suite.T()
-
-	// Apply all migrations
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Verify tables exist
-	var tableCount int
-	err = suite.db.QueryRow(`
-		SELECT COUNT(*) 
-		FROM information_schema.tables 
-		WHERE table_schema = 'public' 
-		AND table_name != 'schema_migrations'`).Scan(&tableCount)
-	assert.NoError(t, err)
-	assert.Greater(t, tableCount, 5)
-
-	// Reset database
-	err = suite.migration.Reset()
-	assert.NoError(t, err)
-
-	// Verify all migrations were reapplied
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.Len(t, applied, len(migrations))
-
-	// Verify tables exist again
-	err = suite.db.QueryRow(`
-		SELECT COUNT(*) 
-		FROM information_schema.tables 
-		WHERE table_schema = 'public' 
-		AND table_name != 'schema_migrations'`).Scan(&tableCount)
-	assert.NoError(t, err)
-	assert.Greater(t, tableCount, 5)
-}
-
-// TestMigrationOrder tests that migrations are applied in correct order
-func (suite *MigrationTestSuite) TestMigrationOrder() {
-	t := suite.T()
-
-	// Verify migrations are in correct order
-	for i := 1; i < len(migrations); i++ {
-		assert.Greater(t, migrations[i].Version, migrations[i-1].Version,
-			"Migration %d should have higher version than migration %d", i, i-1)
-	}
-
-	// Apply migrations and verify they're applied in order
-	err := suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Check migration records are in order
-	rows, err := suite.db.Query("SELECT version FROM schema_migrations ORDER BY version")
-	require.NoError(t, err)
-	defer rows.Close()
-
-	var versions []int
-	for rows.Next() {
-		var version int
-		err = rows.Scan(&version)
-		require.NoError(t, err)
-		versions = append(versions, version)
-	}
-
-	// Verify versions are sequential
-	for i, version := range versions {
-		assert.Equal(t, i+1, version, "Migration version should be sequential")
-	}
-}
-
-// TestMigrationDependencies tests migration dependencies
-func (suite *MigrationTestSuite) TestMigrationDependencies() {
-	t := suite.T()
-
-	// Apply all migrations
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Test that foreign key constraints work
-	// Insert a benchmark run
-	_, err = suite.db.Exec(`
-		INSERT INTO benchmark_runs (
-			run_id, test_name, description, start_time, end_time, duration,
-			config, environment, tags
-		) VALUES (
-			'test_run', 'test', 'description', NOW(), NOW(), '30m',
-			'{}', '{}', '{}'
-		)`)
-	require.NoError(t, err)
-
-	// Insert client summary that references the run
-	_, err = suite.db.Exec(`
-		INSERT INTO client_summary (
-			run_id, client_name, total_requests, total_errors, error_rate
-		) VALUES (
-			'test_run', 'test_client', 1000, 10, 0.01
-		)`)
-	assert.NoError(t, err)
-
-	// Try to insert client summary with non-existent run_id (should fail)
-	_, err = suite.db.Exec(`
-		INSERT INTO client_summary (
-			run_id, client_name, total_requests, total_errors, error_rate
-		) VALUES (
-			'nonexistent_run', 'test_client', 1000, 10, 0.01
-		)`)
-	assert.Error(t, err)
-	assert.Contains(t, err.Error(), "foreign key")
-}
-
-// TestTimescaleDBMigration tests TimescaleDB-specific migration
-func (suite *MigrationTestSuite) TestTimescaleDBMigration() {
-	t := suite.T()
-
-	// Apply all migrations (TimescaleDB migration will be skipped without extension)
-	err := suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Verify hypertables were not created (since TimescaleDB is not installed)
-	// This is expected behavior - the migration should not fail even without TimescaleDB
-	var hypertableCount int
-	err = suite.db.QueryRow(`
-		SELECT COUNT(*) 
-		FROM information_schema.tables 
-		WHERE table_name LIKE '%_hypertable%'`).Scan(&hypertableCount)
-	assert.NoError(t, err)
-	// Should be 0 since TimescaleDB is not installed
-}
-
-// TestGrafanaFunctionsMigration tests Grafana functions migration
-func (suite *MigrationTestSuite) TestGrafanaFunctionsMigration() {
-	t := suite.T()
-
-	// Apply all migrations
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Verify Grafana functions were created
-	functions := []string{
-		"get_metric_timeseries",
-		"get_historic_trend",
-		"get_performance_changes",
-	}
-
-	for _, functionName := range functions {
-		var exists bool
-		err = suite.db.QueryRow(`
-			SELECT EXISTS (
-				SELECT FROM information_schema.routines 
-				WHERE routine_schema = 'public' 
-				AND routine_name = $1
-			)`, functionName).Scan(&exists)
-		assert.NoError(t, err, "Failed to check function %s", functionName)
-		assert.True(t, exists, "Function %s should exist", functionName)
-	}
-
-	// Test one of the functions
-	rows, err := suite.db.Query(`
-		SELECT * FROM get_metric_timeseries('latency_p95', 'geth', null, null, null)
-		LIMIT 0`) // Just test that function exists and can be called
-	assert.NoError(t, err)
-	rows.Close()
-}
-
-// TestConcurrentMigrations tests concurrent migration attempts
-func (suite *MigrationTestSuite) TestConcurrentMigrations() {
-	t := suite.T()
-
-	// This test ensures that concurrent migration attempts don't cause issues
-	// In practice, migrations should be run sequentially, but this tests robustness
-
-	concurrency := 3
-	results := make(chan error, concurrency)
-
-	// Start multiple goroutines trying to apply migrations
-	for i := 0; i < concurrency; i++ {
-		go func() {
-			migrationService := NewMigrationService(suite.db, suite.logger)
-			err := migrationService.Up()
-			results <- err
-		}()
-	}
-
-	// Collect results
-	errors := 0
-	for i := 0; i < concurrency; i++ {
-		err := <-results
-		if err != nil {
-			errors++
-		}
-	}
-
-	// At least one should succeed, others might fail due to concurrent access
-	assert.Less(t, errors, concurrency, "At least one migration attempt should succeed")
-
-	// Verify final state is correct
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.Len(t, applied, len(migrations))
-}
-
-// TestMigrationTransactionRollback tests transaction rollback on migration failure
-func (suite *MigrationTestSuite) TestMigrationTransactionRollback() {
-	t := suite.T()
-
-	// Initialize migration table
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	// Create a migration that will partially succeed then fail
-	partialMigration := Migration{
-		Version: 999,
-		Name:    "partial_migration",
-		Up: `
-			CREATE TABLE test_table (id SERIAL PRIMARY KEY);
-			INSERT INTO test_table (id) VALUES (1);
-			INVALID SQL STATEMENT; -- This will cause failure
-		`,
-		Down: "DROP TABLE IF EXISTS test_table",
-	}
-
-	// Attempt to apply the migration
-	err = suite.migration.applyMigration(partialMigration)
-	assert.Error(t, err)
-
-	// Verify the table was not created (transaction was rolled back)
-	var exists bool
-	err = suite.db.QueryRow(`
-		SELECT EXISTS (
-			SELECT FROM information_schema.tables 
-			WHERE table_schema = 'public' 
-			AND table_name = 'test_table'
-		)`).Scan(&exists)
-	assert.NoError(t, err)
-	assert.False(t, exists)
-
-	// Verify migration was not recorded
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM schema_migrations WHERE version = $1", partialMigration.Version).Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 0, count)
-}
-
-// TestMigrationWithData tests migrations on database with existing data
-func (suite *MigrationTestSuite) TestMigrationWithData() {
-	t := suite.T()
-
-	// Apply initial migrations
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Insert some test data
-	_, err = suite.db.Exec(`
-		INSERT INTO benchmark_runs (
-			run_id, test_name, description, start_time, end_time, duration,
-			config, environment, tags
-		) VALUES (
-			'test_run_1', 'test1', 'desc1', NOW(), NOW(), '30m',
-			'{}', '{}', '{}'
-		)`)
-	require.NoError(t, err)
-
-	_, err = suite.db.Exec(`
-		INSERT INTO client_summary (
-			run_id, client_name, total_requests, total_errors, error_rate
-		) VALUES (
-			'test_run_1', 'client1', 1000, 10, 0.01
-		)`)
-	require.NoError(t, err)
-
-	// Verify data exists
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM benchmark_runs").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 1, count)
-
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM client_summary").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 1, count)
-
-	// Test rollback preserves referential integrity
-	err = suite.migration.Down(3) // Rollback to before historic tables
-	assert.NoError(t, err)
-
-	// Verify main data still exists
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM benchmark_runs").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 1, count)
-
-	// Re-apply migrations
-	err = suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Verify data is still there
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM benchmark_runs").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, 1, count)
-}
-
-// BenchmarkMigrationUp benchmarks full migration application
-func (suite *MigrationTestSuite) BenchmarkMigrationUp() {
-	b := suite.T()
-	if testing.Short() {
-		b.Skip("Skipping benchmark in short mode")
-	}
-
-	// ResetTimer not available in test suite context
-
-	for i := 0; i < 10; i++ {
-		// Reset database
-		err := suite.migration.Reset()
-		require.NoError(b, err)
-
-		// Apply all migrations
-		err = suite.migration.Up()
-		require.NoError(b, err)
-	}
-}
-
-// BenchmarkMigrationDown benchmarks migration rollback
-func (suite *MigrationTestSuite) BenchmarkMigrationDown() {
-	b := suite.T()
-	if testing.Short() {
-		b.Skip("Skipping benchmark in short mode")
-	}
-
-	// Setup: apply all migrations
-	err := suite.migration.Up()
-	require.NoError(b, err)
-
-	// ResetTimer not available in test suite context
-
-	for i := 0; i < 10; i++ {
-		// Rollback all migrations
-		err = suite.migration.Down(0)
-		require.NoError(b, err)
-
-		// Reapply all migrations for next iteration
-		err = suite.migration.Up()
-		require.NoError(b, err)
-	}
-}
-
-// Run the test suite
-func TestMigrationTestSuite(t *testing.T) {
-	// Skip if running in CI without Docker
-	if os.Getenv("SKIP_INTEGRATION_TESTS") != "" {
-		t.Skip("Skipping integration tests")
-	}
-
-	suite.Run(t, new(MigrationTestSuite))
-}
-
-// Unit tests for migration data and validation
-
-// TestMigrationStructure tests the migration structure and data
-func TestMigrationStructure(t *testing.T) {
-	// Test that we have migrations defined
-	assert.Greater(t, len(migrations), 0, "Should have at least one migration")
-
-	// Test that migration versions are unique and sequential
-	versions := make(map[int]bool)
-	for i, migration := range migrations {
-		// Check version uniqueness
-		assert.False(t, versions[migration.Version], "Migration version %d should be unique", migration.Version)
-		versions[migration.Version] = true
-
-		// Check sequential versions (starting from 1)
-		assert.Equal(t, i+1, migration.Version, "Migration versions should be sequential starting from 1")
-
-		// Check required fields
-		assert.NotEmpty(t, migration.Name, "Migration %d should have a name", migration.Version)
-		assert.NotEmpty(t, migration.Up, "Migration %d should have Up SQL", migration.Version)
-		assert.NotEmpty(t, migration.Down, "Migration %d should have Down SQL", migration.Version)
-
-		// Check SQL validity (basic checks)
-		assert.False(t, strings.Contains(migration.Up, "INVALID"), "Migration %d Up SQL should not contain INVALID", migration.Version)
-		assert.False(t, strings.Contains(migration.Down, "INVALID"), "Migration %d Down SQL should not contain INVALID", migration.Version)
-	}
-}
-
-// TestMigrationSQLSyntax tests basic SQL syntax in migrations
-func TestMigrationSQLSyntax(t *testing.T) {
-	for _, migration := range migrations {
-		// Check that Up SQL contains CREATE statements
-		upSQL := strings.ToUpper(migration.Up)
-		if migration.Version <= 3 { // First 3 migrations should create tables
-			assert.Contains(t, upSQL, "CREATE TABLE", "Migration %d should create tables", migration.Version)
-		}
-
-		// Check that Down SQL contains DROP statements
-		downSQL := strings.ToUpper(migration.Down)
-		assert.Contains(t, downSQL, "DROP", "Migration %d should drop objects in Down SQL", migration.Version)
-
-		// Check for common SQL injection patterns (basic security check)
-		assert.False(t, strings.Contains(migration.Up, "'; DROP"), "Migration %d Up SQL should not contain injection patterns", migration.Version)
-		assert.False(t, strings.Contains(migration.Down, "'; DROP"), "Migration %d Down SQL should not contain injection patterns", migration.Version)
-	}
-}
-
-// TestTableSchemaConstants tests the schema constant definitions
-func TestTableSchemaConstants(t *testing.T) {
-	schemas := []string{
-		BenchmarkRunsTableSchema,
-		BenchmarkMetricsTableSchema,
-		ClientSummaryTableSchema,
-		MethodSummaryTableSchema,
-		ComparisonResultsTableSchema,
-		ResponseDiffsTableSchema,
-		HistoricRunsTableSchema,
-		RegressionsTableSchema,
-		BaselinesTableSchema,
-	}
-
-	for i, schema := range schemas {
-		assert.NotEmpty(t, schema, "Schema %d should not be empty", i)
-		assert.Contains(t, strings.ToUpper(schema), "CREATE TABLE", "Schema %d should contain CREATE TABLE", i)
-		assert.Contains(t, strings.ToUpper(schema), "IF NOT EXISTS", "Schema %d should use IF NOT EXISTS", i)
-	}
-}
-
-// TestGrafanaQueriesStructure tests Grafana queries structure
-func TestGrafanaQueriesStructure(t *testing.T) {
-	queries := []string{
-		GrafanaQueries.LatencyOverTime,
-		GrafanaQueries.ErrorRateOverTime,
-		GrafanaQueries.ThroughputOverTime,
-		GrafanaQueries.ClientComparison,
-		GrafanaQueries.MethodBreakdown,
-		GrafanaQueries.RunComparison,
-		GrafanaQueries.HistoricTrends,
-		GrafanaQueries.RecentRegressions,
-		GrafanaQueries.PerformanceChanges,
-		GrafanaQueries.BaselineComparison,
-		GrafanaQueries.GitCommitCorrelation,
-	}
-
-	for i, query := range queries {
-		assert.NotEmpty(t, query, "Query %d should not be empty", i)
-		assert.Contains(t, strings.ToUpper(query), "SELECT", "Query %d should be a SELECT statement", i)
-
-		// Check for parameterized queries
-		paramCount := strings.Count(query, "$")
-		assert.Greater(t, paramCount, 0, "Query %d should use parameterized queries", i)
-	}
-}
-
-// TestMigrationReversibility tests that migrations can be applied and reversed
-func TestMigrationReversibility(t *testing.T) {
-	// This is a conceptual test - in practice, this would require a database
-	// Here we just verify the structure supports reversibility
-
-	for _, migration := range migrations {
-		// Check that Down SQL attempts to reverse Up SQL
-		upSQL := strings.ToUpper(migration.Up)
-		downSQL := strings.ToUpper(migration.Down)
-
-		if strings.Contains(upSQL, "CREATE TABLE") {
-			assert.Contains(t, downSQL, "DROP TABLE",
-				"Migration %d: If Up creates tables, Down should drop them", migration.Version)
-		}
-
-		if strings.Contains(upSQL, "CREATE INDEX") {
-			assert.Contains(t, downSQL, "DROP INDEX",
-				"Migration %d: If Up creates indices, Down should drop them", migration.Version)
-		}
-
-		if strings.Contains(upSQL, "CREATE FUNCTION") {
-			assert.Contains(t, downSQL, "DROP FUNCTION",
-				"Migration %d: If Up creates functions, Down should drop them", migration.Version)
-		}
-	}
-}
-
-// TestDefaultPostgresConfigMigrationCompatibility tests config compatibility
-func TestDefaultPostgresConfigMigrationCompatibility(t *testing.T) {
-	config := DefaultPostgresConfig()
-
-	// Test that default config has reasonable values for migrations
-	assert.Greater(t, config.MaxOpenConns, 0, "Max open connections should be positive")
-	assert.GreaterOrEqual(t, config.MaxOpenConns, config.MaxIdleConns, "Max open should be >= max idle")
-	assert.Greater(t, config.MaxLifetime, time.Duration(0), "Connection lifetime should be positive")
-
-	// Test SSL mode is valid
-	validSSLModes := []string{"disable", "require", "verify-ca", "verify-full"}
-	assert.Contains(t, validSSLModes, config.SSLMode, "SSL mode should be valid")
-}
-
-// Enhanced edge case tests for migration system
-
-// TestMigrationVersionGaps tests handling of non-sequential migration versions
-func (suite *MigrationTestSuite) TestMigrationVersionGaps() {
-	t := suite.T()
-
-	// Initialize migration table
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	// Manually insert migration records with gaps
-	_, err = suite.db.Exec("INSERT INTO schema_migrations (version, name, applied_at) VALUES (1, 'migration_1', NOW())")
-	require.NoError(t, err)
-	_, err = suite.db.Exec("INSERT INTO schema_migrations (version, name, applied_at) VALUES (3, 'migration_3', NOW())")
-	require.NoError(t, err)
-	_, err = suite.db.Exec("INSERT INTO schema_migrations (version, name, applied_at) VALUES (5, 'migration_5', NOW())")
-	require.NoError(t, err)
-
-	// Get applied migrations
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.True(t, applied[1])
-	assert.False(t, applied[2]) // Gap
-	assert.True(t, applied[3])
-	assert.False(t, applied[4]) // Gap
-	assert.True(t, applied[5])
-
-	// Test version retrieval with gaps
-	version, err := suite.migration.GetVersion()
-	assert.NoError(t, err)
-	assert.Equal(t, 5, version) // Should return highest version
-}
-
-// TestMigrationDatabaseConnectionLoss tests behavior when database connection is lost
-func (suite *MigrationTestSuite) TestMigrationDatabaseConnectionLoss() {
-	t := suite.T()
-
-	// Initialize migrations
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Close the database connection to simulate connection loss
-	originalDB := suite.migration.db
-	suite.migration.db.Close()
-
-	// Create a closed database connection
-	closedDB, _ := sql.Open("postgres", "invalid connection string")
-	closedDB.Close()
-	suite.migration.db = closedDB
-
-	// Try to get version - should fail gracefully
-	_, err = suite.migration.GetVersion()
-	assert.Error(t, err)
-
-	// Try to apply migrations - should fail gracefully
-	err = suite.migration.Up()
-	assert.Error(t, err)
-
-	// Restore original connection
-	suite.migration.db = originalDB
-}
-
-// TestMigrationLargeDatasets tests migrations with substantial data
-func (suite *MigrationTestSuite) TestMigrationLargeDatasets() {
-	t := suite.T()
-
-	// Apply initial migrations to have tables
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Insert a large number of records
-	const recordCount = 10000
-	tx, err := suite.db.Begin()
-	require.NoError(t, err)
-
-	for i := 0; i < recordCount; i++ {
-		_, err = tx.Exec(`
-			INSERT INTO benchmark_runs (
-				run_id, test_name, description, start_time, end_time, duration,
-				config, environment, tags
-			) VALUES (
-				$1, 'large_test', 'Large dataset test', NOW(), NOW(), '30m',
-				'{}', '{}', '{}'
-			)`, fmt.Sprintf("large_run_%d", i))
-		if err != nil {
-			tx.Rollback()
-			require.NoError(t, err)
-		}
-	}
-
-	err = tx.Commit()
-	require.NoError(t, err)
-
-	// Verify data exists
-	var count int
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM benchmark_runs WHERE test_name = 'large_test'").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, recordCount, count)
-
-	// Test rollback with large dataset
-	initialVersion, err := suite.migration.GetVersion()
-	require.NoError(t, err)
-
-	// Rollback one migration
-	err = suite.migration.Down(initialVersion - 1)
-	assert.NoError(t, err)
-
-	// Re-apply migration
-	err = suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Verify data integrity after migration operations
-	err = suite.db.QueryRow("SELECT COUNT(*) FROM benchmark_runs WHERE test_name = 'large_test'").Scan(&count)
-	assert.NoError(t, err)
-	assert.Equal(t, recordCount, count)
-}
-
-// TestMigrationPermissionRestrictions tests behavior with restricted database permissions
-func (suite *MigrationTestSuite) TestMigrationPermissionRestrictions() {
-	t := suite.T()
-
-	// This test simulates scenarios where the migration user has limited permissions
-	// Note: This is a conceptual test - actual permission testing would require
-	// setting up restricted database users
-
-	// Test detection of missing permissions
-	err := suite.migration.Up()
-	assert.NoError(t, err) // Should succeed with full permissions in test environment
-
-	// Verify that functions requiring elevated permissions were created
-	var functionExists bool
-	err = suite.db.QueryRow(`
-		SELECT EXISTS (
-			SELECT FROM information_schema.routines 
-			WHERE routine_schema = 'public' 
-			AND routine_name = 'get_metric_timeseries'
-		)`).Scan(&functionExists)
-	assert.NoError(t, err)
-	assert.True(t, functionExists)
-}
-
-// TestMigrationSchemaValidation tests comprehensive schema validation after migrations
-func (suite *MigrationTestSuite) TestMigrationSchemaValidation() {
-	t := suite.T()
-
-	// Apply all migrations
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Test table constraints and relationships
-	expectedConstraints := map[string][]string{
-		"benchmark_runs": {"benchmark_runs_pkey"},
-		"client_summary": {"client_summary_pkey", "fk_client_summary_run_id"},
-		"method_summary": {"method_summary_pkey", "fk_method_summary_run_id"},
-		"historic_runs":  {"historic_runs_pkey"},
-		"regressions":    {"regressions_pkey"},
-		"baselines":      {"baselines_pkey"},
-	}
-
-	for tableName, expectedConstraintNames := range expectedConstraints {
-		rows, err := suite.db.Query(`
-			SELECT constraint_name 
-			FROM information_schema.table_constraints 
-			WHERE table_schema = 'public' 
-			AND table_name = $1
-			AND constraint_type IN ('PRIMARY KEY', 'FOREIGN KEY', 'UNIQUE')
-		`, tableName)
-		require.NoError(t, err)
-
-		var constraints []string
-		for rows.Next() {
-			var constraintName string
-			err = rows.Scan(&constraintName)
-			require.NoError(t, err)
-			constraints = append(constraints, constraintName)
-		}
-		rows.Close()
-
-		// Verify at least the expected constraints exist
-		for _, expectedConstraint := range expectedConstraintNames {
-			found := false
-			for _, constraint := range constraints {
-				if strings.Contains(constraint, expectedConstraint) || constraint == expectedConstraint {
-					found = true
-					break
-				}
-			}
-			assert.True(t, found, "Expected constraint %s not found for table %s", expectedConstraint, tableName)
-		}
-	}
-
-	// Test column types and nullability
-	expectedColumns := map[string]map[string]string{
-		"benchmark_runs": {
-			"run_id":     "text",
-			"test_name":  "text",
-			"start_time": "timestamp with time zone",
-			"end_time":   "timestamp with time zone",
-			"duration":   "text",
-		},
-		"client_summary": {
-			"run_id":         "text",
-			"client_name":    "text",
-			"total_requests": "bigint",
-			"total_errors":   "bigint",
-			"error_rate":     "double precision",
-		},
-	}
-
-	for tableName, expectedColumnTypes := range expectedColumns {
-		for columnName, expectedType := range expectedColumnTypes {
-			var dataType string
-			err = suite.db.QueryRow(`
-				SELECT data_type 
-				FROM information_schema.columns 
-				WHERE table_schema = 'public' 
-				AND table_name = $1 
-				AND column_name = $2
-			`, tableName, columnName).Scan(&dataType)
-			assert.NoError(t, err, "Column %s.%s should exist", tableName, columnName)
-			assert.Contains(t, dataType, expectedType, "Column %s.%s should have type %s, got %s", tableName, columnName, expectedType, dataType)
-		}
-	}
-}
-
-// TestMigrationPerformanceMetrics tests migration performance characteristics
-func (suite *MigrationTestSuite) TestMigrationPerformanceMetrics() {
-	t := suite.T()
-
-	// Measure time to apply all migrations
-	startTime := time.Now()
-	err := suite.migration.Up()
-	migrationDuration := time.Since(startTime)
-
-	assert.NoError(t, err)
-	assert.Less(t, migrationDuration, 30*time.Second, "Migration should complete within 30 seconds")
-
-	// Measure time to rollback all migrations
-	startTime = time.Now()
-	err = suite.migration.Down(0)
-	rollbackDuration := time.Since(startTime)
-
-	assert.NoError(t, err)
-	assert.Less(t, rollbackDuration, 15*time.Second, "Rollback should complete within 15 seconds")
-
-	// Test index creation performance
-	err = suite.migration.Up()
-	require.NoError(t, err)
-
-	startTime = time.Now()
-	err = suite.migration.CreateIndices()
-	indexCreationDuration := time.Since(startTime)
-
-	assert.NoError(t, err)
-	assert.Less(t, indexCreationDuration, 10*time.Second, "Index creation should complete within 10 seconds")
-}
-
-// TestMigrationDataIntegrity tests data integrity during migration operations
-func (suite *MigrationTestSuite) TestMigrationDataIntegrity() {
-	t := suite.T()
-
-	// Apply migrations and insert test data
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	// Insert reference data
-	testData := []struct {
-		runID    string
-		testName string
-		client   string
-	}{
-		{"integrity_run_1", "integrity_test", "client_1"},
-		{"integrity_run_2", "integrity_test", "client_2"},
-		{"integrity_run_3", "integrity_test", "client_3"},
-	}
-
-	for _, data := range testData {
-		// Insert benchmark run
-		_, err = suite.db.Exec(`
-			INSERT INTO benchmark_runs (
-				run_id, test_name, description, start_time, end_time, duration,
-				config, environment, tags
-			) VALUES (
-				$1, $2, 'Integrity test', NOW(), NOW(), '30m',
-				'{}', '{}', '{}'
-			)`, data.runID, data.testName)
-		require.NoError(t, err)
-
-		// Insert client summary
-		_, err = suite.db.Exec(`
-			INSERT INTO client_summary (
-				run_id, client_name, total_requests, total_errors, error_rate
-			) VALUES (
-				$1, $2, 1000, 10, 0.01
-			)`, data.runID, data.client)
-		require.NoError(t, err)
-	}
-
-	// Calculate checksums before rollback
-	var beforeChecksums map[string]string = make(map[string]string)
-
-	for tableName := range map[string]bool{"benchmark_runs": true, "client_summary": true} {
-		var checksum string
-		err = suite.db.QueryRow(fmt.Sprintf("SELECT md5(string_agg(md5(t.*::text), '')) FROM %s t WHERE test_name = 'integrity_test'", tableName)).Scan(&checksum)
-		if err == nil {
-			beforeChecksums[tableName] = checksum
-		}
-	}
-
-	// Perform rollback and re-apply
-	initialVersion, err := suite.migration.GetVersion()
-	require.NoError(t, err)
-
-	err = suite.migration.Down(initialVersion - 2)
-	assert.NoError(t, err)
-
-	err = suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Verify data integrity after migration operations
-	for _, data := range testData {
-		var count int
-		err = suite.db.QueryRow("SELECT COUNT(*) FROM benchmark_runs WHERE run_id = $1", data.runID).Scan(&count)
-		assert.NoError(t, err)
-		assert.Equal(t, 1, count, "Run %s should exist after migration operations", data.runID)
-
-		err = suite.db.QueryRow("SELECT COUNT(*) FROM client_summary WHERE run_id = $1", data.runID).Scan(&count)
-		assert.NoError(t, err)
-		assert.Equal(t, 1, count, "Client summary for run %s should exist after migration operations", data.runID)
-	}
-
-	// Compare checksums after operations (for tables that still exist)
-	for tableName, beforeChecksum := range beforeChecksums {
-		var afterChecksum string
-		err = suite.db.QueryRow(fmt.Sprintf("SELECT md5(string_agg(md5(t.*::text), '')) FROM %s t WHERE test_name = 'integrity_test'", tableName)).Scan(&afterChecksum)
-		if err == nil {
-			assert.Equal(t, beforeChecksum, afterChecksum, "Data integrity should be maintained for table %s", tableName)
-		}
-	}
-}
-
-// TestMigrationErrorRecovery tests recovery from various error conditions
-func (suite *MigrationTestSuite) TestMigrationErrorRecovery() {
-	t := suite.T()
-
-	// Test recovery from incomplete migration state
-	err := suite.migration.Initialize()
-	require.NoError(t, err)
-
-	// Manually insert an incomplete migration record
-	_, err = suite.db.Exec("INSERT INTO schema_migrations (version, name, applied_at) VALUES (999, 'incomplete_migration', NOW())")
-	require.NoError(t, err)
-
-	// Try to apply migrations - should handle the incomplete state
-	err = suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Verify system recovered properly
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.True(t, applied[999]) // Incomplete migration should be preserved
-
-	// Test recovery from corrupted migration table
-	_, err = suite.db.Exec("UPDATE schema_migrations SET version = NULL WHERE version = 1")
-	require.NoError(t, err)
-
-	// System should handle NULL version gracefully
-	applied, err = suite.migration.GetAppliedMigrations()
-	// Should not panic and should return some result
-	assert.NotNil(t, applied)
-}
-
-// TestMigrationConcurrencyEdgeCases tests advanced concurrency scenarios
-func (suite *MigrationTestSuite) TestMigrationConcurrencyEdgeCases() {
-	t := suite.T()
-
-	// Test concurrent initialization attempts
-	const concurrency = 5
-	initResults := make(chan error, concurrency)
-
-	for i := 0; i < concurrency; i++ {
-		go func() {
-			migrationService := NewMigrationService(suite.db, suite.logger)
-			err := migrationService.Initialize()
-			initResults <- err
-		}()
-	}
-
-	// Collect initialization results
-	successCount := 0
-	for i := 0; i < concurrency; i++ {
-		err := <-initResults
-		if err == nil {
-			successCount++
-		}
-	}
-
-	// At least one should succeed, and system should be in consistent state
-	assert.Greater(t, successCount, 0, "At least one initialization should succeed")
-
-	// Verify migration table exists and is consistent
-	var exists bool
-	err := suite.db.QueryRow(`
-		SELECT EXISTS (
-			SELECT FROM information_schema.tables 
-			WHERE table_schema = 'public' 
-			AND table_name = 'schema_migrations'
-		)`).Scan(&exists)
-	assert.NoError(t, err)
-	assert.True(t, exists)
-
-	// Test concurrent migration attempts with different services
-	migrationResults := make(chan error, concurrency)
-
-	for i := 0; i < concurrency; i++ {
-		go func() {
-			migrationService := NewMigrationService(suite.db, suite.logger)
-			err := migrationService.Up()
-			migrationResults <- err
-		}()
-	}
-
-	// Collect migration results
-	migrationSuccessCount := 0
-	for i := 0; i < concurrency; i++ {
-		err := <-migrationResults
-		if err == nil {
-			migrationSuccessCount++
-		}
-	}
-
-	// System should handle concurrent migrations gracefully
-	assert.Greater(t, migrationSuccessCount, 0, "At least one migration should succeed")
-
-	// Verify final state is consistent
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-	assert.Len(t, applied, len(migrations))
-}
-
-// TestMigrationResourceLimits tests behavior under resource constraints
-func (suite *MigrationTestSuite) TestMigrationResourceLimits() {
-	t := suite.T()
-
-	// Test migration with limited connection pool
-	limitedDB, err := sql.Open("postgres", suite.db.Stats().OpenConnections)
-	if err == nil {
-		limitedDB.SetMaxOpenConns(1)
-		limitedDB.SetMaxIdleConns(1)
-
-		limitedMigration := NewMigrationService(limitedDB, suite.logger)
-		err = limitedMigration.Up()
-		assert.NoError(t, err, "Migration should succeed even with limited connections")
-
-		limitedDB.Close()
-	}
-
-	// Test with memory constraints (conceptual - actual memory limits would require OS-level controls)
-	// This tests whether migrations handle large operations efficiently
-	err = suite.migration.Up()
-	assert.NoError(t, err)
-
-	// Create a large temporary table to simulate memory pressure
-	_, err = suite.db.Exec(`
-		CREATE TEMPORARY TABLE large_temp_table AS 
-		SELECT generate_series(1, 100000) as id, 
-			   md5(random()::text) as data
-	`)
-	if err == nil {
-		// Test migration operations under memory pressure
-		err = suite.migration.CreateIndices()
-		assert.NoError(t, err, "Index creation should succeed under memory pressure")
-	}
-}
-
-// TestMigrationVersionConsistency tests version numbering consistency
-func (suite *MigrationTestSuite) TestMigrationVersionConsistency() {
-	t := suite.T()
-
-	// Verify migration versions are sequential without gaps
-	for i := 0; i < len(migrations)-1; i++ {
-		currentVersion := migrations[i].Version
-		nextVersion := migrations[i+1].Version
-		assert.Equal(t, currentVersion+1, nextVersion,
-			"Migration versions should be sequential: %d -> %d", currentVersion, nextVersion)
-	}
-
-	// Test version consistency after operations
-	err := suite.migration.Up()
-	require.NoError(t, err)
-
-	version, err := suite.migration.GetVersion()
-	assert.NoError(t, err)
-	assert.Equal(t, len(migrations), version)
-
-	applied, err := suite.migration.GetAppliedMigrations()
-	assert.NoError(t, err)
-
-	// Count applied migrations
-	appliedCount := 0
-	for _, isApplied := range applied {
-		if isApplied {
-			appliedCount++
-		}
-	}
-	assert.Equal(t, len(migrations), appliedCount)
-}
diff --git a/runner/storage/postgres.go b/runner/storage/postgres.go
index dc48784..536ba17 100644
--- a/runner/storage/postgres.go
+++ b/runner/storage/postgres.go
@@ -190,7 +190,6 @@ func (d *Database) ListRuns(filter types.RunFilter) ([]*types.HistoricRun, error
 	if !filter.Since.IsZero() {
 		query += fmt.Sprintf(" AND timestamp >= $%d", argCount)
 		args = append(args, filter.Since)
-		argCount++
 	}
 
 	query += " ORDER BY timestamp DESC"
@@ -295,7 +294,6 @@ func (d *Database) QueryMetrics(query types.MetricQuery) ([]types.TimeSeriesMetr
 	if !query.Since.IsZero() {
 		sqlQuery += fmt.Sprintf(" AND time >= $%d", argCount)
 		args = append(args, query.Since)
-		argCount++
 	}
 
 	sqlQuery += " ORDER BY time DESC"
diff --git a/runner/types/historic.go b/runner/types/historic.go
index 8bcf12a..dfdddfc 100644
--- a/runner/types/historic.go
+++ b/runner/types/historic.go
@@ -124,6 +124,7 @@ type RunFilter struct {
 
 // TrendFilter represents filtering criteria for trend analysis
 type TrendFilter struct {
+	TestName  string    `json:"test_name,omitempty"`
 	Client    string    `json:"client,omitempty"`
 	Method    string    `json:"method,omitempty"`
 	GitBranch string    `json:"git_branch,omitempty"`
diff --git a/runner/types/types.go b/runner/types/types.go
index 431b348..b0bbf7b 100644
--- a/runner/types/types.go
+++ b/runner/types/types.go
@@ -83,8 +83,6 @@ type ClientMetrics struct {
 	ConnectionMetrics ConnectionMetrics            `json:"connection_metrics"`
 	TimeSeries        map[string][]TimeSeriesPoint `json:"time_series"`
 	SystemMetrics     []SystemMetrics              `json:"system_metrics"`
-	// TODO(post-merge): populate from k6 Prometheus labels (error_code, status).
-	// Tracked separately; not in scope for the develop->main merge.
 	ErrorTypes  map[string]int64 `json:"error_types"`
 	StatusCodes map[int]int64    `json:"status_codes"`
 }